/
Wall_Street_Journal.js.in
51 lines (47 loc) · 1.85 KB
/
Wall_Street_Journal.js.in
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
{
"translatorID" : "fd0556d5-4c99-43d2-96d2-479284a0ae31",
"translatorType" : 4,
"label" :"Wall Street Journal",
"creator" :"Erik Hetzner, based on work by Matt Burton",
"target" :"http://online\\.wsj\\.com/article/",
"minVersion" :"2.0",
"maxVersion" :"",
"priority" :100,
"inRepository" :true,
"lastUpdated" :"2010-06-24 08:20:02"
}
//@framework@
/**
 * Translator entry point: hands detection over to the framework object.
 *
 * Forwards both arguments unchanged to FW.detectWeb and returns whatever
 * that call returns. FW is not defined in this file; presumably it is
 * supplied by the code substituted for the framework marker above -- confirm.
 *
 * @param doc - passed through to FW.detectWeb as its first argument
 * @param url - passed through to FW.detectWeb as its second argument
 * @returns the value returned by FW.detectWeb(doc, url)
 */
function detectWeb(doc, url) {
    var detected = FW.detectWeb(doc, url);
    return detected;
}
/**
 * Translator entry point: hands the scrape over to the framework object.
 *
 * Forwards both arguments unchanged to FW.doWeb and returns whatever that
 * call returns. FW is not defined in this file; presumably it is supplied
 * by the code substituted for the framework marker above -- confirm.
 *
 * @param doc - passed through to FW.doWeb as its first argument
 * @param url - passed through to FW.doWeb as its second argument
 * @returns the value returned by FW.doWeb(doc, url)
 */
function doWeb(doc, url) {
    var outcome = FW.doWeb(doc, url);
    return outcome;
}
/**
 * Build the filter chain shared by every field this translator scrapes.
 *
 * Starts a new FW.PageText() chain, keeps capture group 1 of `pattern`,
 * turns every "+" into a space, and then applies the framework's
 * unescapeHTML() and unescape() filters, in that order. A fresh chain is
 * created on every call, so the fields do not share filter state.
 *
 * The "+" replacement uses a regex with the g flag rather than the
 * three-argument form replace("+", " ", "g"): the third "flags" argument is
 * a non-standard extension of String.prototype.replace that current engines
 * ignore, which would leave all but the first "+" untouched.
 * NOTE(review): assumes the framework's replace() forwards its arguments to
 * String.prototype.replace -- confirm against the framework source.
 *
 * @param {RegExp} pattern - regex whose first capture group is the raw value
 * @returns the framework chain object, so callers can append more filters
 */
function wsjPageField(pattern) {
    return FW.PageText().match(pattern, 1).
        replace(/\+/g, " ").
        unescapeHTML().
        unescape();
}

// Register the scraper for Wall Street Journal article pages. Each field is
// pulled out of a  name:'value'  pair matched in FW.PageText(); the lookahead
// (?=,|}) requires the closing quote to be followed by "," or "}".
FW.Scraper({
    itemType         : 'newspaperArticle',
    publicationTitle : "wsj.com",
    // The authors value is split on commas; each piece goes through
    // cleanAuthor("author").
    creators         : wsjPageField(/authors:'(.*?)'(?=,|})/).
                           split(/,/).
                           cleanAuthor("author"),
    abstractNote     : wsjPageField(/bodyText:'(.*?)'(?=,|})/),
    date             : wsjPageField(/publicationDate:'(.*?)'(?=,|})/),
    // Strip every backslash from the headline. The previous pattern had no
    // g flag and removed only the first one. Presumably these are escape
    // characters left over from the page's embedded data -- confirm.
    title            : wsjPageField(/articleHeadline:'(.*?)'(?=,|})/).
                           replace(/\\/g, ""),
    section          : wsjPageField(/articleType:'(.*?)'(?=,|})/),
    attachments      : {
        url   : FW.Url(),
        type  : "text/html",
        title : "Wall Street Journal Snapshot"
    }
});