Skip to content

Commit 8d1370e

Browse files
committed
Fix Daily Beast, Treesearch;
Small fixes for Hamilton and Telegraph; test update for Time
1 parent 971fbfd commit 8d1370e

File tree

5 files changed

+80
-128
lines changed

5 files changed

+80
-128
lines changed

The Daily Beast.js

+18-70
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"inRepository": true,
1010
"translatorType": 4,
1111
"browserSupport": "gcsibv",
12-
"lastUpdated": "2013-04-23 21:06:52"
12+
"lastUpdated": "2013-12-12 14:16:38"
1313
}
1414

1515
/* FW LINE 57:6869c32952b1 */
/*
 * Embedded scraper framework ("FW") used by the translator below.
 *
 * This is the de-minified form of the framework blob, with these fixes:
 *  - no statement is split after `return` (the hard-wrapped blob let automatic
 *    semicolon insertion turn `return <newline> value` into `return;`, which
 *    broke the trimInternal filter and the chaining of Xpath#text());
 *  - _makeChoices recurses into itself (it used to call the nonexistent
 *    _makeTitlesUrls for array-valued `choices`);
 *  - loop variables are declared (no implicit globals `x` / `i`);
 *  - callbacks no longer rely on a lost `this` (MultiScraper itemTrans,
 *    DelegateTranslator._translator, nested arrays in evaluateThing);
 *  - null values and a missing `detect` no longer throw.
 */

/**
 * Recursively flattens nested arrays into one flat array.
 *
 * @param {Array|Object} c - Collection whose enumerable entries are visited.
 * @returns {Array} Flat array of all non-array leaf values.
 */
function flatten(c) {
	var flat = [];
	for (var key in c) {
		var value = c[key];
		if (value instanceof Array) {
			flat = flat.concat(flatten(value));
		} else {
			flat.push(value);
		}
	}
	return flat;
}

var FW = {
	_scrapers: []
};

/**
 * Base behaviour shared by all scrapers: hooks and value evaluation.
 */
FW._Base = function () {
	/**
	 * Calls this.hooks[hookName](item, doc, url) if such a hook function exists.
	 */
	this.callHook = function (hookName, item, doc, url) {
		var hooks = this["hooks"];
		if (typeof hooks === "object" && hooks !== null) {
			var hook = hooks[hookName];
			if (typeof hook === "function") {
				hook(item, doc, url);
			}
		}
	};

	/**
	 * Resolves a scraper property to its value.
	 *  - arrays: each entry is resolved and the results are flattened;
	 *  - objects: their evaluate(doc, url) result;
	 *  - functions: the result of calling them with (doc, url);
	 *  - anything else (including null/undefined): returned unchanged.
	 */
	this.evaluateThing = function (val, doc, url) {
		var valType = typeof val;
		if (valType === "object") {
			if (val === null) {
				return val;
			}
			if (val instanceof Array) {
				// Keep `this` so that nested arrays can recurse.
				var self = this;
				var results = val.map(function (entry) {
					return self.evaluateThing(entry, doc, url);
				});
				return flatten(results);
			}
			return val.evaluate(doc, url);
		}
		if (valType === "function") {
			return val(doc, url);
		}
		return val;
	};
};

/**
 * Registers a single-item scraper described by `init`.
 */
FW.Scraper = function (init) {
	FW._scrapers.push(new FW._Scraper(init));
};

FW._Scraper = function (init) {
	for (var x in init) {
		this[x] = init[x];
	}

	// Item fields that hold a single value; only the first result is used.
	this._singleFieldNames = [
		"abstractNote", "applicationNumber", "archive", "archiveLocation",
		"artworkMedium", "artworkSize", "assignee", "audioFileType",
		"audioRecordingType", "billNumber", "blogTitle", "bookTitle",
		"callNumber", "caseName", "code", "codeNumber", "codePages",
		"codeVolume", "committee", "company", "conferenceName", "country",
		"court", "date", "dateDecided", "dateEnacted", "dictionaryTitle",
		"distributor", "docketNumber", "documentNumber", "DOI", "edition",
		"encyclopediaTitle", "episodeNumber", "extra", "filingDate",
		"firstPage", "forumTitle", "genre", "history", "institution",
		"interviewMedium", "ISBN", "ISSN", "issue", "issueDate",
		"issuingAuthority", "journalAbbreviation", "label", "language",
		"legalStatus", "legislativeBody", "letterType", "libraryCatalog",
		"manuscriptType", "mapType", "medium", "meetingName", "nameOfAct",
		"network", "number", "numberOfVolumes", "numPages", "pages",
		"patentNumber", "place", "postType", "presentationType",
		"priorityNumbers", "proceedingsTitle", "programTitle",
		"programmingLanguage", "publicLawNumber", "publicationTitle",
		"publisher", "references", "reportNumber", "reportType", "reporter",
		"reporterVolume", "rights", "runningTime", "scale", "section",
		"series", "seriesNumber", "seriesText", "seriesTitle", "session",
		"shortTitle", "studio", "subject", "system", "thesisType", "title",
		"type", "university", "url", "version", "videoRecordingType",
		"volume", "websiteTitle", "websiteType"
	];

	/**
	 * Appends attachments described by `config` (an object or an array of
	 * objects with url(s)/title(s)/type(s)/snapshot(s)) to item.attachments.
	 */
	this._makeAttachments = function (doc, url, config, item) {
		if (config instanceof Array) {
			config.forEach(function (child) {
				this._makeAttachments(doc, url, child, item);
			}, this);
		} else if (typeof config === "object" && config !== null) {
			var attachUrls = this.evaluateThing(config.urls || config.url, doc, url);
			var attachTitles = this.evaluateThing(config.titles || config.title, doc, url);
			var attachTypes = this.evaluateThing(config.types || config.type, doc, url);
			var attachSnapshots = this.evaluateThing(config.snapshots || config.snapshot, doc, url);
			if (!(attachUrls instanceof Array)) {
				attachUrls = [attachUrls];
			}
			for (var k = 0; k < attachUrls.length; k++) {
				// Scalar title/type/snapshot values apply to every URL.
				item.attachments.push({
					url: attachUrls[k],
					title: (attachTitles instanceof Array) ? attachTitles[k] : attachTitles,
					type: (attachTypes instanceof Array) ? attachTypes[k] : attachTypes,
					snapshot: (attachSnapshots instanceof Array) ? attachSnapshots[k] : attachSnapshots
				});
			}
		}
	};

	if (this.itemTrans !== undefined) {
		this.makeItems = this.itemTrans.makeItems;
	} else {
		/**
		 * Builds one Zotero item from the page.
		 *
		 * @param doc       Document being scraped.
		 * @param url       URL of the document.
		 * @param ignore    Unused by this implementation.
		 * @param eachItem  Called as eachItem(item, scraper, doc, url).
		 * @param ret       Called last with [item].
		 */
		this.makeItems = function (doc, url, ignore, eachItem, ret) {
			var item = new Zotero.Item(this.itemType);
			item.url = url;
			for (var s = 0; s < this._singleFieldNames.length; s++) {
				var field = this._singleFieldNames[s];
				if (this[field]) {
					var fieldVal = this.evaluateThing(this[field], doc, url);
					if (fieldVal instanceof Array) {
						item[field] = fieldVal[0];
					} else {
						item[field] = fieldVal;
					}
				}
			}
			var multiFields = ["creators", "tags"];
			for (var m = 0; m < multiFields.length; m++) {
				var multiField = multiFields[m];
				var multiVals = this.evaluateThing(this[multiField], doc, url);
				if (multiVals) {
					for (var v in multiVals) {
						item[multiField].push(multiVals[v]);
					}
				}
			}
			this._makeAttachments(doc, url, this["attachments"], item);
			eachItem(item, this, doc, url);
			ret([item]);
		};
	}
};

FW._Scraper.prototype = new FW._Base;

/**
 * Registers a multiple-item (search/listing page) scraper described by `init`.
 */
FW.MultiScraper = function (init) {
	FW._scrapers.push(new FW._MultiScraper(init));
};

FW._MultiScraper = function (init) {
	for (var x in init) {
		this[x] = init[x];
	}

	/** Builds the { url: title } map handed to Zotero.selectItems. */
	this._mkSelectItems = function (titles, urls) {
		var items = {};
		for (var t = 0; t < titles.length; t++) {
			items[urls[t]] = titles[t];
		}
		return items;
	};

	/** Asks the user which entries to save; passes the chosen URLs to callback. */
	this._selectItems = function (titles, urls, callback) {
		var chosen = [];
		Zotero.selectItems(this._mkSelectItems(titles, urls), function (selected) {
			for (var chosenUrl in selected) {
				chosen.push(chosenUrl);
			}
			callback(chosen);
		});
	};

	/** Maps each choice URL to the attachment value at the same index. */
	this._mkAttachments = function (doc, url, urls) {
		var attachments = this.evaluateThing(this["attachments"], doc, url);
		var byUrl = {};
		if (attachments) {
			for (var u = 0; u < urls.length; u++) {
				byUrl[urls[u]] = attachments[u];
			}
		}
		return byUrl;
	};

	/**
	 * Collects selectable entries from `config` (an object with
	 * url(s)/title(s), or an array of such objects) into the parallel output
	 * arrays `titles` and `urls`.
	 */
	this._makeChoices = function (config, doc, url, titles, urls) {
		if (config instanceof Array) {
			config.forEach(function (child) {
				this._makeChoices(child, doc, url, titles, urls);
			}, this);
		} else if (typeof config === "object" && config !== null) {
			var choiceUrls = this.evaluateThing(config.urls || config.url, doc, url);
			var choiceTitles = this.evaluateThing(config.titles || config.title, doc, url);
			var titlesAreArray = (choiceTitles instanceof Array);
			if (!(choiceUrls instanceof Array)) {
				choiceUrls = [choiceUrls];
			}
			for (var c = 0; c < choiceUrls.length; c++) {
				urls.push(choiceUrls[c]);
				titles.push(titlesAreArray ? choiceTitles[c] : choiceTitles);
			}
		}
	};

	/**
	 * Lets the user pick entries, loads each picked page and scrapes it with
	 * this.itemTrans or, if that is not set, with the scraper FW.getScraper
	 * finds for the loaded page.
	 *
	 * @param eachItem  Called as eachItem(item, scraper, doc, url) per item.
	 * @param ret       Called with the array of all items once processing ends.
	 */
	this.makeItems = function (doc, url, ignore, eachItem, ret) {
		if (this.beforeFilter) {
			var filteredUrl = this.beforeFilter(doc, url);
			if (filteredUrl != url) {
				this.makeItems(doc, filteredUrl, ignore, eachItem, ret);
				return;
			}
		}
		var titles = [];
		var urls = [];
		this._makeChoices(this["choices"], doc, url, titles, urls);
		var attachments = this._mkAttachments(doc, url, urls);
		// Captured here because `this` is not the scraper inside the callbacks.
		var parentItemTrans = this.itemTrans;
		this._selectItems(titles, urls, function (itemsToUse) {
			if (!itemsToUse) {
				ret([]);
			} else {
				var madeItems = [];
				Zotero.Utilities.processDocuments(itemsToUse, function (doc1) {
					var url1 = doc1.documentURI;
					var itemTrans = parentItemTrans;
					if (itemTrans === undefined) {
						itemTrans = FW.getScraper(doc1, url1);
					}
					// Pages that no scraper recognises are skipped.
					if (itemTrans !== undefined) {
						itemTrans.makeItems(doc1, url1, attachments[url1], function (item1) {
							madeItems.push(item1);
							eachItem(item1, itemTrans, doc1, url1);
						}, function () {});
					}
				}, function () {
					ret(madeItems);
				});
			}
		});
	};
};

FW._MultiScraper.prototype = new FW._Base;

/**
 * Creates a scraper that fetches the URL as text and hands it to another
 * translator (translatorType / translatorId given in `init`).
 */
FW.DelegateTranslator = function (init) {
	return new FW._DelegateTranslator(init);
};

FW._DelegateTranslator = function (init) {
	for (var x in init) {
		this[x] = init[x];
	}
	this._translator = Zotero.loadTranslator(this.translatorType);
	this._translator.setTranslator(this.translatorId);

	this.makeItems = function (doc, url, attachments, eachItem, ret) {
		// `this` is not the delegate inside the HTTP callback.
		var self = this;
		var tmpItem;
		Zotero.Utilities.HTTP.doGet(url, function (text) {
			self._translator.setHandler("itemDone", function (obj, item) {
				tmpItem = item;
				if (attachments) {
					item.attachments = attachments;
				}
			});
			if (self.preProcess) {
				text = self.preProcess(text);
			}
			self._translator.setString(text);
			self._translator.translate();
			eachItem(tmpItem);
		}, function () {
			ret([tmpItem]);
		});
	};
};

// The constructor (not only the factory) gets the _Scraper prototype, so that
// delegate instances inherit callHook/evaluateThing. The factory keeps a
// prototype as well, as it did before.
FW._DelegateTranslator.prototype = new FW._Scraper;
FW.DelegateTranslator.prototype = FW._DelegateTranslator.prototype;

/**
 * Chainable list of string filters. Subclasses provide evaluate() and their
 * own _filters array.
 */
FW._StringMagic = function () {
	this._filters = [];

	this.addFilter = function (filter) {
		this._filters.push(filter);
		return this;
	};

	this.split = function (re) {
		return this.addFilter(function (s) {
			return s.split(re).filter(function (piece) {
				return (piece != "");
			});
		});
	};

	this.replace = function (s1, s2, flags) {
		return this.addFilter(function (s) {
			if (s.match(s1)) {
				return s.replace(s1, s2, flags);
			}
			return s;
		});
	};

	this.prepend = function (prefix) {
		return this.replace(/^/, prefix);
	};

	this.append = function (postfix) {
		return this.replace(/$/, postfix);
	};

	this.remove = function (toStrip, flags) {
		return this.replace(toStrip, "", flags);
	};

	this.trim = function () {
		return this.addFilter(function (s) {
			return Zotero.Utilities.trim(s);
		});
	};

	this.trimInternal = function () {
		return this.addFilter(function (s) {
			return Zotero.Utilities.trimInternal(s);
		});
	};

	this.match = function (re, group) {
		if (!group) {
			group = 0;
		}
		return this.addFilter(function (s) {
			var m = s.match(re);
			if (m === undefined || m === null) {
				return undefined;
			}
			return m[group];
		});
	};

	this.cleanAuthor = function (type, useComma) {
		return this.addFilter(function (s) {
			return Zotero.Utilities.cleanAuthor(s, type, useComma);
		});
	};

	this.key = function (field) {
		return this.addFilter(function (n) {
			return n[field];
		});
	};

	this.capitalizeTitle = function (force) {
		if (force == true) {
			return this.addFilter(function (s) {
				return Zotero.Utilities.capitalizeTitle(s, true);
			});
		}
		return this.addFilter(function (s) {
			return Zotero.Utilities.capitalizeTitle(s);
		});
	};

	this.unescapeHTML = function () {
		return this.addFilter(function (s) {
			return Zotero.Utilities.unescapeHTML(s);
		});
	};

	this.unescape = function () {
		return this.addFilter(function (s) {
			return unescape(s);
		});
	};

	/**
	 * Runs every filter, in order, over every value in `a`. Values are
	 * flattened and null/undefined entries dropped before and after each
	 * filter; a filter that throws drops that value and logs the exception.
	 */
	this._applyFilters = function (a, doc1) {
		var isPresent = function (value) {
			return ((value !== undefined) && (value !== null));
		};
		for (var i = 0; i < this._filters.length; i++) {
			a = flatten(a);
			a = a.filter(isPresent);
			for (var j = 0; j < a.length; j++) {
				try {
					if ((a[j] === undefined) || (a[j] === null)) {
						continue;
					} else {
						a[j] = this._filters[i](a[j], doc1);
					}
				} catch (x1) {
					a[j] = undefined;
					Zotero.debug("Caught exception " + x1 + "on filter: " + this._filters[i]);
				}
			}
			a = a.filter(isPresent);
		}
		return flatten(a);
	};
};

/** Source: the document's full innerHTML. */
FW.PageText = function () {
	return new FW._PageText();
};

FW._PageText = function () {
	this._filters = [];

	this.evaluate = function (doc) {
		var a = [doc.documentElement.innerHTML];
		a = this._applyFilters(a, doc);
		if (a.length == 0) {
			return false;
		}
		return a;
	};
};

FW._PageText.prototype = new FW._StringMagic();

/** Source: the URL passed to evaluate(). */
FW.Url = function () {
	return new FW._Url();
};

FW._Url = function () {
	this._filters = [];

	this.evaluate = function (doc, url) {
		var a = [url];
		a = this._applyFilters(a, doc);
		if (a.length == 0) {
			return false;
		}
		return a;
	};
};

FW._Url.prototype = new FW._StringMagic();

/** Source: the nodes (or string value) selected by an XPath expression. */
FW.Xpath = function (xpath) {
	return new FW._Xpath(xpath);
};

FW._Xpath = function (_xpath) {
	this._xpath = _xpath;
	this._filters = [];

	/** Adds a filter replacing each node by its textContent; chainable. */
	this.text = function () {
		var filter = function (n) {
			if (typeof n === "object" && n.textContent) {
				return n.textContent;
			}
			return n;
		};
		this.addFilter(filter);
		return this;
	};

	/** Adds a filter evaluating `xpath` relative to each node; chainable. */
	this.sub = function (xpath) {
		var filter = function (n, doc) {
			var result = doc.evaluate(xpath, n, null, XPathResult.ANY_TYPE, null);
			if (result) {
				return result.iterateNext();
			}
			return undefined;
		};
		this.addFilter(filter);
		return this;
	};

	/**
	 * @returns {Array|false} Filtered results, or false if nothing is left.
	 */
	this.evaluate = function (doc) {
		var res = doc.evaluate(this._xpath, doc, null, XPathResult.ANY_TYPE, null);
		var resultType = res.resultType;
		var a = [];
		if (resultType == XPathResult.STRING_TYPE) {
			a.push(res.stringValue);
		} else if (resultType == XPathResult.ORDERED_NODE_ITERATOR_TYPE
				|| resultType == XPathResult.UNORDERED_NODE_ITERATOR_TYPE) {
			var node;
			while ((node = res.iterateNext())) {
				a.push(node);
			}
		}
		a = this._applyFilters(a, doc);
		if (a.length == 0) {
			return false;
		}
		return a;
	};
};

FW._Xpath.prototype = new FW._StringMagic();

/**
 * Returns the item type of the first registered scraper whose `detect`
 * yields a non-empty result with a truthy first value, else undefined.
 */
FW.detectWeb = function (doc, url) {
	for (var i = 0; i < FW._scrapers.length; i++) {
		var scraper = FW._scrapers[i];
		var itemType = scraper.evaluateThing(scraper.itemType, doc, url);
		var detected = scraper.evaluateThing(scraper.detect, doc, url);
		// `detected` is false/undefined when nothing matched or no detect is set.
		if (detected && detected.length > 0 && detected[0]) {
			return itemType;
		}
	}
	return undefined;
};

/**
 * Returns the first registered scraper whose item type equals the detected
 * type and whose `detect` is truthy for this page, else undefined.
 */
FW.getScraper = function (doc, url) {
	var itemType = FW.detectWeb(doc, url);
	return FW._scrapers.filter(function (s) {
		return (s.evaluateThing(s.itemType, doc, url) == itemType)
			&& (s.evaluateThing(s.detect, doc, url));
	})[0];
};

/**
 * Scrapes the page with the matching scraper, runs its "scraperDone" hook on
 * each item, completes the item, and calls Zotero.done() at the end.
 */
FW.doWeb = function (doc, url) {
	var scraper = FW.getScraper(doc, url);
	scraper.makeItems(doc, url, [], function (item1, scraper1, doc1, url1) {
		scraper1.callHook("scraperDone", item1, doc1, url1);
		if (!item1.title) {
			item1.title = "";
		}
		item1.complete();
	}, function () {
		Zotero.done();
	});
	Zotero.wait();
};
@@ -39,55 +39,40 @@ along with this program. If not, see <http://www.gnu.org/licenses/>.
3939

4040
/**
 * Translator-level detection hook: hands the page to the embedded framework
 * and returns whatever FW.detectWeb reports for it.
 */
function detectWeb(doc, url) {
	var detectedType = FW.detectWeb(doc, url);
	return detectedType;
}
4141
/**
 * Translator-level scrape hook: hands the page to the embedded framework and
 * returns whatever FW.doWeb returns.
 */
function doWeb(doc, url) {
	var outcome = FW.doWeb(doc, url);
	return outcome;
}
42-
4342

44-
/** Newsweek Magazine */
43+
44+
/** Women in the World */
4545
FW.Scraper({
46-
itemType : 'magazineArticle',
47-
detect : FW.Xpath('//a[contains(@class, "newsweek-flag")]'),
48-
title : FW.Xpath('//h1[@property="dc:title"]').text().trim(),
49-
attachments : [{ url: FW.Url().remove(/\.html/).append("\.print\.html"),
50-
title: "Daily Beast Snapshot",
46+
itemType : 'blogPost',
47+
detect : FW.Xpath('//h1[@class="page-title"]'),
48+
title : FW.Xpath('//h1[@class="page-title"]').text().trim(),
49+
attachments : [{ url: FW.Url(),
50+
title: "Daily Beast WITW Snapshot",
5151
type: "text/html" }],
52-
creators : FW.Xpath('//meta[@name="authors"]').key("content")
53-
.text().cleanAuthor("author"),
54-
date : FW.Xpath('//time[@class="timestamp"]/@datetime').text(),
55-
tags : FW.Xpath('//meta[@name="keywords"]/@content').text().split(/\s*,\s*/),
52+
creators : FW.Xpath('//small[@class="byline"]/a').text().cleanAuthor("author"),
53+
date : FW.Xpath('//time/@datetime').text().trim(),
5654
abstractNote : FW.Xpath('//meta[@property="og:description"]/@content').text(),
57-
publicationTitle : "Newsweek Magazine"
55+
tags : FW.Xpath('//meta[@name="keywords"]/@content').text().split(/\s*,\s*/),
56+
publicationTitle : "Daily Beast - Women in the World"
5857
});
59-
58+
6059
/**Daily Beast Website*/
6160
FW.Scraper({
6261
itemType : 'webpage',
63-
detect : FW.Xpath('//h1[@property="dc:title"]'),
64-
title : FW.Xpath('//h1[@property="dc:title"]').text().trim(),
62+
detect : FW.Xpath('//h1[contains(@class, "title")]'),
63+
title : FW.Xpath('//h1[contains(@class, "title")]').text().trim(),
6564
attachments : [{ url: FW.Url().remove(/\.html/).append("\.print\.html"),
6665
title: "Daily Beast Snapshot",
6766
type: "text/html" }],
6867
creators : FW.Xpath('//meta[@name="authors"]').key("content")
6968
.text().cleanAuthor("author"),
70-
date : FW.Xpath('//time[@class="timestamp"]/@datetime').text(),
69+
date : FW.Xpath('//span[@class="date"]').text(),
7170
tags : FW.Xpath('//meta[@name="keywords"]/@content').text().split(/\s*,\s*/),
7271
abstractNote : FW.Xpath('//meta[@property="og:description"]/@content').text(),
7372
publicationTitle : "The Daily Beast"
7473
});
7574

7675

77-
/** Women in the World */
78-
FW.Scraper({
79-
itemType : 'blogPost',
80-
detect : FW.Xpath('//h1[@class="page-title"]'),
81-
title : FW.Xpath('//h1[@class="page-title"]').text().trim(),
82-
attachments : [{ url: FW.Url(),
83-
title: "Daily Beast WITW Snapshot",
84-
type: "text/html" }],
85-
creators : FW.Xpath('//small[@class="byline"]/a').text().cleanAuthor("author"),
86-
date : FW.Xpath('//time/@datetime').text().trim(),
87-
abstractNote : FW.Xpath('//meta[@property="og:description"]/@content').text(),
88-
tags : FW.Xpath('//meta[@name="keywords"]/@content').text().split(/\s*,\s*/),
89-
publicationTitle : "Daily Beast - Women in the World"
90-
});
9176

9277
/**Multiple - Search */
9378
FW.MultiScraper({
@@ -135,7 +120,7 @@ var testCases = [
135120
],
136121
"url": "http://www.thedailybeast.com/articles/2011/09/26/mikheil-saakashvili-interview-hillary-clinton-saved-georgia.html",
137122
"abstractNote": "In an exclusive interview, Georgia’s president credits Clinton and the Obama team with quelling bombings.",
138-
"date": "2011-09-26T23:59:00.000Z",
123+
"date": "September 26th 2011",
139124
"publicationTitle": "The Daily Beast",
140125
"title": "Saakashvili: U.S. Stopped Russia Bombings",
141126
"libraryCatalog": "The Daily Beast",
@@ -173,51 +158,14 @@ var testCases = [
173158
],
174159
"url": "http://www.thedailybeast.com/articles/2011/09/26/gop-s-2012-presidential-primaries-purity-test.html",
175160
"abstractNote": "The party now punishes any deviation from conservative orthodoxy in the presidential primaries.",
176-
"date": "2011-09-26T21:13:00.000Z",
161+
"date": "September 26th 2011",
177162
"publicationTitle": "The Daily Beast",
178163
"title": "The GOP’s Purity Test",
179164
"libraryCatalog": "The Daily Beast",
180165
"accessDate": "CURRENT_TIMESTAMP"
181166
}
182167
]
183168
},
184-
{
185-
"type": "web",
186-
"url": "http://www.thedailybeast.com/newsweek/2011/09/25/who-and-what-is-mitt-romney.html",
187-
"items": [
188-
{
189-
"itemType": "magazineArticle",
190-
"creators": [
191-
{
192-
"firstName": "Andrew",
193-
"lastName": "Romano",
194-
"creatorType": "author"
195-
}
196-
],
197-
"notes": [],
198-
"tags": [
199-
"Google",
200-
"Mitt Romney",
201-
"U.S. Politics",
202-
"presidential race"
203-
],
204-
"seeAlso": [],
205-
"attachments": [
206-
{
207-
"title": "Daily Beast Snapshot",
208-
"type": "text/html"
209-
}
210-
],
211-
"url": "http://www.thedailybeast.com/newsweek/2011/09/25/who-and-what-is-mitt-romney.html",
212-
"abstractNote": "Perry’s stumbling, the economy’s crumbling, Obama’s in freefall. All this could make it Mitt Romney’s moment. But…",
213-
"date": "2011-09-25T14:00:00.000Z",
214-
"publicationTitle": "Newsweek Magazine",
215-
"title": "Can Mitt Close the Deal?",
216-
"libraryCatalog": "The Daily Beast",
217-
"accessDate": "CURRENT_TIMESTAMP"
218-
}
219-
]
220-
},
221169
{
222170
"type": "web",
223171
"defer": true,

The Hamilton Spectator.js

+3-3
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"inRepository": true,
1010
"translatorType": 4,
1111
"browserSupport": "gcsibv",
12-
"lastUpdated": "2013-06-08 15:21:58"
12+
"lastUpdated": "2013-12-12 13:55:59"
1313
}
1414

1515
function detectWeb(doc, url) {
@@ -66,7 +66,7 @@ function scrape(doc, url) {
6666
newItem.title = doc.evaluate(xPathTitle, doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
6767

6868
newItem.abstractNote = ZU.xpathText(doc, '//meta[@name="description"]/@content');
69-
newItem.date = ZU.xpathText(doc, '//div[contains(@class, "above-page-title")]/span[contains(@class,"left")]');
69+
newItem.date = ZU.xpathText(doc, '//div[contains(@class, "above-page-title")]/span[contains(@class,"left")][1]');
7070

7171
newItem.url = doc.location.href;
7272
newItem.publicationTitle = "The Hamilton Spectator";
@@ -111,7 +111,7 @@ var testCases = [
111111
"seeAlso": [],
112112
"attachments": [],
113113
"title": "Expert calls Occupy demos most important in generations",
114-
"date": "Nov 16, 2011  |",
114+
"date": "Nov 16, 2011",
115115
"url": "http://www.thespec.com/news-story/2223303-expert-calls-occupy-demos-most-important-in-generations-/",
116116
"publicationTitle": "The Hamilton Spectator",
117117
"libraryCatalog": "The Hamilton Spectator",

The Telegraph.js

+10-12
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"inRepository": true,
1010
"translatorType": 4,
1111
"browserSupport": "gcsib",
12-
"lastUpdated": "2013-09-25 16:18:41"
12+
"lastUpdated": "2013-12-12 13:59:44"
1313
}
1414

1515
function createExcludes(url, excludeArr) {
@@ -110,7 +110,8 @@ function scrape(doc, url) {
110110
'DCSext.articleId': 'callNumber',
111111
'article-id': 'callNumber',
112112
'tmgads.articleid': 'callNumber',
113-
'last-modified': 'date'
113+
'last-modified': 'date',
114+
'DCSext.articleFirstPublished' : 'date'
114115
});
115116

116117
em.doWeb(doc, url);
@@ -173,19 +174,17 @@ var testCases = [
173174
"title": "Snapshot"
174175
}
175176
],
176-
"itemID": "http://www.telegraph.co.uk/news/worldnews/asia/china/8888909/China-Google-Earth-spots-huge-unidentified-structures-in-Gobi-desert.html",
177177
"title": "China: Google Earth spots huge, unidentified structures in Gobi desert",
178-
"source": "Telegraph.co.uk",
179178
"publicationTitle": "Telegraph.co.uk",
180-
"date": "2011-11-14",
179+
"date": "2011-11-14 13:50",
181180
"url": "http://www.telegraph.co.uk/news/worldnews/asia/china/8888909/China-Google-Earth-spots-huge-unidentified-structures-in-Gobi-desert.html",
182181
"abstractNote": "Vast, unidentified, structures have been spotted by satellites in the barren Gobi desert, raising questions about what China might be building in a region it uses for its military, space and nuclear programmes.",
182+
"libraryCatalog": "www.telegraph.co.uk",
183+
"accessDate": "CURRENT_TIMESTAMP",
183184
"section": "worldnews",
184185
"callNumber": "8888909",
185-
"accessDate": "CURRENT_TIMESTAMP",
186-
"libraryCatalog": "www.telegraph.co.uk",
187-
"shortTitle": "China",
188-
"publisher": "Telegraph Media Group Limited"
186+
"publisher": "Telegraph Media Group Limited",
187+
"shortTitle": "China"
189188
}
190189
]
191190
},
@@ -215,16 +214,15 @@ var testCases = [
215214
"title": "Snapshot"
216215
}
217216
],
218-
"itemID": "http://blogs.telegraph.co.uk/news/cristinaodone/100141152/putin-wins-the-russian-election-but-it-wont-be-long-before-hes-in-trouble/",
219217
"title": "Putin 'wins' the Russian election. But it won't be long before he's in trouble",
220218
"publicationTitle": "News - Telegraph Blogs",
221-
"date": "2012-03-04",
222219
"url": "http://blogs.telegraph.co.uk/news/cristinaodone/100141152/putin-wins-the-russian-election-but-it-wont-be-long-before-hes-in-trouble/",
223220
"abstractNote": "Vladimir Putin looks set to win the Russian elections – no surprise there, then. Few, even in Russia, believe that today's election is anything bu",
224-
"accessDate": "CURRENT_TIMESTAMP",
225221
"libraryCatalog": "blogs.telegraph.co.uk",
222+
"accessDate": "CURRENT_TIMESTAMP",
226223
"section": "Blogs",
227224
"callNumber": "100141152",
225+
"date": "2012-03-04 18:32:08",
228226
"publisher": "Telegraph Media Group Limited"
229227
}
230228
]

0 commit comments

Comments
 (0)