You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Copy file name to clipboardexpand all lines: Kommersant.js
+32-64
Original file line number
Diff line number
Diff line change
@@ -9,7 +9,7 @@
9
9
"inRepository": true,
10
10
"translatorType": 4,
11
11
"browserSupport": "gcsibv",
12
-
"lastUpdated": "2013-03-13 20:49:52"
12
+
"lastUpdated": "2014-01-05 17:14:25"
13
13
}
14
14
15
15
/* FW LINE 57:6869c32952b1 */functionflatten(c){varb=newArray();for(vardinc){vare=c[d];if(einstanceofArray){b=b.concat(flatten(e))}else{b.push(e)}}returnb}varFW={_scrapers:newArray()};FW._Base=function(){this.callHook=function(b,c,e,a){if(typeofthis["hooks"]==="object"){vard=this["hooks"][b];if(typeofd==="function"){d(c,e,a)}}};this.evaluateThing=function(f,e,c){varb=typeoff;if(b==="object"){if(finstanceofArray){vard=this.evaluateThing;vara=f.map(function(g){returnd(g,e,c)});returnflatten(a)}else{returnf.evaluate(e,c)}}else{if(b==="function"){returnf(e,c)}else{returnf}}}};FW.Scraper=function(a){FW._scrapers.push(newFW._Scraper(a))};FW._Scraper=function(a){for(xina){this[x]=a[x]}this._singleFieldNames=["abstractNote","applicationNumber","archive","archiveLocation","artworkMedium","artworkSize","assignee","audioFileType","audioRecordingType","billNumber","blogTitle","bookTitle","callNumber","caseName","code","codeNumber","codePages","codeVolume","committee","company","conferenceName","country","court","date","dateDecided","dateEnacted","dictionaryTitle","distributor","docketNumber","documentNumber","DOI","edition","encyclopediaTitle","episodeNumber","extra","filingDate","firstPage","forumTitle","genre","history","institution","interviewMedium","ISBN","ISSN","issue","issueDate","issuingAuthority","journalAbbreviation","label","language","legalStatus","legislativeBody","letterType","libraryCatalog","manuscriptType","mapType","medium","meetingName","nameOfAct","network","number","numberOfVolumes","numPages","pages","patentNumber","place","postType","presentationType","priorityNumbers","proceedingsTitle","programTitle","programmingLanguage","publicLawNumber","publicationTitle","publisher","references","reportNumber","reportType","reporter","reporterVolume","rights","runningTime","scale","section","series","seriesNumber","seriesText","seriesTitle","session","shortTitle","studio","subject","system","thesisType","title","type","university","url","version","videoRecordingType","volume","websiteTitle","websiteType"];this._makeAttachments=function(p,b,g,t){if(ginstanceofArray){g.forEach(function(k){this._makeAttachments(p,b,k,t)},this)}else{if(typeofg==="object"){varo=g.urls||g.url;varm=g.types||g.type;varf=g.titles||g.title;varq=g.snapshots||g.snapshot;varj=this.evaluateThing(o,p,b);varn=this.evaluateThing(f,p,b);vars=this.evaluateThing(m,p,b);vard=this.evaluateThing(q,p,b);if(!(jinstanceofArray)){j=[j]}for(varlinj){varc=j[l];varh;vare;varr;if(sinstanceofArray){h=s[l]}else{h=s}if(ninstanceofArray){e=n[l]}else{e=n}if(dinstanceofArray){r=d[l]}else{r=d}t.attachments.push({url:c,title:e,type:h,snapshot:r})}}}};if(this.itemTrans!==undefined){this.makeItems=this.itemTrans.makeItems}else{this.makeItems=function(o,b,m,c,l){varq=newZotero.Item(this.itemType);q.url=b;for(varhinthis._singleFieldNames){varn=this._singleFieldNames[h];if(this[n]){varg=this.evaluateThing(this[n],o,b);if(ginstanceofArray){q[n]=g[0]}else{q[n]=g}}}varr=["creators","tags"];for(varfinr){varp=r[f];vard=this.evaluateThing(this[p],o,b);if(d){for(vareind){q[p].push(d[e])}}}this._makeAttachments(o,b,this["attachments"],q);c(q,this,o,b);l([q])}}};FW._Scraper.prototype=newFW._Base;FW.MultiScraper=function(a){FW._scrapers.push(newFW._MultiScraper(a))};FW._MultiScraper=function(a){for(xina){this[x]=a[x]}this._mkSelectItems=function(e,d){varb=newObject;for(varcine){b[d[c]]=e[c]}returnb};this._selectItems=function(d,c,e){varb=newArray();Zotero.selectItems(this._mkSelectItems(d,c),function(f){for(varginf){b.push(g)}e(b)})};this._mkAttachments=function(g,d,f){varb=this.evaluateThing(this["attachments"],g,d);varc=newObject();if(b){for(vareinf){c[f[e]]=b[e]}}returnc};this._makeChoices=function(f,p,c,d,h){if(finstanceofArray){f.forEach(function(k){this._makeTitlesUrls(k,p,c,d,h)},this)}else{if(typeoff==="object"){varm=f.urls||f.url;vare=f.titles||f.title;varn=this.evaluateThing(m,p,c);varj=this.evaluateThing(e,p,c);varl=(jinstanceofArray);if(!(ninstanceofArray)){n=[n]}for(varginn){varb=n[g];varo;if(l){o=j[g]}else{o=j}h.push(b);d.push(o)}}}};this.makeItems=function(j,b,g,c,f){if(this.beforeFilter){vark=this.beforeFilter(j,b);if(k!=b){this.makeItems(j,k,g,c,f);return}}vare=[];varh=[];this._makeChoices(this["choices"],j,b,e,h);vard=this._mkAttachments(j,b,h);this._selectItems(e,h,function(m){if(!m){f([])}else{varl=[];varn=this.itemTrans;Zotero.Utilities.processDocuments(m,function(q){varp=q.documentURI;varo=n;if(o===undefined){o=FW.getScraper(q,p)}if(o===undefined){}else{o.makeItems(q,p,d[p],function(r){l.push(r);c(r,o,q,p)},function(){})}},function(){f(l)})}})}};FW._MultiScraper.prototype=newFW._Base;FW.DelegateTranslator=function(a){returnnewFW._DelegateTranslator(a)};FW._DelegateTranslator=function(a){for(xina){this[x]=a[x]}this._translator=Zotero.loadTranslator(this.translatorType);this._translator.setTranslator(this.translatorId);this.makeItems=function(g,d,b,f,c){vare;Zotero.Utilities.HTTP.doGet(d,function(h){this._translator.setHandler("itemDone",function(k,j){e=j;if(b){j.attachments=b}});if(this.preProcess){h=this.preProcess(h)}this._translator.setString(h);this._translator.translate();f(e)},function(){c([e])})}};FW.DelegateTranslator.prototype=newFW._Scraper;FW._StringMagic=function(){this._filters=newArray();this.addFilter=function(a){this._filters.push(a);returnthis};this.split=function(a){returnthis.addFilter(function(b){returnb.split(a).filter(function(c){return(c!="")})})};this.replace=function(c,b,a){returnthis.addFilter(function(d){if(d.match(c)){returnd.replace(c,b,a)}else{returnd}})};this.prepend=function(a){returnthis.replace(/^/,a)};this.append=function(a){returnthis.replace(/$/,a)};this.remove=function(b,a){returnthis.replace(b,"",a)};this.trim=function(){returnthis.addFilter(function(a){returnZotero.Utilities.trim(a)})};this.trimInternal=function(){returnthis.addFilter(function(a){returnZotero.Utilities.trimInternal(a)})};this.match=function(a,b){if(!b){b=0}returnthis.addFilter(function(d){varc=d.match(a);if(c===undefined||c===null){returnundefined}else{returnc[b]}})};this.cleanAuthor=function(b,a){returnthis.addFilter(function(c){returnZotero.Utilities.cleanAuthor(c,b,a)})};this.key=function(a){returnthis.addFilter(function(b){returnb[a]})};this.capitalizeTitle=function(){if(arguments.length>0&&arguments[0]==true){returnthis.addFilter(function(a){returnZotero.Utilities.capitalizeTitle(a,true)})}else{returnthis.addFilter(function(a){returnZotero.Utilities.capitalizeTitle(a)})}};this.unescapeHTML=function(){returnthis.addFilter(function(a){returnZotero.Utilities.unescapeHTML(a)})};this.unescape=function(){returnthis.addFilter(function(a){returnunescape(a)})};this._applyFilters=function(c,e){for(iinthis._filters){c=flatten(c);c=c.filter(function(a){return((a!==undefined)&&(a!==null))});for(vard=0;d<c.length;d++){try{if((c[d]===undefined)||(c[d]===null)){continue}else{c[d]=this._filters[i](c[d],e)}}catch(b){c[d]=undefined;Zotero.debug("Caught exception "+b+"on filter: "+this._filters[i])}}c=c.filter(function(a){return((a!==undefined)&&(a!==null))})}returnflatten(c)}};FW.PageText=function(){returnnewFW._PageText()};FW._PageText=function(){this._filters=newArray();this.evaluate=function(c){varb=[c.documentElement.innerHTML];b=this._applyFilters(b,c);if(b.length==0){returnfalse}else{returnb}}};FW._PageText.prototype=newFW._StringMagic();FW.Url=function(){returnnewFW._Url()};FW._Url=function(){this._filters=newArray();this.evaluate=function(d,c){varb=[c];b=this._applyFilters(b,d);if(b.length==0){returnfalse}else{returnb}}};FW._Url.prototype=newFW._StringMagic();FW.Xpath=function(a){returnnewFW._Xpath(a)};FW._Xpath=function(a){this._xpath=a;this._filters=newArray();this.text=function(){varb=function(c){if(typeofc==="object"&&c.textContent){returnc.textContent}else{returnc}};this.addFilter(b);returnthis};this.sub=function(b){varc=function(f,e){vard=e.evaluate(b,f,null,XPathResult.ANY_TYPE,null);if(d){returnd.iterateNext()}else{returnundefined}};this.addFilter(c);returnthis};this.evaluate=function(f){vare=f.evaluate(this._xpath,f,null,XPathResult.ANY_TYPE,null);vard=e.resultType;varc=newArray();if(d==XPathResult.STRING_TYPE){c.push(e.stringValue)}else{if(d==XPathResult.ORDERED_NODE_ITERATOR_TYPE||d==XPathResult.UNORDERED_NODE_ITERATOR_TYPE){varb;while((b=e.iterateNext())){c.push(b)}}}c=this._applyFilters(c,f);if(c.length==0){returnfalse}else{returnc}}};FW._Xpath.prototype=newFW._StringMagic();FW.detectWeb=function(e,b){for(varcinFW._scrapers){vard=FW._scrapers[c];varf=d.evaluateThing(d.itemType,e,b);vara=d.evaluateThing(d.detect,e,b);if(a.length>0&&a[0]){returnf}}returnundefined};FW.getScraper=function(b,a){varc=FW.detectWeb(b,a);returnFW._scrapers.filter(function(d){return(d.evaluateThing(d.itemType,b,a)==c)&&(d.evaluateThing(d.detect,b,a))})[0]};FW.doWeb=function(c,a){varb=FW.getScraper(c,a);b.makeItems(c,a,[],function(f,e,g,d){e.callHook("scraperDone",f,g,d);if(!f.title){f.title=""}f.complete()},function(){Zotero.done()});Zotero.wait()};
@@ -40,69 +40,35 @@ function detectWeb(doc, url) {
40
40
}
41
41
functiondoWeb(doc,url){returnFW.doWeb(doc,url);}
42
42
43
-
43
+
//technically these should be (and used to be) different item types and we should account for them, but this at least makes this work again in a basic fashion
detect : FW.Xpath('//div[@class="issue"]/a[contains(text(),"Газета") or contains(text(),"Online") or contains(text(),"FM") or contains(text(),"Уфа")]'),
65
-
title : FW.Xpath('//div[@class="document"]/div[@class="title"]').text().trim(),
title : FW.Xpath('//table[contains(@class,"news_block")]//div[@class="title"]|//div[contains(@class,"news_block")]//div[@class="title"]').text().trim(),
90
-
attachments : [
91
-
{
92
-
url : FW.Url(),
93
-
type : "text/html",
94
-
title : "Kommersant Snapshot"
95
-
}],
96
-
url : FW.Url().replace(/(\?|&)stamp.+/,""),
97
-
date : FW.Xpath('//div[contains(@class,"other_blockname")]/div[@class="f-h"]').text().replace(/^.*(\d{2})\.(\d{2})\.(\d{4}).*$/,"$3-$2-$1")
"abstractNote": "Как стало известно \"Ъ\", глава секретариата комиссии Таможенного союза (КТС) Сергей Глазьев с июля 2012 года может сменить работу: он не получил предложения войти в Евразийскую экономическую комиссию (ЕЭК) со стороны РФ. Действующие сотрудники КТС будут проходить аттестационную комиссию, чтобы попасть в управляющую структуру нового союза. Главная претензия к главе секретариата КТС — \"дефицит идеологии и проблемы с администрированием\": их с российской стороны будут восполнять переводом в ЕЭК сотрудников российских министерств.",
136
-
"date": "2011-11-08",
137
-
"issue": "208 (4749)",
138
-
"publicationTitle": "Коммерсантъ",
101
+
"date": "08.11.2011",
102
+
"issue": "208",
103
+
"pages": "1",
104
+
"publicationTitle": "Газета \"Коммерсантъ\"",
139
105
"title": "В Сергее Глазьеве не хватило евразийского",
140
-
"libraryCatalog": "Kommersant"
106
+
"libraryCatalog": "Kommersant",
107
+
"accessDate": "CURRENT_TIMESTAMP"
141
108
}
142
109
]
143
110
},
@@ -159,9 +126,10 @@ var testCases = [
159
126
],
160
127
"url": "http://www.kommersant.ru/doc/1832739",
161
128
"abstractNote": "За тем, как проходят российские выборы в месте, где административный ресурс по географическим причинам ослаблен, наблюдал корреспондент \"Власти\" Артем Платов.",
0 commit comments