Skip to content

Commit 821eda1

Browse files
committed
touch-ups to Eastview, courtesy of @aurimasv
1 parent 248ddc2 commit 821eda1

File tree

1 file changed

+124
-123
lines changed

1 file changed

+124
-123
lines changed

Eastview.js

+124-123
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22
"translatorID": "c59896bc-4beb-43ed-8109-a73a13251828",
33
"label": "Eastview",
44
"creator": "Sebastian Karcher",
5-
"target": "^https?://dlib\\.eastview\\.com/(search/(advanced|simple)/|browse/(doc|favorites))",
5+
"target": "^https?://dlib\\.eastview\\.com/(search/(advanced|simple)/|browse/(doc|favorites|issue))",
66
"minVersion": "3.0",
77
"maxVersion": "",
88
"priority": 100,
99
"inRepository": true,
1010
"translatorType": 4,
1111
"browserSupport": "gcsb",
12-
"lastUpdated": "2014-08-31 20:21:04"
12+
"lastUpdated": "2014-09-01 13:01:04"
1313
}
1414

1515
/*
@@ -35,111 +35,112 @@
3535
***** END LICENSE BLOCK *****
3636
*/
3737
/**
 * Decide which kind of Eastview page the translator is looking at.
 *
 * Listing pages (simple/advanced search results, favorites, issue tables of
 * contents) are reported as "multiple", but only when the page actually
 * contains article links. Every URL that is not a listing page is treated as
 * a single article.
 *
 * @param doc - the page document; only passed on to ZU.xpath
 * @param {string} url - address of the page
 * @returns {string|boolean} "multiple", "newspaperArticle", or false for a
 *   listing page that contains no article links
 */
function detectWeb(doc, url) {
	// Written as a regex literal on purpose: the original passed the string
	// "/search/simple/articles?" to String.prototype.search, which compiles it
	// to this very pattern (the trailing "?" makes the final "s" optional).
	var isListing = url.search(/\/search\/simple\/articles?/) !== -1
		|| url.indexOf("/search/advanced/articles") !== -1
		|| url.search(/browse\/(favorites|issue)/) !== -1;

	if (!isListing) {
		return "newspaperArticle";
	}
	if (ZU.xpath(doc, '//td[contains(@class, "title-cell")]/a').length) {
		return "multiple";
	}
	// Listing URL but nothing to select: report "nothing detected" explicitly.
	return false;
}
4444

4545
// Maps a publication title, as it appears on Eastview, to the Zotero item
// type used for its articles. Publications that are not listed keep the
// default type chosen in scrape() ("newspaperArticle").
var typeMap = {
	"Argumenty i fakty": "magazineArticle",
	"Argumenty nedeli": "magazineArticle",
	"Ekonomika i zhizn'": "magazineArticle",
	"Ekspert": "magazineArticle",
	"Izvestiia": "newspaperArticle",
	"Kommersant. Daily": "newspaperArticle",
	"Komsomol'skaia pravda": "newspaperArticle",
	"Kul'tura": "magazineArticle",
	"Literaturnaia gazeta": "magazineArticle",
	"Moscow Times, The": "newspaperArticle",
	"Moskovskaia pravda": "newspaperArticle",
	"Moskovskii komsomolets": "newspaperArticle",
	"New Times, The": "magazineArticle",
	"Nezavisimaia gazeta": "newspaperArticle",
	"Novaia gazeta": "newspaperArticle",
	"Novye izvestiia": "newspaperArticle",
	"Ogonek": "magazineArticle",
	"Pravda": "newspaperArticle",
	"President": "magazineArticle",
	"Profil'": "magazineArticle",
	"RBK Daily": "newspaperArticle",
	"Rossiiskaia gazeta": "newspaperArticle",
	"Rossiiskie vesti": "newspaperArticle",
	"Russkii reporter": "magazineArticle",
	"Sankt-Peterburgskie vedomosti": "newspaperArticle",
	"Slovo": "magazineArticle",
	"Sovetskaia Rossiia": "newspaperArticle",
	"Trud": "newspaperArticle",
	"Vecherniaia Moskva": "newspaperArticle",
	"Vedomosti": "newspaperArticle",
	"Zavtra": "newspaperArticle"
};
7878

7979
/**
 * Turn an Eastview URL that carries an "id=<digits>" parameter into the
 * permanent "browse/doc/<id>" address of the same article.
 *
 * @param {string} URL - any Eastview page address
 * @returns {string} the permalink, or URL unchanged when it contains no id
 */
function permaLink(URL) {
	var id = URL.match(/id=(\d+)/);
	if (id) {
		return "http://dlib.eastview.com/browse/doc/" + id[1];
	}
	return URL;
}
8484

8585

8686
/**
 * Build a Zotero item from a single Eastview article page and save it.
 *
 * Publication, volume/issue and database name are read from the "path" links
 * on the page. The remaining metadata comes either from the table with id
 * "metatable" (when present) or, on "old"-format pages, from the loosely
 * structured header list.
 *
 * @param doc - the article page document
 * @param {string} url - address of the page (only written to the debug log)
 */
function scrape(doc, url) {
	Z.debug(url);
	var item = new Zotero.Item("newspaperArticle");
	item.publication = ZU.xpathText(doc, '//a[@class="path" and contains(@href, "browse/publication")]');

	var voliss = ZU.xpathText(doc, '//a[@class="path" and contains(@href, "browse/issue/")]');
	if (voliss) {
		var issue = voliss.match(/No\. (\d+)/);
		if (issue) item.issue = issue[1];
		var volume = voliss.match(/Vol\. (\d+)/);
		if (volume) item.volume = volume[1];
	}

	var database = ZU.xpathText(doc, '//a[@class="path" and contains(@href, "browse/udb")]');
	if (database) item.libraryCatalog = database.replace(/\(.+\)/, "") + "(Eastview)";

	var title;
	var date;
	// Look the table up once and reuse the node for every relative query.
	var metatable = doc.getElementById('metatable');
	if (metatable) {
		// we have the metadata in a table
		title = ZU.xpathText(metatable, './/td[@class="hdr" and contains(text(), "Article Title")]/following-sibling::td[@class="val"]');
		var source = ZU.xpathText(metatable, './/td[@class="hdr" and contains(text(), "Source")]/following-sibling::td[@class="val"]');
		if (source) {
			// "June" was missing from the original list (only the misspelt "Juni"
			// was present), so June dates were never captured. "Juni" is kept in
			// case some pages really use that spelling.
			date = source.match(/(January|February|March|April|May|June|Juni|July|August|September|October|November|December)\s+(\d{1,2},\s+)?\d{4}/);
			if (date) item.date = ZU.trimInternal(date[0]);
			var pages = source.match(/page\(s\): (\d+(?:-\d+)?)/);
			if (pages) item.page = pages[1];
		}
		var author = ZU.xpathText(metatable, './/td[@class="hdr" and contains(text(), "Author(s)")]/following-sibling::td[@class="val"]');
		if (author) {
			// Declared locally: the original leaked "authors" as an implicit global.
			var authors = author.trim().split(/\s*,\s*/);
			for (var i = 0; i < authors.length; i++) {
				item.creators.push(ZU.cleanAuthor(authors[i], "author"));
			}
		}
		item.place = ZU.xpathText(metatable, './/td[@class="hdr" and contains(text(), "Place of Publication")]/following-sibling::td');
	} else {
		// the "old" page format. We have very little structure here, doing the best we can.
		title = ZU.xpathText(doc, '//div[@class="change_font"]');
		var header = ZU.xpathText(doc, '//tbody/tr/td/ul');
		Z.debug(header);
		// The header list may be absent; do not call match() on a missing value.
		date = header ? header.match(/Date:\s*(\d{2}-\d{2}-\d{2,4})/) : null;
		if (date) item.date = date[1];
	}

	// see if we have a match for item type; default to newspaper otherwise.
	var itemType = typeMap[item.publication];
	if (itemType) item.itemType = itemType;

	item.attachments.push({
		document: doc,
		title: "Eastview Fulltext Snapshot",
		mimeType: "text/html"
	});

	// Titles printed entirely in upper case are re-capitalized.
	if (title && title == title.toUpperCase()) {
		title = ZU.capitalizeTitle(title, true);
	}
	item.title = title;
	// sometimes items actually don't have a title: use the publication title instead.
	if (!item.title) item.title = item.publication;
	item.complete();
}
145146

@@ -177,36 +178,36 @@ function scrapeSearch(doc, url) {
177178

178179

179180
/**
 * Translator entry point.
 *
 * On listing pages the user is offered every linked article title; the chosen
 * ones are then loaded via their permalinks and handed to scrape(). On any
 * other page the article is scraped directly when the URL already has the
 * "doc/<digits>" form, and through its permalink page otherwise.
 *
 * @param doc - the current page document
 * @param {string} url - address of the current page
 */
function doWeb(doc, url) {
	if (detectWeb(doc, url) == "multiple") {
		var articles = [];
		var items = {};
		var titles = ZU.xpath(doc, '//td[contains(@class, "title-cell")]/a');
		//var number = ZU.xpath(doc, '//td[contains(@class, "check-cell")]/following-sibling::td[1]');
		for (var i = 0; i < titles.length; i++) {
			items[titles[i].href] = titles[i].textContent.trim();
		}
		Zotero.selectItems(items, function (selected) {
			if (!selected) {
				return true;
			}
			for (var href in selected) {
				/* For scraping search table
				var xpath = '//tr[td[text()="' + href + '"]]'
				var node = ZU.xpath(doc, xpath);
				scrapeSearch(node, url); */
				articles.push(permaLink(href));
			}
			ZU.processDocuments(articles, scrape);
		});
	} else {
		if (url.search(/doc\/\d+/) != -1) {
			scrape(doc, url);
		}
		//always scrape from the permalink page, which has extra publication info at the top
		else {
			ZU.processDocuments(permaLink(url), scrape);
		}
	}
}/** BEGIN TEST CASES **/
211212
var testCases = [
212213
{

0 commit comments

Comments
 (0)