|
2 | 2 | "translatorID": "c59896bc-4beb-43ed-8109-a73a13251828",
|
3 | 3 | "label": "Eastview",
|
4 | 4 | "creator": "Sebastian Karcher",
|
5 |
| - "target": "^https?://dlib\\.eastview\\.com/(search/(advanced|simple)/|browse/(doc|favorites))", |
| 5 | + "target": "^https?://dlib\\.eastview\\.com/(search/(advanced|simple)/|browse/(doc|favorites|issue))", |
6 | 6 | "minVersion": "3.0",
|
7 | 7 | "maxVersion": "",
|
8 | 8 | "priority": 100,
|
9 | 9 | "inRepository": true,
|
10 | 10 | "translatorType": 4,
|
11 | 11 | "browserSupport": "gcsb",
|
12 |
| - "lastUpdated": "2014-08-31 20:21:04" |
| 12 | + "lastUpdated": "2014-09-01 13:01:04" |
13 | 13 | }
|
14 | 14 |
|
15 | 15 | /*
|
|
35 | 35 | ***** END LICENSE BLOCK *****
|
36 | 36 | */
|
/**
 * Decide what kind of Eastview page the translator is looking at.
 *
 * @param {Document} doc - the page being inspected
 * @param {String} url - the address of that page
 * @returns {String|Boolean} "multiple" for a search result, favorites or issue
 *     listing that contains at least one title link, false for such a listing
 *     without any title link, and "newspaperArticle" for every other URL.
 */
function detectWeb(doc, url) {
	// A string handed to String.prototype.search() is compiled as a regular
	// expression, so the "?" in the former "/search/simple/articles?" argument
	// made the trailing "s" optional. The explicit literal keeps that matching.
	var isListing = /\/search\/simple\/articles?/.test(url)
		|| url.indexOf("/search/advanced/articles") != -1
		|| /browse\/(favorites|issue)/.test(url);
	if (!isListing) {
		return "newspaperArticle";
	}
	// Only offer a selection dialog when there is something to select.
	if (ZU.xpath(doc, '//td[contains(@class, "title-cell")]/a').length) {
		return "multiple";
	}
	return false;
}
|
44 | 44 |
|
// Maps a publication name, as shown in the page's breadcrumb link, to the
// Zotero item type used for its articles. Publications that are not listed
// keep the "newspaperArticle" type that scrape() starts with.
var typeMap = {
	"Argumenty i fakty": "magazineArticle",
	"Argumenty nedeli": "magazineArticle",
	"Ekonomika i zhizn'": "magazineArticle",
	"Ekspert": "magazineArticle",
	"Izvestiia": "newspaperArticle",
	"Kommersant. Daily": "newspaperArticle",
	"Komsomol'skaia pravda": "newspaperArticle",
	"Kul'tura": "magazineArticle",
	"Literaturnaia gazeta": "magazineArticle",
	"Moscow Times, The": "newspaperArticle",
	"Moskovskaia pravda": "newspaperArticle",
	"Moskovskii komsomolets": "newspaperArticle",
	"New Times, The": "magazineArticle",
	"Nezavisimaia gazeta": "newspaperArticle",
	"Novaia gazeta": "newspaperArticle",
	"Novye izvestiia": "newspaperArticle",
	"Ogonek": "magazineArticle",
	"Pravda": "newspaperArticle",
	"President": "magazineArticle",
	"Profil'": "magazineArticle",
	"RBK Daily": "newspaperArticle",
	"Rossiiskaia gazeta": "newspaperArticle",
	"Rossiiskie vesti": "newspaperArticle",
	"Russkii reporter": "magazineArticle",
	"Sankt-Peterburgskie vedomosti": "newspaperArticle",
	"Slovo": "magazineArticle",
	"Sovetskaia Rossiia": "newspaperArticle",
	"Trud": "newspaperArticle",
	"Vecherniaia Moskva": "newspaperArticle",
	"Vedomosti": "newspaperArticle",
	"Zavtra": "newspaperArticle"
};
|
78 | 78 |
|
/**
 * Turn an Eastview URL that carries an "id=<digits>" part into the matching
 * "browse/doc/<digits>" address.
 *
 * @param {String} URL - address to convert (the parameter name shadows the
 *     global URL constructor inside this function)
 * @returns {String} the browse/doc address, or the input unchanged when no
 *     "id=<digits>" part is present
 */
function permaLink(URL) {
	var id = URL.match(/id=(\d+)/);
	if (id) {
		return "http://dlib.eastview.com/browse/doc/" + id[1];
	}
	return URL;
}
|
84 | 84 |
|
85 | 85 |
|
/**
 * Build one Zotero item from a single Eastview article page and complete it.
 *
 * Breadcrumb links supply the publication, volume/issue and database name.
 * Article metadata comes from the element with id "metatable" when the page
 * has one; otherwise the function falls back to the sparse "old" page layout.
 *
 * @param {Document} doc - the article page
 * @param {String} url - the address of that page (only written to the debug log)
 */
function scrape(doc, url) {
	Z.debug(url);
	var item = new Zotero.Item("newspaperArticle");
	var title, date;

	// NOTE(review): Zotero's standard field names appear to be
	// "publicationTitle" and "pages"; this function writes "publication" here
	// and "page" further down. Confirm against the translator's test cases
	// before renaming, since typeMap and the title fallback read item.publication.
	item.publication = ZU.xpathText(doc, '//a[@class="path" and contains(@href, "browse/publication")]');

	var voliss = ZU.xpathText(doc, '//a[@class="path" and contains(@href, "browse/issue/")]');
	if (voliss) {
		var issue = voliss.match(/No\. (\d+)/);
		if (issue) item.issue = issue[1];
		var volume = voliss.match(/Vol\. (\d+)/);
		if (volume) item.volume = volume[1];
	}

	var database = ZU.xpathText(doc, '//a[@class="path" and contains(@href, "browse/udb")]');
	// Swap the parenthesised part of the database name for "(Eastview)".
	if (database) item.libraryCatalog = database.replace(/\(.+\)/, "") + "(Eastview)";

	var metatable = doc.getElementById('metatable');
	if (metatable) {
		//we have the metadata in a table
		title = ZU.xpathText(metatable, './/td[@class="hdr" and contains(text(), "Article Title")]/following-sibling::td[@class="val"]');
		var source = ZU.xpathText(metatable, './/td[@class="hdr" and contains(text(), "Source")]/following-sibling::td[@class="val"]');
		if (source) {
			// The pattern used to list only the misspelling "Juni", so dates in
			// June were never recognised. "Juni" stays accepted in case a page
			// relies on it.
			date = source.match(/(January|February|March|April|May|June|Juni|July|August|September|October|November|December)\s+(\d{1,2},\s+)?\d{4}/);
			if (date) item.date = ZU.trimInternal(date[0]);
			var pages = source.match(/page\(s\): (\d+(?:-\d+)?)/);
			if (pages) item.page = pages[1];
		}
		var author = ZU.xpathText(metatable, './/td[@class="hdr" and contains(text(), "Author(s)")]/following-sibling::td[@class="val"]');
		if (author) {
			// Declared locally; the earlier code assigned to an implicit global.
			var authors = author.trim().split(/\s*,\s*/);
			for (var i = 0; i < authors.length; i++) {
				item.creators.push(ZU.cleanAuthor(authors[i], "author"));
			}
		}
		item.place = ZU.xpathText(doc, '//table[@id="metatable"]//td[@class="hdr" and contains(text(), "Place of Publication")]/following-sibling::td');
	} else {
		//the "old" page format. We have very little structure here, doing the best we can.
		title = ZU.xpathText(doc, '//div[@class="change_font"]');
		var header = ZU.xpathText(doc, '//tbody/tr/td/ul');
		Z.debug(header);
		// The header list may be missing altogether; skip the date instead of
		// throwing on a null value.
		if (header) {
			date = header.match(/Date:\s*(\d{2}-\d{2}-\d{2,4})/);
			if (date) item.date = date[1];
		}
	}

	//see if we have a match for item type; default to newspaper otherwise.
	var itemType = typeMap[item.publication];
	if (itemType) item.itemType = itemType;

	item.attachments.push({
		document: doc,
		title: "Eastview Fulltext Snapshot",
		mimeType: "text/html"
	});

	// Titles written entirely in capitals are handed to capitalizeTitle.
	if (title && title == title.toUpperCase()) {
		title = ZU.capitalizeTitle(title, true);
	}
	item.title = title;
	//sometimes items actually don't have a title: use the publication title instead.
	if (!item.title) item.title = item.publication;
	item.complete();
}
|
145 | 146 |
|
@@ -177,36 +178,36 @@ function scrapeSearch(doc, url) {
|
177 | 178 |
|
178 | 179 |
|
/**
 * Translator entry point: save the current article, or let the user pick
 * articles from a listing page and save each of those.
 *
 * @param {Document} doc - the page the translator was started on
 * @param {String} url - the address of that page
 */
function doWeb(doc, url) {
	if (detectWeb(doc, url) == "multiple") {
		var titles = ZU.xpath(doc, '//td[contains(@class, "title-cell")]/a');
		var items = {};
		for (var i = 0; i < titles.length; i++) {
			items[titles[i].href] = titles[i].textContent.trim();
		}
		Zotero.selectItems(items, function (selected) {
			if (!selected) {
				return true;
			}
			// Each chosen entry is loaded through its permalink page rather
			// than read from the listing (scrapeSearch is not used here).
			var articles = [];
			for (var href in selected) {
				articles.push(permaLink(href));
			}
			ZU.processDocuments(articles, scrape);
		});
	} else {
		if (url.search(/doc\/\d+/) != -1) {
			scrape(doc, url);
		}
		//always scrape from the permalink page, which has extra publication info at the top
		else {
			ZU.processDocuments(permaLink(url), scrape);
		}
	}
}
/** BEGIN TEST CASES **/
|
211 | 212 | var testCases = [
|
212 | 213 | {
|
|
0 commit comments