Skip to content

Commit e37d1fa

Browse files
committed
add functionality for Spiegel Print editions
1 parent b6861cd commit e37d1fa

File tree

1 file changed

+69
-6
lines changed

1 file changed

+69
-6
lines changed

Spiegel Online.js

Lines changed: 69 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
"inRepository": true,
1010
"translatorType": 4,
1111
"browserSupport": "gcsib",
12-
"lastUpdated": "2012-05-03 09:08:01"
12+
"lastUpdated": "2012-08-24 00:04:03"
1313
}
1414

1515
/*
@@ -41,8 +41,12 @@ http://www.spiegel.de/international/europe/0,1518,700530,00.html
4141
function detectWeb(doc, url) {
4242

4343
var spiegel_article_XPath = ".//div[@id='spArticleFunctions']";
44-
45-
if (doc.evaluate(spiegel_article_XPath, doc, null, XPathResult.ANY_TYPE, null).iterateNext() ){
44+
//The print edition is a magazine. The online edition is updated constantly, so it
45+
//makes sense to treat online articles like newspaper articles.
46+
if (url.match(/\/print\//) && ZU.xpathText(doc, spiegel_article_XPath)){
47+
return "magazineArticle";
48+
}
49+
else if (doc.evaluate(spiegel_article_XPath, doc, null, XPathResult.ANY_TYPE, null).iterateNext() ){
4650
//Zotero.debug("newspaperArticle");
4751
return "newspaperArticle";
4852
} else if (doc.location.href.match(/^http\:\/\/www\.spiegel\.de\/thema/)){
@@ -61,8 +65,13 @@ function detectWeb(doc, url) {
6165
}
6266

6367
function scrape(doc, url) {
64-
65-
var newItem = new Zotero.Item("newspaperArticle");
68+
69+
if (detectWeb(doc, url)=="magazineArticle") {
70+
var newItem = new Zotero.Item("magazineArticle");
71+
}
72+
else{
73+
var newItem = new Zotero.Item("newspaperArticle");
74+
}
6675
newItem.url = doc.location.href;
6776

6877
// This is for the title
@@ -71,6 +80,8 @@ function scrape(doc, url) {
7180
if (doc.evaluate(title_xPath, doc, null, XPathResult.ANY_TYPE, null).iterateNext() ){
7281
var title = doc.evaluate(title_xPath, doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
7382
newItem.title = title;
83+
} else if (ZU.xpathText(doc, '//div[@id="spArticleColumn"]/h2')) {
84+
newItem.title = ZU.xpathText(doc, '//div[@id="spArticleColumn"]/h2');
7485
} else {
7586
var title = doc.evaluate('//title', doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
7687
title = title.split(" - ")[0];
@@ -137,7 +148,10 @@ function scrape(doc, url) {
137148
newItem.attachments.push({url:printurl, title:doc.title, mimeType:"text/html"});
138149
}
139150

140-
151+
//Issue number (Ausgabe) of the print edition, stored as the volume
152+
if (ZU.xpathText(doc, '//div[@class="spAssetHdln"]') && newItem.itemType == "magazineArticle"){
153+
newItem.volume = ZU.xpathText(doc, '//div[@class="spAssetHdln"]').match(/(\d+)\/\d{4}/)[1];
154+
}
141155

142156
// Summary
143157
var summary_xPath = ".//p[@id='spIntroTeaser']";
@@ -149,6 +163,7 @@ function scrape(doc, url) {
149163
// Date - sometimes the first XPath doesn't yield anything. Fortunately, there are fallback XPaths to try...
150164
var date1_xPath = ".//h5[contains(@id, 'ShortDate')]";
151165
var date2_xPath = "//meta[@name='date']";
166+
var date3_xPath = "//div[@id='spShortDate']"
152167
if (doc.evaluate(date1_xPath, doc, null, XPathResult.ANY_TYPE, null).iterateNext() ){
153168
var date= doc.evaluate(date1_xPath, doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
154169
if (date.match('/')) {
@@ -157,7 +172,10 @@ function scrape(doc, url) {
157172
} else if (doc.evaluate(date2_xPath, doc, null, XPathResult.ANY_TYPE, null).iterateNext() ){
158173
var date= doc.evaluate(date2_xPath, doc, null, XPathResult.ANY_TYPE, null).iterateNext().content;
159174
date=date.replace(/(\d\d\d\d)-(\d\d)-(\d\d)/, '$3.$2.$1').replace(/T.+/,"");
175+
} else if (doc.evaluate(date3_xPath, doc, null, XPathResult.ANY_TYPE, null).iterateNext() ){
176+
var date= doc.evaluate(date3_xPath, doc, null, XPathResult.ANY_TYPE, null).iterateNext().textContent;
160177
}
178+
161179
newItem.date = Zotero.Utilities.trim(date);
162180

163181
if (doc.location.href.match(/^http\:\/\/www\.spiegel\.de\/spiegel/)){
@@ -257,6 +275,51 @@ var testCases = [
257275
"type": "web",
258276
"url": "http://www.spiegel.de/international/search/index.html?suchbegriff=Crisis",
259277
"items": "multiple"
278+
},
279+
{
280+
"type": "web",
281+
"url": "http://www.spiegel.de/spiegel/print/d-84789653.html",
282+
"items": [
283+
{
284+
"itemType": "magazineArticle",
285+
"creators": [
286+
{
287+
"firstName": "Alexander",
288+
"lastName": "Neubacher",
289+
"creatorType": "author"
290+
},
291+
{
292+
"firstName": "Conny",
293+
"lastName": "Neumann",
294+
"creatorType": "author"
295+
},
296+
{
297+
"firstName": "Steffen",
298+
"lastName": "Winter",
299+
"creatorType": "author"
300+
}
301+
],
302+
"notes": [],
303+
"tags": [
304+
"DER SPIEGEL"
305+
],
306+
"seeAlso": [],
307+
"attachments": [
308+
{
309+
"title": "DER SPIEGEL 15/2012 - VEB Energiewende",
310+
"mimeType": "application/pdf"
311+
}
312+
],
313+
"url": "http://www.spiegel.de/spiegel/print/d-84789653.html",
314+
"title": "VEB Energiewende",
315+
"volume": "15",
316+
"abstractNote": "Der Atomausstieg wird zur Subventionsmaschine für Industriebosse, Elektrokonzerne und findige Geschäftemacher. Die Kosten und Risiken sollen andere tragen - die Bürger.",
317+
"date": "07.04.2012",
318+
"publicationTitle": "Der Spiegel",
319+
"libraryCatalog": "Spiegel Online",
320+
"accessDate": "CURRENT_TIMESTAMP"
321+
}
322+
]
260323
}
261324
]
262325
/** END TEST CASES **/

0 commit comments

Comments
 (0)