9
9
"inRepository" : true ,
10
10
"translatorType" : 4 ,
11
11
"browserSupport" : "gcsib" ,
12
- "lastUpdated" : "2012-05-03 09:08:01 "
12
+ "lastUpdated" : "2012-08-24 00:04:03 "
13
13
}
14
14
15
15
/*
@@ -41,8 +41,12 @@ http://www.spiegel.de/international/europe/0,1518,700530,00.html
41
41
function detectWeb ( doc , url ) {
42
42
43
43
var spiegel_article_XPath = ".//div[@id='spArticleFunctions']" ;
44
-
45
- if ( doc . evaluate ( spiegel_article_XPath , doc , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ) {
44
+ //the print edition is a magazine. Since the online edition is updated constantly it
45
+ //makes sense to treat it like a newspaper.
46
+ if ( url . match ( / \/ p r i n t \/ / ) && ZU . xpathText ( doc , spiegel_article_XPath ) ) {
47
+ return "magazineArticle" ;
48
+ }
49
+ else if ( doc . evaluate ( spiegel_article_XPath , doc , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ) {
46
50
//Zotero.debug("newspaperArticle");
47
51
return "newspaperArticle" ;
48
52
} else if ( doc . location . href . match ( / ^ h t t p \: \/ \/ w w w \. s p i e g e l \. d e \/ t h e m a / ) ) {
@@ -61,8 +65,13 @@ function detectWeb(doc, url) {
61
65
}
62
66
63
67
function scrape ( doc , url ) {
64
-
65
- var newItem = new Zotero . Item ( "newspaperArticle" ) ;
68
+
69
+ if ( detectWeb ( doc , url ) == "magazineArticle" ) {
70
+ var newItem = new Zotero . Item ( "magazineArticle" ) ;
71
+ }
72
+ else {
73
+ var newItem = new Zotero . Item ( "newspaperArticle" ) ;
74
+ }
66
75
newItem . url = doc . location . href ;
67
76
68
77
// This is for the title
@@ -71,6 +80,8 @@ function scrape(doc, url) {
71
80
if ( doc . evaluate ( title_xPath , doc , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ) {
72
81
var title = doc . evaluate ( title_xPath , doc , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . textContent ;
73
82
newItem . title = title ;
83
+ } else if ( ZU . xpathText ( doc , '//div[@id="spArticleColumn"]/h2' ) ) {
84
+ newItem . title = ZU . xpathText ( doc , '//div[@id="spArticleColumn"]/h2' ) ;
74
85
} else {
75
86
var title = doc . evaluate ( '//title' , doc , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . textContent ;
76
87
title = title . split ( " - " ) [ 0 ] ;
@@ -137,7 +148,10 @@ function scrape(doc, url) {
137
148
newItem . attachments . push ( { url :printurl , title :doc . title , mimeType :"text/html" } ) ;
138
149
}
139
150
140
-
151
+ //Ausgabe/Volume für Print
152
+ if ( ZU . xpathText ( doc , '//div[@class="spAssetHdln"]' ) && newItem . itemType == "magazineArticle" ) {
153
+ newItem . volume = ZU . xpathText ( doc , '//div[@class="spAssetHdln"]' ) . match ( / ( \d + ) \/ \d { 4 } / ) [ 1 ] ;
154
+ }
141
155
142
156
// Summary
143
157
var summary_xPath = ".//p[@id='spIntroTeaser']" ;
@@ -149,6 +163,7 @@ function scrape(doc, url) {
149
163
// Date - sometimes xpath1 doesn't yield anything. Fortunately, there's another possibility...
150
164
var date1_xPath = ".//h5[contains(@id, 'ShortDate')]" ;
151
165
var date2_xPath = "//meta[@name='date']" ;
166
+ var date3_xPath = "//div[@id='spShortDate']"
152
167
if ( doc . evaluate ( date1_xPath , doc , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ) {
153
168
var date = doc . evaluate ( date1_xPath , doc , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . textContent ;
154
169
if ( date . match ( '/' ) ) {
@@ -157,7 +172,10 @@ function scrape(doc, url) {
157
172
} else if ( doc . evaluate ( date2_xPath , doc , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ) {
158
173
var date = doc . evaluate ( date2_xPath , doc , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . content ;
159
174
date = date . replace ( / ( \d \d \d \d ) - ( \d \d ) - ( \d \d ) / , '$3.$2.$1' ) . replace ( / T .+ / , "" ) ;
175
+ } else if ( doc . evaluate ( date3_xPath , doc , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) ) {
176
+ var date = doc . evaluate ( date3_xPath , doc , null , XPathResult . ANY_TYPE , null ) . iterateNext ( ) . textContent ;
160
177
}
178
+
161
179
newItem . date = Zotero . Utilities . trim ( date ) ;
162
180
163
181
if ( doc . location . href . match ( / ^ h t t p \: \/ \/ w w w \. s p i e g e l \. d e \/ s p i e g e l / ) ) {
@@ -257,6 +275,51 @@ var testCases = [
257
275
"type" : "web" ,
258
276
"url" : "http://www.spiegel.de/international/search/index.html?suchbegriff=Crisis" ,
259
277
"items" : "multiple"
278
+ } ,
279
+ {
280
+ "type" : "web" ,
281
+ "url" : "http://www.spiegel.de/spiegel/print/d-84789653.html" ,
282
+ "items" : [
283
+ {
284
+ "itemType" : "magazineArticle" ,
285
+ "creators" : [
286
+ {
287
+ "firstName" : "Alexander" ,
288
+ "lastName" : "Neubacher" ,
289
+ "creatorType" : "author"
290
+ } ,
291
+ {
292
+ "firstName" : "Conny" ,
293
+ "lastName" : "Neumann" ,
294
+ "creatorType" : "author"
295
+ } ,
296
+ {
297
+ "firstName" : "Steffen" ,
298
+ "lastName" : "Winter" ,
299
+ "creatorType" : "author"
300
+ }
301
+ ] ,
302
+ "notes" : [ ] ,
303
+ "tags" : [
304
+ "DER SPIEGEL"
305
+ ] ,
306
+ "seeAlso" : [ ] ,
307
+ "attachments" : [
308
+ {
309
+ "title" : "DER SPIEGEL 15/2012 - VEB Energiewende" ,
310
+ "mimeType" : "application/pdf"
311
+ }
312
+ ] ,
313
+ "url" : "http://www.spiegel.de/spiegel/print/d-84789653.html" ,
314
+ "title" : "VEB Energiewende" ,
315
+ "volume" : "15" ,
316
+ "abstractNote" : "Der Atomausstieg wird zur Subventionsmaschine für Industriebosse, Elektrokonzerne und findige Geschäftemacher. Die Kosten und Risiken sollen andere tragen - die Bürger." ,
317
+ "date" : "07.04.2012" ,
318
+ "publicationTitle" : "Der Spiegel" ,
319
+ "libraryCatalog" : "Spiegel Online" ,
320
+ "accessDate" : "CURRENT_TIMESTAMP"
321
+ }
322
+ ]
260
323
}
261
324
]
262
325
/** END TEST CASES **/
0 commit comments