9
9
"inRepository" : true ,
10
10
"translatorType" : 4 ,
11
11
"browserSupport" : "gcs" ,
12
- "lastUpdated" : "2013-08-25 02:58:03 "
12
+ "lastUpdated" : "2013-08-25 04:10:34 "
13
13
}
14
14
15
15
/*
@@ -43,7 +43,21 @@ function getRefworksByID(ids, next) {
43
43
ZU . doPost (
44
44
'http://epub.cnki.net/KNS/ViewPage/SaveSelectedNoteFormat.aspx?type=txt' ,
45
45
'CurSaveModeType=REFWORKS' ,
46
- next
46
+ function ( text ) {
47
+ //fix item types
48
+ text = text . replace ( / ^ R T \s + D i s s e r t a t i o n \/ T h e s i s / gmi, 'RT Dissertation' )
49
+ //Zotero doesn't do well with mixed line endings. Make everything \n
50
+ . replace ( / \r \n ? / g, '\n' )
51
+ //split authors
52
+ . replace ( / ^ ( A [ 1 - 4 ] | U 2 ) \s * ( [ ^ \r \n ] + ) / gm, function ( m , tag , authors ) {
53
+ var authors = authors . split ( / \s * [ ; , , ] \s * / ) ; //that's a special comma
54
+ if ( ! authors [ authors . length - 1 ] . trim ( ) ) authors . pop ( ) ;
55
+
56
+ return tag + ' ' + authors . join ( '\n' + tag + ' ' ) ;
57
+ } ) ;
58
+
59
+ next ( text ) ;
60
+ }
47
61
) ;
48
62
} ) ;
49
63
}
@@ -58,10 +72,16 @@ function getIDFromURL(url) {
58
72
return { dbname : dbname [ 1 ] , filename : filename [ 1 ] , url : url } ;
59
73
}
60
74
75
/**
 * Resolve the CNKI record identifier for the current page.
 *
 * The page URL is tried first. Only when it yields no identifier is the
 * href of the link inside the "zwjdown" div looked up and parsed instead.
 *
 * @param {Document} doc  Page document to search for the fallback link.
 * @param {String} url    URL of the page.
 * @return Whatever getIDFromURL returns for the first source that yields
 *         a truthy result; otherwise its result for the fallback href.
 */
function getIDFromPage(doc, url) {
	var idFromPageURL = getIDFromURL(url);
	if (idFromPageURL) {
		return idFromPageURL;
	}

	// Fallback: parse the href of the link in the "zwjdown" block
	var fallbackHref = ZU.xpathText(doc, '//div[@class="zwjdown"]/a/@href');
	return getIDFromURL(fallbackHref);
}
79
+
61
80
function getTypeFromDBName ( dbname ) {
62
81
switch ( dbname . substr ( 0 , 4 ) . toUpperCase ( ) ) {
63
82
case "CJFQ" :
64
83
case "CJFD" :
84
+ case "CAPJ" :
65
85
return "journalArticle" ;
66
86
case "CDFD" :
67
87
case "CMFD" :
@@ -76,27 +96,40 @@ function getTypeFromDBName(dbname) {
76
96
}
77
97
}
78
98
79
/**
 * Collect the selectable entries from a search-result style page.
 *
 * Two page layouts are probed in order: rows containing an a.fz14 title
 * link, then rows of the "GridTableContent" table whose second cell holds
 * a link. The first layout that yields any rows is used.
 *
 * @param {Document} doc       Page document.
 * @param {String} url         Page URL (not used here; kept for callers).
 * @param {Object} [itemInfo]  Optional map that, when supplied, is filled
 *                             with { id: <parsed id> } keyed by link href.
 * @return {Object|undefined}  Map of link href -> trimmed title, or
 *                             undefined when no usable entry was found.
 */
function getItemsFromSearchResults(doc, url, itemInfo) {
	var layouts = [
		{
			rows: '//tr[not(.//tr) and .//a[@class="fz14"]]',
			anchor: './/a[@class="fz14"]'
		},
		{
			rows: '//table[@class="GridTableContent"]/tbody/tr[./td[2]/a]',
			anchor: './td[2]/a'
		}
	];

	// Stop at the first layout that matches at least one row
	var rows = [];
	var anchorXpath;
	for (var l = 0; l < layouts.length && !rows.length; l++) {
		rows = ZU.xpath(doc, layouts[l].rows);
		anchorXpath = layouts[l].anchor;
	}
	if (!rows.length) return;

	var items = {};
	var foundAny = false;
	for (var r = 0, total = rows.length; r < total; r++) {
		var anchor = ZU.xpath(rows[r], anchorXpath)[0];

		// Title text of the link, leaving out any SCRIPT child nodes
		var rawTitle = ZU.xpathText(anchor, './node()[not(name()="SCRIPT")]', null, '');
		var title = rawTitle ? ZU.trimInternal(rawTitle) : rawTitle;
		var id = getIDFromURL(anchor.href);

		if (title && id) {
			foundAny = true;
			if (itemInfo) {
				// NOTE: collecting a per-row PDF link (a.brief_downloadIcon)
				// was previously sketched here but is not enabled.
				itemInfo[anchor.href] = { id: id };
			}
			items[anchor.href] = title;
		}
	}

	if (foundAny) return items;
}
97
129
98
130
function detectWeb ( doc , url ) {
99
- var id = getIDFromURL ( url ) ;
131
+ var id = getIDFromPage ( doc , url ) ;
132
+ Z . debug ( id ) ;
100
133
if ( id ) {
101
134
return getTypeFromDBName ( id . dbname ) ;
102
135
}
@@ -107,41 +140,81 @@ function detectWeb(doc, url) {
107
140
108
141
/**
 * Translator entry point: scrape the current page, or the items the user
 * picks when the page is detected as "multiple".
 *
 * For a multiple page, the entries from getItemsFromSearchResults are
 * offered through Z.selectItems. The chosen entries are then passed to
 * scrape together with a map of per-item info keyed by item title (each
 * entry carries the parsed id and the item's own URL). Otherwise the id is
 * resolved from the page itself via getIDFromPage.
 *
 * @param {Document} doc  Page document.
 * @param {String} url    URL of the page being translated.
 */
function doWeb(doc, url) {
	if (detectWeb(doc, url) === "multiple") {
		var itemInfo = {};
		var items = getItemsFromSearchResults(doc, url, itemInfo);
		Z.selectItems(items, function (selectedItems) {
			if (!selectedItems) return true;

			var itemInfoByTitle = {};
			var ids = [];
			// The loop variable must not be called `url`: a `var url` here is
			// hoisted to the callback scope and would shadow the page URL
			// that is handed to scrape() below.
			for (var itemURL in selectedItems) {
				var info = itemInfo[itemURL];
				ids.push(info.id);
				info.url = itemURL;
				itemInfoByTitle[selectedItems[itemURL]] = info;
			}
			scrape(ids, doc, url, itemInfoByTitle);
		});
	} else {
		scrape([getIDFromPage(doc, url)], doc, url);
	}
}
124
161
125
- function scrape ( ids ) {
162
+ function scrape ( ids , doc , url , itemInfo ) {
126
163
getRefworksByID ( ids , function ( text ) {
127
- Z . debug ( text ) ;
128
- //fix item types
129
- text = text . replace ( / R T \s + D i s s e r t a t i o n \/ T h e s i s / mi, 'RT Dissertation' )
130
- //split authors
131
- . replace ( / ^ ( A [ 1 - 4 ] | U 2 ) \s * ( [ ^ \r \n ] + ) / m, function ( m , tag , authors ) {
132
- var authors = authors . split ( ';' ) ;
133
- if ( ! authors [ authors . length - 1 ] . trim ( ) ) authors . pop ( ) ;
134
-
135
- return tag + ' ' + authors . join ( '\n' + tag + ' ' ) ;
136
- } )
137
-
164
+ Z . debug ( text ) ;
138
165
var translator = Z . loadTranslator ( 'import' ) ;
139
166
translator . setTranslator ( '1a3506da-a303-4b0a-a1cd-f216e6138d86' ) ; //Refworks
140
167
translator . setString ( text ) ;
141
168
142
169
var i = 0 ;
143
170
translator . setHandler ( 'itemDone' , function ( obj , newItem ) {
144
- newItem . url = ids [ i ] . url ;
171
+ //split names
172
+ for ( var i = 0 , n = newItem . creators . length ; i < n ; i ++ ) {
173
+ var creator = newItem . creators [ i ] ;
174
+ if ( creator . firstName ) continue ;
175
+
176
+ var lastSpace = creator . lastName . lastIndexOf ( ' ' ) ;
177
+ if ( creator . lastName . search ( / [ A - Z a - z ] / ) !== - 1 && lastSpace !== - 1 ) {
178
+ //western name. split on last space
179
+ creator . firstName = creator . lastName . substr ( 0 , lastSpace ) ;
180
+ creator . lastName = creator . lastName . substr ( lastSpace + 1 ) ;
181
+ } else {
182
+ //Chinese name. first character is last name, the rest are first name
183
+ creator . firstName = creator . lastName . substr ( 1 ) ;
184
+ creator . lastName = creator . lastName . charAt ( 0 ) ;
185
+ }
186
+ }
187
+
188
+ if ( newItem . abstractNote ) {
189
+ newItem . abstractNote = newItem . abstractNote . replace ( / \s * [ \r \n ] \s * / g, '\n' ) ;
190
+ }
191
+
192
+ //clean up tags. Remove numbers from end
193
+ for ( var i = 0 , n = newItem . tags . length ; i < n ; i ++ ) {
194
+ newItem . tags [ i ] = newItem . tags [ i ] . replace ( / : \d + $ / , '' ) ;
195
+ }
196
+
197
+ newItem . title = ZU . trimInternal ( newItem . title ) ;
198
+ if ( itemInfo ) {
199
+ var info = itemInfo [ newItem . title ] ;
200
+ if ( ! info ) {
201
+ Z . debug ( 'No item info for "' + newItem . title + '"' ) ;
202
+ } else {
203
+ /*if(!info.pdfURL) {
204
+ Z.debug('No PDF URL passed from multiples page');
205
+ } else {
206
+ newItem.attachments.push({
207
+ title: 'Full Text PDF',
208
+ mimeType: 'application/pdf',
209
+ url: info.pdfURL
210
+ })
211
+ }*/
212
+
213
+ newItem . url = info . url ;
214
+ }
215
+ } else {
216
+ newItem . url = url ;
217
+ }
145
218
146
219
i ++ ;
147
220
newItem . complete ( ) ;
0 commit comments