forked from zotero/translators
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathTV by the Numbers.js
139 lines (117 loc) · 4.06 KB
/
TV by the Numbers.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
{
"translatorID": "180a62bf-efdd-4d38-8d85-8971af04dd85",
"label": "TV by the Numbers",
"creator": "odie5533",
"target": "^https?://tvbythenumbers\\.com",
"minVersion": "1.0",
"maxVersion": "",
"priority": 100,
"inRepository": true,
"translatorType": 4,
"browserSupport": "g",
"lastUpdated": "2014-04-04 10:18:02"
}
/*
TV by the Numbers - translator for Zotero
Copyright (C) 2010 odie5533
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
This translator supports saving a snapshot of a single post and saving
the citation of many posts at once without visiting each post. Thus, it does
not save a snapshot when multiple citations are to be saved.
*/
/*
 Site-specific configuration read by detectWeb() and scrape().

 Declared with `var` so they are real bindings rather than implicit globals
 (a bare assignment to an undeclared name throws in strict-mode code).

 NOTE(review): XPATH_DATE and XPATH_AUTHORS start with `//`, which XPath
 resolves from the document root no matter which context node is passed.
 scrape() evaluates them once per post, so on a multi-post page every post
 would read the first `posted_on` paragraph of the document. Switching to
 `.//p[...]` would scope them to the post, but only if that paragraph is a
 descendant of the post container - confirm against the live markup.
*/
/* Value stored in each item's publicationTitle and used in the snapshot title. */
var PUB_TITLE = "TV by the Numbers";
/* detectWeb() rejects documents for which this yields an empty string. */
var XPATH_TITLE = "//title";
/* No page information is extracted; scrape() skips the field while this is null. */
var XPATH_PAGES = null;
/* Text between "on " and " by " in the posted_on paragraph. */
var XPATH_DATE = "substring-after(substring-before(string(//p[@class='posted_on']),' by '), 'on ')";
/* xpre() returns capture group 1, so this keeps the whole extracted string. */
var RE_DATE = /(.*)/;
/* Text after " by " in the posted_on paragraph. */
var XPATH_AUTHORS = "substring-after(string(//p[@class='posted_on']),' by ')";
/* xpre() returns capture group 1, so this keeps the whole extracted string. */
var RE_AUTHORS = /(.*)/;
/**
 * Classifies the current page by the number of post containers it holds.
 *
 * @param {Document} doc - page being inspected
 * @param {string} url - page address (not used)
 * @returns {string|undefined} "webpage" for exactly one post, "multiple"
 *   for more than one, undefined otherwise
 */
function detectWeb(doc, url) {
	/* the site serves plenty of junk documents; require a title first */
	var pageTitle = xpath_string(doc, doc, XPATH_TITLE);
	if (pageTitle) {
		var counted = doc.evaluate("count(//div[@class='post-alt blog'])", doc,
			null, XPathResult.NUMBER_TYPE, null);
		var total = counted.numberValue;
		if (total > 1)
			return "multiple";
		if (total == 1)
			return "webpage";
	}
}
/**
 * Evaluates an XPath expression as a string and trims the result.
 *
 * @param {Document} doc - document whose evaluate() is used
 * @param {Node} node - context node handed to evaluate()
 * @param {string} xpath - expression to evaluate
 * @returns {string|null} trimmed string value, or null when the evaluation
 *   yields nothing or an empty string
 */
function xpath_string(doc, node, xpath) {
	var result = doc.evaluate(xpath, node, null, XPathResult.STRING_TYPE, null);
	var text = result ? result.stringValue : null;
	return text ? Zotero.Utilities.trim(text) : null;
}
/**
 * Evaluates an XPath string expression and returns the first capture group
 * of a regular expression applied to it.
 *
 * @param {Document} doc - document whose evaluate() is used
 * @param {Node} node - context node for the XPath expression
 * @param {string} xpath - expression to evaluate as a string
 * @param {RegExp} reg - pattern whose capture group 1 is returned
 * @returns {string|null} capture group 1, or null when the XPath yields no
 *   text or the pattern does not match
 */
function xpre(doc, node, xpath, reg) {
	var xpmatch = xpath_string(doc, node, xpath);
	// exec(null) would coerce to the string "null" and "match" it, so bail
	// out before a missing value turns into bogus text.
	if (xpmatch === null || xpmatch === undefined)
		return null;
	var match = reg.exec(xpmatch);
	// exec() returns null on no match; indexing it directly would throw.
	return match ? match[1] : null;
}
/**
 * Builds a "webpage" item for every post container in the document and
 * saves either the single item found or the ones the user selects.
 *
 * A snapshot attachment is added only when the post's link equals `url`,
 * so items saved from a listing page carry citation data only.
 *
 * @param {Document} doc - page to extract posts from
 * @param {string} url - address of the page being translated
 */
function scrape(doc, url) {
	// Plain object used as a URL -> item map.
	var items = {};
	var posts = doc.evaluate("//div[@class='post-alt blog']", doc, null,
		XPathResult.ANY_TYPE, null);
	var post_count = 0;
	var post;
	while ((post = posts.iterateNext())) {
		// The first link inside the post supplies both the URL and the title;
		// a post without one cannot be turned into an item.
		var link = post.getElementsByTagName("a")[0];
		if (!link)
			continue;

		var newItem = new Zotero.Item("webpage");
		newItem.publicationTitle = PUB_TITLE;
		newItem.url = link.href;

		var title = Zotero.Utilities.unescapeHTML(
			Zotero.Utilities.cleanTags(link.textContent));
		// Curly quotes next to whitespace become '' and any left over become '.
		title = title.replace(/(\s+)(?:‘|’)|(?:‘|’)(\s+)/g, "$1''$2").replace(/‘|’/g, "'");
		newItem.title = title;

		if (XPATH_DATE) {
			var date = xpre(doc, post, XPATH_DATE, RE_DATE);
			// xpre() may hand back the literal string "null" for a missing
			// value (regex applied to null); never store that as a date.
			if (date && date !== 'null')
				newItem.date = date;
		}
		if (XPATH_PAGES)
			newItem.pages = xpath_string(doc, post, XPATH_PAGES);

		// authors: split on " and " first, otherwise on ";"
		var author_text = xpre(doc, post, XPATH_AUTHORS, RE_AUTHORS);
		var authors = [];
		if (author_text) {
			if (author_text.indexOf(" and ") != -1)
				authors = author_text.split(" and ");
			else if (author_text.indexOf(";") != -1)
				authors = author_text.split(";");
			else
				authors.push(author_text);
		}
		for (var a = 0; a < authors.length; a++) {
			// same "null" placeholder guard as for the date
			if (authors[a] !== 'null')
				newItem.creators.push(
					Zotero.Utilities.cleanAuthor(authors[a], "author"));
		}

		// attach html only when we are on the post's own page
		if (url == newItem.url)
			newItem.attachments.push({title:PUB_TITLE+" Snapshot",
				mimeType:"text/html", url:doc.location.href, snapshot:true});

		newItem.toString = function() { return this.title; };
		items[newItem.url] = newItem;
		post_count++;
	}

	var hasOwn = Object.prototype.hasOwnProperty;
	var itemUrl;
	if (post_count > 1) {
		// Offer url -> title pairs to the user, then complete the chosen items.
		var sel_items = {};
		for (itemUrl in items)
			if (hasOwn.call(items, itemUrl))
				sel_items[itemUrl] = items[itemUrl].title;
		sel_items = Zotero.selectItems(sel_items);
		// A falsy result makes this loop a no-op.
		for (itemUrl in sel_items)
			if (hasOwn.call(items, itemUrl))
				items[itemUrl].complete();
	} else if (post_count == 1) {
		for (itemUrl in items)
			if (hasOwn.call(items, itemUrl))
				items[itemUrl].complete();
	}
}
/**
 * Hands the document and its URL straight to scrape(), which builds and
 * saves the items. Named to pair with detectWeb() - presumably the hook
 * Zotero invokes when the user saves from a detected page; confirm against
 * the translator API.
 *
 * @param {Document} doc - page to translate
 * @param {string} url - address of that page
 */
function doWeb(doc, url) {
scrape(doc, url);
}