-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathindex.js
162 lines (143 loc) · 4.32 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
'use strict';
// See index.d.ts for docs
const fetch = require('node-fetch');
const UserAgent = require('user-agents');
exports.snapshot = async function snapshot(args) {
const {
url,
// Without a user agent, force archiving won't work
userAgent = new UserAgent().toString(),
renew = false,
complete = true,
} = args;
// Mutated
let { archiveDomain = 'https://archive.today' } = args;
/** Internal */
let { submitid } = args;
const { referer } = args;
const headers = {};
if (userAgent) headers['user-agent'] = userAgent;
if (referer) headers.referer = referer;
// Get an initial submitid from the index page, as well as follow the redirect for the desired mirror to be used.
// This step is not performed when renewing.
const passedSubmitid = submitid;
if (!submitid) {
const indexPage = await fetch(archiveDomain, { headers });
const body = await indexPage.text();
archiveDomain = indexPage.url;
submitid = extractSubmitid(body);
}
const body =
(passedSubmitid ? 'anyway=1&' : '') +
'url=' +
encodeURIComponent(url) +
'&submitid=' +
encodeURIComponent(submitid);
let response = await fetch(
`${archiveDomain}${archiveDomain.endsWith('/') ? '' : '/'}submit/`,
{
method: 'POST',
headers: {
'Content-Type': 'application/x-www-form-urlencoded',
...headers,
},
body,
redirect: 'manual',
}
);
let link = '';
let wip = false;
let cachedDate = false;
if (response.status === 200) {
const responseText = await response.text();
if (responseText === '<h1>Invalid URL</h1>') {
throw new TypeError('Invalid URL sent to archive.today: ' + url);
}
link = response.headers.get('refresh').split('url=')[1];
if (link.includes('/wip/')) {
if (complete) {
while (true) {
const res = await fetch(link, {
headers,
redirect: 'manual',
});
const redirect = res.headers.get('location');
if (redirect) {
link = redirect;
break;
}
await new Promise((resolve) => setTimeout(resolve, 1000));
}
} else {
wip = link;
link = link.replace('/wip/', '/');
}
}
} else if (response.status === 302 || response.status === 307) {
// Found
link = response.headers.get('location');
response = await fetch(link, {
method: 'POST',
headers: {
'Content-Type': 'application/x-www-form-urlencoded',
...headers,
},
body,
redirect: 'manual',
});
cachedDate = new Date(response.headers.get('memento-datetime'));
// Already archived, re-submit with submitid and referer from the page (this is required)
if (!passedSubmitid) {
const renewing =
typeof renew === 'function' ? await renew(cachedDate) : Boolean(renew);
if (renewing) {
const renewSubmitid = extractSubmitid(await response.text());
return snapshot({
...args,
userAgent, // Re-use the same user agent (if it was generated)
archiveDomain,
submitid: renewSubmitid,
referer: link,
});
}
}
} else {
throw new Error('Unknown response code: ' + response.status);
}
// _ and __ are the // in the protocol
const [_, __, domain, id] = link.split('/');
return { id, url: link, domain, image: `${link}/scr.png`, cachedDate, wip };
};
/**
 * Extract the value of the hidden `submitid` form input from an HTML page.
 *
 * @param {string} html - HTML source expected to contain the hidden input.
 * @returns {string} The value attribute of the `submitid` input.
 * @throws {Error} When the page contains no matching `submitid` input.
 */
function extractSubmitid(html) {
  const match = html.match(/<input type="hidden" name="submitid" value="(.*?)"\/>/);
  // String.prototype.match returns null when nothing matches; report that
  // explicitly instead of failing on the property access.
  if (!match) {
    throw new Error('Could not find submitid in archive.today response');
  }
  return match[1];
}
exports.timemap = async function timemap({
url,
userAgent = new UserAgent().toString(),
archiveDomain = 'https://archive.today',
}) {
const rawMap = await fetch(`${archiveDomain}/timemap/${url}`, {
headers: userAgent
? {
'User-Agent': userAgent,
}
: {},
}).then((r) => r.text());
const mementoMap = rawMap.split(',\n');
const mementos = [];
// No mementos (never been archived)
if (!mementoMap[0].includes('rel="original"')) {
return mementos;
}
for (const line of mementoMap) {
const [url, rel, datetime] = line.split('; ');
if (rel.includes('memento')) {
// Remove < and >
mementos.push({
url,
date: new Date(datetime.slice(datetime.indexOf('"'), datetime.lastIndexOf('"'))),
});
}
}
return mementos;
};