-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcli.js
282 lines (270 loc) · 8.87 KB
/
cli.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
#!/usr/bin/env node
import { join } from 'path';
import yargs from 'yargs';
import Listr from 'listr';
import open from 'open';
import { appendFileSync } from 'fs';
import enquirer from 'enquirer';
import screenshot from './src/screenshot.js';
import archivers, {
isArchiveOrgUrl,
isArchiveTodayUrl,
} from './src/archive/archivers.js';
import resolveStylesheet from './src/stylesheet.js';
import addExifMetadata from './src/exif.js';
import launchBrowser from './src/browser.js';
import { VIEWPORT_WIDTH } from './src/util.js';
// Shorthand used for all user-facing output of the CLI.
const log = console.log;

// Command line definition. Every option is optional; `main` prompts for the URL
// (and viewport width) when no URL was supplied on the command line.
// @ts-ignore
const { argv: yargsArgv } = yargs(process.argv.slice(2)).options({
  print: {
    type: 'boolean',
    describe: "Whether to use the page's print stylesheet",
    default: false,
  },
  width: {
    type: 'string',
    describe:
      'Screenshot viewport width (e.g. 1920) or one of: mini (492), mobile (576), tablet (768), notebook (1200), laptop (1400, default), desktop (1920)',
  },
  screenshot: {
    type: 'string',
    // The yargs key is `choices`; a misspelled key is silently ignored and the
    // value would never be validated.
    choices: ['fullpage', 'stitched', 'none'],
    describe:
      "Screenshot method to use.\nfullpage (default): Take a screenshot of the page in one go. Does not work with very tall pages. Unresponsive pages with a horizontal scrollbar will override the viewport's width for the screenshot.\nstitched: Stitch together screenshots of the page by scrolling down the height of the viewport. Certain sticky elements may cause issues, especially when scripting is enabled, which must be fixed manually with a stylesheet.\nnone: Do not take a screenshot.",
    default: 'fullpage',
  },
  screenshotQuality: {
    type: 'number',
    default: 90,
  },
  aoUrl: {
    type: 'string',
    describe:
      'Pre-defined archive.org URL, useful when selecting a historical snapshot. "auto" (default) attempts to archive the URL. You may be prompted if the link is invalid to select a historic snapshot. "none" skips archive.org archiving.',
    default: 'auto',
  },
  atUrl: {
    type: 'string',
    describe:
      'Pre-defined archive.today URL, useful when selecting a historical snapshot. "auto" (default) attempts to archive the URL. "none" skips archive.today archiving.',
    default: 'auto',
  },
  stylesheet: {
    type: 'string',
    describe:
      'File containing the stylesheet to be used for the screenshot process. Overrides --stylesheets-dir. @import rules are supported.',
  },
  stylesheetsDir: {
    type: 'string',
    describe:
      'Directory containing stylesheets (files named origin.css, e.g. www.example.com.css) for the screenshot process. @import rules are supported',
    default: join(process.cwd(), 'stylesheets'),
  },
  filters: {
    type: 'string',
    describe:
      'File containing a list of Adblock filters to apply. Almost all filters (cosmetic and network) are supported. Defaults to <stylesheet-dir/filters.txt>',
  },
  shorturl: {
    type: 'string',
    describe:
      '5-30 characters that will be used as v.gd shorturl of the archive.org link, or "none" to disable',
  },
  exifComment: {
    type: 'string',
    describe: 'Custom text to add at the end of the EXIF description',
  },
  exifKeywords: {
    type: 'string',
    describe: 'List of keywords to add to the EXIF data, separated by commas (no spaces)',
  },
  renew: {
    type: 'string',
    choices: ['auto', 'manual', 'no'],
    // NOTE(review): the help text mentions "never", which is not one of the
    // accepted choices above - confirm which spelling is intended.
    describe:
      '"no" to always use the latest existing snapshot when possible. "manual" to manually determine whether to rearchive the link. "auto" (default) automatically determines whether the link is outdated. "never" to never renew',
    default: 'auto',
  },
  referrer: {
    type: 'string',
    describe:
      'Referrer site to use when visiting the site when taking a screenshot. Useful for paywalls. Presets: g: https://google.com, ddg: https://duckduckgo.com',
  },
  outputDir: { type: 'string', default: process.cwd() },
  noscript: {
    type: 'boolean',
    describe:
      'If passed, JavaScript will be disabled when taking a screenshot. Useful especially for paywall websites and obnoxious popups.',
    default: false,
  },
  imageLoadTimeout: {
    type: 'number',
    describe:
      'Timeout in milliseconds for images to load. In noscript mode, this amount of time is always elapsed to let images load.',
    default: 15000,
  },
  debug: {
    type: 'string',
    choices: ['all', 'screenshot'],
    describe:
      'screenshot: Debug the screenshotting process without saving files or archiving a URL.',
  },
  open: {
    type: 'boolean',
    describe:
      'If the created screenshot should be opened using your preferred image viewer.',
    default: true,
  },
  url: { type: 'string', describe: 'URL to archive' },
});

/**
 * Parsed command line options. `main` mutates this object while resolving
 * prompts and defaults.
 *
 * @type {import('./src/types').ArchhiveOptions}
 */
// @ts-ignore
const opts = yargsArgv;
/**
 * CLI entry point.
 *
 * Resolves the URL and viewport width (prompting when no URL was given),
 * runs the browser / archiving / screenshot / EXIF task list, prints the
 * resulting file and archive URLs, and appends the invocation to the
 * `.archhive_history` file in the output directory.
 *
 * @returns {Promise<void>}
 * @throws {Error} When the URL cannot be parsed, or when any task fails.
 */
async function main() {
  // Snapshot of the options before prompts and defaults fill in the gaps.
  const originalArgv = { ...opts };
  // True only when the URL was typed at the prompt, i.e. it is not in process.argv.
  let urlWasPrompted = false;

  if (!opts.url) {
    // @ts-ignore
    const extraArgs = opts._.join(' ');
    if (extraArgs) {
      opts.url = extraArgs;
    } else {
      opts.url = /** @type {any} */ (await enquirer.prompt({
        type: 'input',
        message: 'URL:',
        name: 'url',
      })).url;
      urlWasPrompted = true;
      // The viewport is only asked for in this fully interactive mode.
      if (!opts.width) {
        opts.width = /** @type {any} */ (await enquirer.prompt({
          type: 'select',
          message: 'Viewport:',
          name: 'width',
          choices: Object.keys(VIEWPORT_WIDTH),
        })).width;
      }
    }
  }
  if (!opts.width) opts.width = 'laptop';

  // In screenshot debugging mode, placeholder archive URLs replace "auto".
  if (opts.debug === 'screenshot') {
    if (opts.aoUrl === 'auto') {
      opts.aoUrl = 'archive.org/debug';
      opts.shorturl = 'none';
    }
    if (opts.atUrl === 'auto') opts.atUrl = 'archive.today/debug';
  }

  // Only URL parsing is guarded, so unrelated failures are not misreported as
  // an invalid URL; the parser error is kept as the cause.
  let urlObject;
  try {
    urlObject = new URL(opts.url);
  } catch (e) {
    throw new Error(`Invalid URL specified: ${opts.url}`, { cause: e });
  }
  // Ensure entities are encoded
  opts.url = urlObject.toString();
  const isAoUrl = isArchiveOrgUrl(urlObject);
  const isAtUrl = isArchiveTodayUrl(urlObject);
  // Automatically set the appropriate field if detected so we don't submit an already submitted URL
  if (isAoUrl) {
    opts.aoUrl = opts.url;
  } else if (isAtUrl) {
    opts.atUrl = opts.url;
    // archive.today pages don't have any JS so it's faster to have scripting enabled so we can do proper image loading detection
    opts.noscript = false;
  }

  const { cssFilename, stylesheet } = await resolveStylesheet(opts);
  if (opts.debug) {
    log(
      stylesheet
        ? `Using stylesheet: ${cssFilename}`
        : `Could not find stylesheet: ${cssFilename}`
    );
  }

  /** @type {Listr<import('./src/types').TaskContext>} */
  const tasks = new Listr(
    [
      {
        title: 'Start browser',
        task: launchBrowser,
      },
      {
        title: 'Archiving URL',
        task(ctx) {
          // One subtask per archiver; each keeps retrying for as long as the
          // user confirms the retry prompt.
          const archivingTasks = Object.keys(archivers).map((site) => {
            const retryableTask = async (...args) => {
              for (;;) {
                try {
                  const res = await archivers[site](...args);
                  ctx.urls = { ...ctx.urls, ...res };
                  return;
                } catch (e) {
                  log(e);
                  const { retry } = /** @type {any} */ (await enquirer.prompt({
                    type: 'confirm',
                    message: `${site} failed to archive ${opts.url}. Retry?`,
                    name: 'retry',
                    initial: true,
                  }));
                  if (!retry) throw e;
                }
              }
            };
            return { title: site, task: retryableTask };
          });
          return new Listr(archivingTasks, { concurrent: true, exitOnError: true });
        },
      },
      {
        title: 'Screenshot',
        task: screenshot,
      },
      {
        title: 'EXIF Metadata',
        skip() {
          if (opts.debug === 'screenshot') {
            return 'Debugging screenshot';
          }
        },
        task: addExifMetadata,
      },
    ],
    { exitOnError: true }
  );

  // The context is created up front so the browser stored on it by the tasks
  // can still be closed when a later task fails.
  /** @type {import('./src/types').TaskContext} */
  // @ts-ignore Partial context
  const ctx = {
    prompt: enquirer.prompt,
    log,
    opts,
    stylesheet,
    urls: { url: opts.url },
  };
  try {
    await tasks.run(ctx);
  } finally {
    if (ctx.browser) await ctx.browser.close();
  }

  log(`File: ${ctx.filename}`);
  log(
    `archive.org: ${ctx.urls.archiveOrgUrl}${
      ctx.urls.archiveOrgShortUrl ? ` (${ctx.urls.archiveOrgShortUrl})` : ''
    }`
  );
  log(`archive.today: ${ctx.urls.archiveTodayUrl}`);

  if (opts.debug !== 'screenshot') {
    if (opts.open) {
      await open(`file://${ctx.filename}`);
    }
    const launchArgv = process.argv.slice(2);
    // Record the width when it was NOT given on the command line.
    if (!originalArgv.width) launchArgv.push('--width', opts.width);
    // A positional or --url value is already part of process.argv; only a
    // prompted URL has to be added, otherwise it would appear twice.
    if (urlWasPrompted) launchArgv.push(`"${opts.url}"`);
    appendFileSync(
      join(opts.outputDir, '.archhive_history'),
      `${launchArgv.join(' ')} # ${new Date()}\n`
    );
  }
}
// Run the CLI; any rejection is printed and turned into a failing exit status.
main().catch((error) => {
  log(error);
  process.exit(1);
});