-
-
Notifications
You must be signed in to change notification settings - Fork 11
/
unpackager.js
432 lines (390 loc) · 15.4 KB
/
unpackager.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
/*!
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
var unpackage = (function() {
'use strict';
/**
 * Try to open binary data as a zip archive with JSZip.
 * Any failure while loading is swallowed on purpose and reported as null.
 * @param {*} binaryData Passed to JSZip.loadAsync unchanged.
 * @returns {Promise<JSZip|null>} The loaded archive, or null if loading threw or rejected.
 */
const unzipOrNull = async (binaryData) => {
  let loadedZip = null;
  try {
    loadedZip = await JSZip.loadAsync(binaryData);
  } catch (error) {
    // Deliberate best-effort: the caller treats null as "not a zip".
  }
  return loadedZip;
};
/**
 * Read a blob as text using a FileReader.
 * @param {Blob} blob
 * @returns {Promise<string>} Resolves with the reader's result; rejects if the reader reports an error.
 */
const readAsText = (blob) => new Promise((resolve, reject) => {
  const fileReader = new FileReader();
  const handleError = () => {
    reject(new Error('Could not read blob as text'));
  };
  const handleLoad = () => {
    resolve(fileReader.result);
  };
  fileReader.onerror = handleError;
  fileReader.onload = handleLoad;
  fileReader.readAsText(blob);
});
/**
 * Collect every match of a regex in a string.
 *
 * The search runs on a private global copy of the regex, so:
 *  - a regex without the "g" flag still terminates (it is treated as global),
 *  - the caller's regex.lastIndex is neither consulted nor modified,
 *  - zero-length matches cannot stall the loop.
 *
 * @param {string} string
 * @param {RegExp} regex
 * @returns {RegExpExecArray[]} One exec() result (full match plus capture groups) per match, in order.
 */
const matchAll = (string, regex) => {
  const flags = regex.flags.includes('g') ? regex.flags : `${regex.flags}g`;
  const globalRegex = new RegExp(regex, flags);
  const result = [];
  let match = null;
  while ((match = globalRegex.exec(string)) !== null) {
    result.push(match);
    if (match[0] === '') {
      // exec() does not advance lastIndex after an empty match; step forward manually.
      globalRegex.lastIndex++;
    }
  }
  return result;
};
/**
 * Get the folder part of a "/"-separated path: everything before the last "/".
 * @param {string} name
 * @returns {string} The containing folder, or '' when the path has no "/".
 */
const getContainingFolder = (name) => {
  const lastSlash = name.lastIndexOf('/');
  if (lastSlash === -1) {
    return '';
  }
  return name.slice(0, lastSlash);
};
/**
 * Tell the project format from a parsed project.json.
 * An object with a "targets" key is reported as sb3; otherwise one with an "objName" key as sb2.
 * @param {object} data Parsed project.json
 * @returns {'sb3'|'sb2'}
 * @throws {Error} If neither key is present.
 */
const identifyProjectJSONType = (data) => {
  const hasTargets = 'targets' in data;
  if (hasTargets) {
    return 'sb3';
  }
  if (!('objName' in data)) {
    throw new Error('Can not determine project.json type');
  }
  return 'sb2';
};
/**
 * Decode base85 project data that starts with a "<length>," header.
 *
 * Three historical encodings are supported:
 *  - Version 1: "0x29 - 0x7d of ASCII with 0x5c (\) replaced with 0x7e (~)", decimal length header.
 *    https://github.com/TurboWarp/packager/blob/9234d057585132d2514a831476abbcf2a7b9b151/src/packager/lib/base85-encode.js
 *  - Version 2: HTML safe. "The character set used is 0x2a - 0x7e of ASCII",
 *    "0x3c (<) is replaced with 0x28 (opening parenthesis) and 0x3e (>) is replaced with 0x29 (closing parenthesis)",
 *    decimal length header.
 *    https://github.com/TurboWarp/packager/blob/44638a3f6daf03290c4020c5fd0d022edc1d0229/src/packager/lib/base85-encode.js
 *  - Version 3: same characters as version 2, but each digit of the length header is shifted up
 *    by 49 character codes so people don't misinterpret it.
 *    https://github.com/TurboWarp/packager/blob/61b6905853320332dd44b08f9f7ab03c4b3542b9/src/packager/base85.js
 *
 * Every group of 5 characters encodes one little-endian 32-bit word, least significant digit first.
 *
 * @param {string} str Header, comma, then the base85 payload.
 * @returns {Uint8Array} The decoded bytes, truncated to the length given by the header.
 * @throws {Error} If the length header is missing or does not decode to a number.
 * @throws {RangeError} If the payload holds more 5-character groups than the header's length allows.
 */
const decodeBase85WithLengthHeader = (str) => {
  const lengthEndsAt = str.indexOf(',');
  if (lengthEndsAt < 1) {
    throw new Error('Base85 data is missing its length header');
  }
  const header = str.substring(0, lengthEndsAt);

  // Version 1 alphabet: starts at 0x29, with ~ standing in for \
  const getValueV1 = (code) => {
    if (code === 0x7e) {
      return 0x5c - 0x29;
    }
    return code - 0x29;
  };

  // Version 2 and 3 alphabet: starts at 0x2a, with ( and ) standing in for < and >
  const getValueHTMLSafe = (code) => {
    if (code === 0x28) {
      return 0x3c - 0x2a;
    }
    if (code === 0x29) {
      return 0x3e - 0x2a;
    }
    return code - 0x2a;
  };

  // Shared by all versions: decode everything after the comma into byteLength bytes.
  const decodePayload = (byteLength, getValue) => {
    // The buffer is padded to a multiple of four so the last word can always be written whole.
    const resultBuffer = new ArrayBuffer(Math.ceil(byteLength / 4) * 4);
    const resultView = new DataView(resultBuffer);
    for (let i = lengthEndsAt + 1, j = 0; i < str.length; i += 5, j += 4) {
      resultView.setUint32(j, (
        getValue(str.charCodeAt(i + 4)) * 85 * 85 * 85 * 85 +
        getValue(str.charCodeAt(i + 3)) * 85 * 85 * 85 +
        getValue(str.charCodeAt(i + 2)) * 85 * 85 +
        getValue(str.charCodeAt(i + 1)) * 85 +
        getValue(str.charCodeAt(i))
      ), true);
    }
    return new Uint8Array(resultBuffer, 0, byteLength);
  };

  // Version 1 and 2 use numbers for the length header while version 3 encodes it
  if (/^\d+$/.test(header)) {
    const byteLength = +header;
    // Version 2 uses \, version 1 does not
    // This is accurate enough for now. Technically someone could encode something with
    // version 2 that doesn't include \, but projects are effectively random bytes to
    // zip compression so the likelihood of that happening randomly is pretty low.
    if (str.includes('\\')) {
      return decodePayload(byteLength, getValueHTMLSafe);
    }
    return decodePayload(byteLength, getValueV1);
  }

  const decodedHeader = header
    .split('')
    .map((character) => String.fromCharCode(character.charCodeAt(0) - 49))
    .join('');
  if (!/^\d+$/.test(decodedHeader)) {
    throw new Error('Base85 length header is not a valid number');
  }
  return decodePayload(+decodedHeader, getValueHTMLSafe);
};
/**
 * Decode base85, version 4: the length header is gone, so the decoded length is passed separately.
 * Uses the character set that starts at 0x2a, with 0x28 and 0x29 read as 0x3c and 0x3e.
 * Each group of 5 characters becomes one little-endian 32-bit word, least significant digit first.
 * @param {string} str Base85 payload.
 * @param {number} length Number of decoded bytes to return.
 * @returns {Uint8Array} The first `length` decoded bytes.
 */
const decodeBase85WithoutLengthHeader = (str, length) => {
  // Numeric value of the character at the given index (NaN when the index is past the end).
  const digitAt = (index) => {
    let code = str.charCodeAt(index);
    if (code === 0x28) {
      code = 0x3c;
    } else if (code === 0x29) {
      code = 0x3e;
    }
    return code - 0x2a;
  };

  // Padded up to a multiple of four so that whole words can be written.
  const paddedLength = Math.ceil(length / 4) * 4;
  const buffer = new ArrayBuffer(paddedLength);
  const view = new DataView(buffer, 0, Math.floor(str.length / 5 * 4));

  let byteOffset = 0;
  for (let groupStart = 0; groupStart < str.length; groupStart += 5) {
    // Horner evaluation, most significant digit (the group's last character) first.
    let word = 0;
    for (let position = 4; position >= 0; position--) {
      word = word * 85 + digitAt(groupStart + position);
    }
    view.setUint32(byteOffset, word, true);
    byteOffset += 4;
  }
  return new Uint8Array(buffer, 0, length);
};
/**
 * Decode a base64 string into bytes.
 * @param {string} str Base64 text, as accepted by atob().
 * @returns {Uint8Array} One byte per character of the atob() output.
 */
const decodeBase64 = (str) => Uint8Array.from(
  atob(str),
  (character) => character.charCodeAt(0)
);
/**
 * Decode the payload of a base64 data URI.
 * Everything between the first and the second ";base64," marker (or the end of the string)
 * is handed to decodeBase64.
 * @param {string} uri
 * @returns {Uint8Array} Whatever decodeBase64 returns for the payload.
 * @throws {Error} If the URI contains no ";base64," marker.
 */
const decodeDataURI = (uri) => {
  const [, base64] = uri.split(';base64,');
  if (base64 === undefined) {
    throw new Error('Data URI is not base64');
  }
  return decodeBase64(base64);
};
/**
 * Find a file in a JSZip using its name regardless of the folder it's in.
 * An exact match on `path` wins; otherwise the first entry whose name ends with "/<path>" is used.
 * @param {JSZip} zip
 * @param {string} path
 * @returns {JSZip.File|null} The matching file, or null when nothing matches.
 */
const findFileInZip = (zip, path) => {
  const exactMatch = zip.file(path);
  if (exactMatch) {
    return exactMatch;
  }
  const suffix = `/${path}`;
  const nestedName = Object.keys(zip.files).find((filename) => filename.endsWith(suffix));
  if (nestedName === undefined) {
    return null;
  }
  return zip.file(nestedName);
};
/**
 * Work out the project type of raw project data.
 * Data that opens as a zip is classified from its project.json (sb2 or sb3);
 * anything that is not a zip is reported as a Scratch 1 project.
 * @param {ArrayBuffer|Uint8Array} data Raw project bytes; returned unchanged in the result.
 * @returns {Promise<{type: string, data: (ArrayBuffer|Uint8Array)}>}
 * @throws {Error} If the data is a zip that contains no project.json, or its type can not be determined.
 */
const unpackageBinaryBlob = async (data) => {
  const projectZip = await unzipOrNull(data);
  if (!projectZip) {
    // The project is a Scratch 1 project.
    return {
      type: 'sb',
      data
    };
  }

  // The project is a compressed sb2 or sb3 project.
  const projectJSON = findFileInZip(projectZip, 'project.json');
  if (!projectJSON) {
    // Fail with a clear message rather than a TypeError from dereferencing null below.
    throw new Error('Project is a zip but it does not contain a project.json');
  }
  const projectJSONData = JSON.parse(await projectJSON.async('text'));
  const type = identifyProjectJSONType(projectJSONData);
  return {
    type,
    data
  };
};
/**
 * Serialize a JSZip to an array buffer using DEFLATE compression.
 * Before generating, the date of every entry in zip.files is overwritten with one fixed
 * timestamp (presumably so the output does not depend on when it was generated).
 * @param {JSZip} zip Modified in place: each entry's date is replaced.
 * @returns {Promise<ArrayBuffer>} The promise returned by zip.generateAsync.
 */
const zipToArrayBuffer = (zip) => {
  // date of first unpackager commit :)
  const fixedTimestamp = 1662869887000;
  Object.values(zip.files).forEach((entry) => {
    entry.date = new Date(fixedTimestamp);
  });
  const generateOptions = {
    type: 'arraybuffer',
    compression: 'DEFLATE'
  };
  return zip.generateAsync(generateOptions);
};
/**
 * Extract the original Scratch project from a packaged project.
 *
 * The input is probed in this order and the first format that matches wins:
 *  1. A zip containing a project.json (raw sb2/sb3 or a packager zip).
 *  2. A zip containing a "project.zip" or "project" file.
 *  3. HTML with base85 chunks in `decodeChunk` script tags.
 *  4. HTML with base85 in "p4-project" script tags.
 *  5. HTML with base85 in one big script tag.
 *  6. HTML with an inline base64 data URI.
 *  7. HTML generated by HTMLifier (initOptions, or the older TYPE/PROJECT_JSON/ASSETS constants).
 *
 * @param {Blob} blob The packaged project (a zip or an HTML file).
 * @returns {Promise<{type: string, data: (ArrayBuffer|Uint8Array)}>} type is 'sb', 'sb2' or 'sb3'.
 * @throws {Error} If no project could be found in the input.
 */
const unpackage = async (blob) => {
  const packagedZip = await unzipOrNull(blob);
  if (packagedZip) {
    // Raw sb2, raw sb3, and zip files generated by TurboWarp Packager have a project.json alongside the assets.
    const projectJSON = findFileInZip(packagedZip, 'project.json');
    if (projectJSON) {
      const innerFolderPath = getContainingFolder(projectJSON.name);
      const innerZip = packagedZip.folder(innerFolderPath);

      // Enumerate with forEach() rather than Object.keys(innerZip.files): per the JSZip docs,
      // a folder view shares the parent's `files` map, whose keys are full paths, while
      // forEach() yields paths relative to the folder, which is what the checks below and
      // remove() expect. Collect first so entries are not removed while being enumerated.
      const relativePaths = [];
      innerZip.forEach((relativePath) => {
        relativePaths.push(relativePath);
      });

      let sb3Assets = 0;
      let sb2Assets = 0;
      // Remove extra files that aren't part of the project but are in the same folder
      // This removes extra files from HTMLifier zips of Scratch 3 projects
      for (const path of relativePaths) {
        if (path === 'project.json') {
          // keep
        } else if (/^[a-f0-9]{32}\.[a-z0-9]{3}$/i.test(path)) {
          // sb3 asset; keep
          sb3Assets++;
        } else if (/^[0-9]+\.[a-z0-9]{3}$/i.test(path)) {
          // sb2 asset; keep
          sb2Assets++;
        } else {
          innerZip.remove(path);
        }
      }

      // Guess project type based on assets because we can't reliably parse the JSON without
      // importing @turbowarp/json. Only count as sb2 if we saw an sb2-style asset name
      // and nothing that looks like an sb3. sb3 is much more common so if we're unsure,
      // we'll lean towards that if there's any ambiguity.
      const type = sb2Assets > 0 && sb3Assets === 0 ? 'sb2' : 'sb3';
      return {
        type,
        data: await zipToArrayBuffer(innerZip)
      };
    }

    const projectBinary = (
      // Zip files generated by the TurboWarp Packager, the legacy TurboWarp Packager, or the forkphorus packager
      // can have a "project.zip" file
      findFileInZip(packagedZip, 'project.zip') ||
      // Zip files generated by HTMLifier for Scratch 1 projects have a "project" file
      findFileInZip(packagedZip, 'project')
    );
    if (projectBinary) {
      const projectData = await projectBinary.async('arraybuffer');
      return unpackageBinaryBlob(projectData);
    }

    throw new Error('Input was a zip but we could not find a project.');
  }

  const text = await readAsText(blob);

  // HTML files generated by some versions of the TurboWarp Packager use base85 in several inline script tags
  // that decode progressively (For simplicity we still just concatenate)
  // https://github.com/TurboWarp/packager/pull/861
  let base85Matches = matchAll(text, /<script data="([^"]+)">decodeChunk\((\d+)\)<\/script>/g);
  if (base85Matches.length) {
    const base85 = base85Matches.map(i => i[1]).join('');
    const length = base85Matches.map(i => +i[2]).reduce((a, b) => a + b, 0);
    return unpackageBinaryBlob(decodeBase85WithoutLengthHeader(base85, length));
  }

  // HTML files generated by some versions of the TurboWarp Packager use base85 in several inline script tags
  // that get concatenated at the end.
  base85Matches = matchAll(text, /<script type="p4-project">([^<]+)<\/script>/g);
  if (base85Matches.length) {
    const base85 = base85Matches.map(i => i[1]).join('');
    return unpackageBinaryBlob(decodeBase85WithLengthHeader(base85));
  }

  // HTML files generated by some versions of the TurboWarp Packager use base85 in one big script tag
  const base85Match = (
    // https://github.com/TurboWarp/packager/commit/45838ee9ced603058b774587b01808c2fae991ec
    text.match(/const result = base85decode\("(.+)"\);/) ||
    // https://github.com/TurboWarp/packager/commit/44638a3f6daf03290c4020c5fd0d022edc1d0229
    text.match(/<script id="p4-encoded-project-data" type="p4-encoded-project-data">([^<]+)<\/script>/)
  );
  if (base85Match) {
    const base85 = base85Match[1];
    return unpackageBinaryBlob(decodeBase85WithLengthHeader(base85));
  }

  const dataURIMatch = (
    // HTML files generated by old version of the TurboWarp Packager use inline base64
    // https://github.com/TurboWarp/packager/blob/33b7b8c43986485a97e6885a2bb004d6fcc20b08/src/packager/packager.js#L362-L368
    text.match(/const getProjectData = \(\) => fetch\("([a-zA-Z0-9+/=\-:;,]+)"\)/) ||
    // HTML files generated by the forkphorus packager use an inline base64 URL
    text.match(/var project = '([a-zA-Z0-9+/=\-:;,]+)';/) ||
    // HTML files generated by the legacy TurboWarp Packager use an inline base64 URL
    // NOTE(review): exactly one space is expected after the newline here; confirm this matches
    // the indentation the legacy packager actually emitted.
    text.match(/window\.__PACKAGER__ = {\n projectData: "([a-zA-Z0-9+/=\-:;,]+)"/)
  );
  if (dataURIMatch) {
    const dataURI = dataURIMatch[1];
    return unpackageBinaryBlob(decodeDataURI(dataURI));
  }

  // HTML files generated by some versions of HTMLifier store the project as fields in initOptions
  let htmlifierOptions = text.match(/<script>\nconst GENERATED = \d+\nconst initOptions = ({[\s\S]+})\ninit\(initOptions\)\n<\/script>/m);
  if (htmlifierOptions) {
    const htmlifierAssets = JSON.parse(htmlifierOptions[1]).assets;
    const compressedProjectData = htmlifierAssets.file;
    if (compressedProjectData) {
      // The project is a Scratch 1 project
      const decodedProjectData = decodeDataURI(compressedProjectData);
      return {
        type: 'sb',
        data: decodedProjectData
      };
    }

    // The project is a Scratch 3 project with assets listed individually in the JSON options
    // or the project was a Scratch 2 project which HTMLifier converts to Scratch 3
    const newZip = new JSZip();
    for (const name of Object.keys(htmlifierAssets)) {
      const nameInZip = name === 'project' ? 'project.json' : name;
      const dataURI = htmlifierAssets[name];
      newZip.file(nameInZip, decodeDataURI(dataURI));
    }
    return {
      type: 'sb3',
      data: await zipToArrayBuffer(newZip)
    };
  }

  // HTML files generated by older versions of HTMLifier with TYPE === "json" have PROJECT_JSON and ASSETS constants
  // https://github.com/SheepTester/htmlifier/blob/b80b860f64bf7c4a908f3c9e74bac6285b7a1687/hacky-file-getter.js#L166
  htmlifierOptions = text.match(/var TYPE = 'json',\nPROJECT_JSON = "([^"]*)",\nASSETS = ({[^}]*}),/m);
  if (htmlifierOptions) {
    const projectJSON = decodeDataURI(htmlifierOptions[1]);
    const assetsJSON = JSON.parse(htmlifierOptions[2]);
    const newZip = new JSZip();
    newZip.file('project.json', projectJSON);
    for (const name of Object.keys(assetsJSON)) {
      newZip.file(name, decodeDataURI(assetsJSON[name]));
    }
    return {
      type: 'sb3',
      data: await zipToArrayBuffer(newZip)
    };
  }

  throw new Error('Input was not a zip and we could not find project.');
};
return unpackage;
}());