From 55441e4bc9d2cf5b4d6fe5e74ded713f63307abc Mon Sep 17 00:00:00 2001 From: yifancong Date: Mon, 15 Jul 2024 16:05:12 +0800 Subject: [PATCH] refactor: the stream logic of reading the json data. (#419) --- .changeset/long-seahorses-leave.md | 6 ++ packages/sdk/src/sdk/sdk/core.ts | 55 +++++++++------- packages/sdk/src/sdk/utils/upload.ts | 2 +- packages/utils/src/build/file/sharding.ts | 8 +-- packages/utils/src/build/json.ts | 65 ++++++++++++++----- .../tests/__snapshots__/json.test.ts.snap | 37 +++++++++++ packages/utils/tests/json.test.ts | 16 ++--- pnpm-lock.yaml | 3 +- 8 files changed, 140 insertions(+), 52 deletions(-) create mode 100644 .changeset/long-seahorses-leave.md create mode 100644 packages/utils/tests/__snapshots__/json.test.ts.snap diff --git a/.changeset/long-seahorses-leave.md b/.changeset/long-seahorses-leave.md new file mode 100644 index 00000000..554e88b6 --- /dev/null +++ b/.changeset/long-seahorses-leave.md @@ -0,0 +1,6 @@ +--- +'@rsdoctor/utils': patch +'@rsdoctor/sdk': patch +--- + +fix(sdk): the error of Buffer string limit diff --git a/packages/sdk/src/sdk/sdk/core.ts b/packages/sdk/src/sdk/sdk/core.ts index 59f39fc0..7920b71c 100644 --- a/packages/sdk/src/sdk/sdk/core.ts +++ b/packages/sdk/src/sdk/sdk/core.ts @@ -125,28 +125,38 @@ export abstract class SDKCore this.diskManifestPath = manifest; await File.fse.ensureDir(outputDir); - /** write sharding files and get disk result */ - const dataUrls: DataWithUrl[] = await Promise.all( - Object.keys(storeData).map(async (key) => { - const data = storeData[key]; - // not use filesharding when the data is not object. - if (typeof data !== 'object') { - return { - name: key, - files: data, - }; + const urlsPromiseList: (Promise | DataWithUrl)[] = []; + + for (let key of Object.keys(storeData)) { + const data = storeData[key]; + // not use filesharding when the data is not object. + if (typeof data !== 'object') { + urlsPromiseList.push({ + name: key, + files: data, + }); + } + const jsonstr: string | string[] = await (async () => { + try { + return JSON.stringify(data); + } catch (error) { + // use the stream json stringify when call JSON.stringify failed due to the json is too large. + return Json.stringify(data); } - const jsonstr: string = await (async () => { - try { - return JSON.stringify(data); - } catch (error) { - // use the stream json stringify when call JSON.stringify failed due to the json is too large. - return Json.stringify(data); - } - })(); - return this.writeToFolder(jsonstr, outputDir, key); - }), - ); + })(); + + if (Array.isArray(jsonstr)) { + const urls = jsonstr.map((str, index) => { + return this.writeToFolder(str, outputDir, key, index + 1); + }); + urlsPromiseList.push(...urls); + } else { + urlsPromiseList.push(this.writeToFolder(jsonstr, outputDir, key)); + } + } + + /** write sharding files and get disk result */ + const dataUrls: DataWithUrl[] = await Promise.all(urlsPromiseList); debug( () => @@ -194,10 +204,11 @@ export abstract class SDKCore jsonstr: string, dir: string, key: string, + index?: number, ): Promise { const sharding = new File.FileSharding(Algorithm.compressText(jsonstr)); const folder = path.resolve(dir, key); - const writer = sharding.writeStringToFolder(folder); + const writer = sharding.writeStringToFolder(folder, '', index); return writer.then((item) => { const res: DataWithUrl = { name: key, diff --git a/packages/sdk/src/sdk/utils/upload.ts b/packages/sdk/src/sdk/utils/upload.ts index d6f808c7..f849ede1 100644 --- a/packages/sdk/src/sdk/utils/upload.ts +++ b/packages/sdk/src/sdk/utils/upload.ts @@ -5,7 +5,7 @@ export const transformDataUrls = ( ): Record => { return d.reduce((t: { [key: string]: string[] | string }, item) => { t[item.name] = Array.isArray(item.files) - ? item.files.map((e) => e.path) + ? item.files.map((e) => e.path).concat(t[item.name] || []) : item.files; return t; }, {}); diff --git a/packages/utils/src/build/file/sharding.ts b/packages/utils/src/build/file/sharding.ts index 15a2b827..1d6e6400 100644 --- a/packages/utils/src/build/file/sharding.ts +++ b/packages/utils/src/build/file/sharding.ts @@ -12,7 +12,7 @@ export class FileSharding { /** * @param ext the extension name of the output file (must starts with ".") */ - public createVirtualShardingFiles(ext = '') { + public createVirtualShardingFiles(ext = '', index = 0) { const bf = Buffer.from(this.content, this.encoding); const res: Buffer[] = []; const threshold = this.limitBytes; @@ -23,17 +23,17 @@ export class FileSharding { tmpBytes += threshold; } - return res.map((e, i) => ({ filename: `${i}${ext}`, content: e })); + return res.map((e, i) => ({ filename: `${i + index}${ext}`, content: e })); } /** * @param folder absolute path of folder which used to save string sharding files. * @param ext the extension name of the output file (must starts with ".") */ - public async writeStringToFolder(folder: string, ext = '') { + public async writeStringToFolder(folder: string, ext = '', index?: number) { const dist = path.resolve(folder); await fse.ensureDir(dist); - const res = this.createVirtualShardingFiles(ext); + const res = this.createVirtualShardingFiles(ext, index); await Promise.all( res.map( diff --git a/packages/utils/src/build/json.ts b/packages/utils/src/build/json.ts index 89f53df1..9493837a 100644 --- a/packages/utils/src/build/json.ts +++ b/packages/utils/src/build/json.ts @@ -1,8 +1,10 @@ import { JsonStreamStringify } from 'json-stream-stringify'; -import { PassThrough } from 'stream'; import { SDK } from '@rsdoctor/types'; import { dirname, join } from 'path'; import { Package } from 'src/common'; +import { Transform } from 'stream'; + +const maxFileSize = 1024 * 1024 * 400; // maximum length of each file, measured in bytes, with 400MB as an example. export function stringify( json: T, @@ -10,29 +12,62 @@ export function stringify( space?: string | number, cycle?: boolean, ): Promise

{ + const jsonList: string[] = []; if (json && typeof json === 'object') { return new Promise((resolve, reject) => { - let res = ''; - const pt = new PassThrough(); const stream = new JsonStreamStringify(json, replacer, space, cycle); - pt.on('data', (chunk) => { - res += chunk; - }); + let currentLength = 0; + let currentContent = ''; - pt.on('end', () => { - return resolve(res as P); - }); + const batchProcessor = new Transform({ + readableObjectMode: true, + transform(chunk, _encoding, callback) { + const lines = chunk.toString().split('\\n'); - pt.on('error', (err) => { - return reject(err); - }); + lines.forEach((line: string | any[]) => { + if (currentLength + line.length > maxFileSize) { + // 超出最大长度,保存当前内容 + jsonList.push(currentContent); + currentContent = ''; + currentLength = 0; + } + + if (line.length) { + currentContent += line; + currentLength += line.length; + } + }); - stream.on('error', (err) => { - return reject(err); + callback(); + }, }); - stream.pipe(pt); + stream + // .pipe(split2(/\\n/)) + .pipe(batchProcessor) + .on('data', (line: string | any[]) => { + if (currentLength + line.length > maxFileSize) { + //Exceeding the maximum length, closing the current file stream. + jsonList.push(currentContent); + currentContent = ''; + currentLength = 0; + } + + if (line.length) { + currentContent += line; + currentLength += line.length; + } + }) + .on('end', () => { + if (jsonList.length < 1) { + jsonList.push(currentContent); + } + resolve(jsonList as P); + }) + .on('error', (err: any) => { + return reject(err); + }); }); } diff --git a/packages/utils/tests/__snapshots__/json.test.ts.snap b/packages/utils/tests/__snapshots__/json.test.ts.snap new file mode 100644 index 00000000..e3e2a56e --- /dev/null +++ b/packages/utils/tests/__snapshots__/json.test.ts.snap @@ -0,0 +1,37 @@ +// Vitest Snapshot v1, https://vitest.dev/guide/snapshot.html + +exports[`test src/json.ts > stringify() > Array & Object 1`] = ` +[ + "["abcde"]", +] +`; + +exports[`test src/json.ts > stringify() > Array & Object 2`] = ` +[ + "["abcde",123,null,null,true,false]", +] +`; + +exports[`test src/json.ts > stringify() > Array & Object 3`] = ` +[ + "[{"a":1,"c":null},1,[2,{"k":1}]]", +] +`; + +exports[`test src/json.ts > stringify() > Array & Object 4`] = ` +[ + "{"a":1,"c":null}", +] +`; + +exports[`test src/json.ts > stringify() > Array & Object 5`] = ` +[ + "{"a":1,"c":null,"d":{"e":23}}", +] +`; + +exports[`test src/json.ts > stringify() > Array & Object 6`] = ` +[ + "{"d":{"e":23,"f":null,"h":{"a":1}}}", +] +`; diff --git a/packages/utils/tests/json.test.ts b/packages/utils/tests/json.test.ts index 97e595be..329a12ab 100644 --- a/packages/utils/tests/json.test.ts +++ b/packages/utils/tests/json.test.ts @@ -13,29 +13,29 @@ describe('test src/json.ts', () => { }); it('Array & Object', async () => { - expect(await Json.stringify(['abcde'])).toEqual('["abcde"]'); + expect(await Json.stringify(['abcde'])).toMatchSnapshot(); expect( await Json.stringify(['abcde', 123, null, undefined, true, false]), - ).toEqual('["abcde",123,null,null,true,false]'); + ).toMatchSnapshot(); expect( await Json.stringify([ { a: 1, b: undefined, c: null }, 1, [2, { k: 1 }], ]), - ).toEqual('[{"a":1,"c":null},1,[2,{"k":1}]]'); + ).toMatchSnapshot(); - expect(await Json.stringify({ a: 1, b: undefined, c: null })).toEqual( - '{"a":1,"c":null}', - ); + expect( + await Json.stringify({ a: 1, b: undefined, c: null }), + ).toMatchSnapshot(); expect( await Json.stringify({ a: 1, b: undefined, c: null, d: { e: 23 } }), - ).toEqual('{"a":1,"c":null,"d":{"e":23}}'); + ).toMatchSnapshot(); expect( await Json.stringify({ d: { e: 23, f: null, g: undefined, h: { a: 1 } }, }), - ).toEqual('{"d":{"e":23,"f":null,"h":{"a":1}}}'); + ).toMatchSnapshot(); }); }); }); diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 03f25148..6b66b6cf 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -5753,7 +5753,7 @@ packages: /@modern-js/codesmith@2.3.6: resolution: {integrity: sha512-wUF2L1mxUrYWPAj50jQ7yqcsLXt5g19Dt/USn3TxnosFE57GCvrsVjZseKUai9zfC0pnX7PXOj3KXXoV7EaiFw==} dependencies: - '@modern-js/utils': 2.54.3 + '@modern-js/utils': 2.54.5 '@swc/helpers': 0.5.1 axios: 1.7.2 tar: 6.2.0 @@ -6290,7 +6290,6 @@ packages: caniuse-lite: 1.0.30001638 lodash: 4.17.21 rslog: 1.2.0 - dev: true /@module-federation/runtime-tools@0.1.6: resolution: {integrity: sha512-7ILVnzMIa0Dlc0Blck5tVZG1tnk1MmLnuZpLOMpbdW+zl+N6wdMjjHMjEZFCUAJh2E5XJ3BREwfX8Ets0nIkLg==}