From 262b213227c9d72d061aa7e7d87862bc90030e8b Mon Sep 17 00:00:00 2001 From: Brian Botha Date: Wed, 24 Jul 2024 11:32:40 +1000 Subject: [PATCH] feat: implemented an `FileTree` serializer/de-serializer for flattening `FileTree`s over a raw RPC stream --- src/vaults/VaultManager.ts | 39 ++ src/vaults/fileTree.ts | 651 ++++++++++++++++++++++++++++++ src/vaults/index.ts | 1 + src/vaults/types.ts | 70 ++-- src/vaults/utils.ts | 174 +------- tests/vaults/VaultOps.test.ts | 48 ++- tests/vaults/fileTree.test.ts | 722 ++++++++++++++++++++++++++++++++++ tests/vaults/utils.test.ts | 403 +------------------ tests/vaults/utils.ts | 57 ++- 9 files changed, 1579 insertions(+), 586 deletions(-) create mode 100644 src/vaults/fileTree.ts create mode 100644 tests/vaults/fileTree.test.ts diff --git a/src/vaults/VaultManager.ts b/src/vaults/VaultManager.ts index 4618661b2..21c34c324 100644 --- a/src/vaults/VaultManager.ts +++ b/src/vaults/VaultManager.ts @@ -1033,6 +1033,45 @@ class VaultManager { }); } + /** + * Takes a generator and runs it with the listed vaults. 
locking is handled automatically + * @param vaultIds List of vault ID for vaults you wish to use + * @param g Generator you wish to run with the provided vaults + * @param tran + */ + @ready(new vaultsErrors.ErrorVaultManagerNotRunning()) + public async *withVaultsG( + vaultIds: Array, + g: (...args: Array) => AsyncGenerator, + tran?: DBTransaction, + ): AsyncGenerator { + if (tran == null) { + return yield* this.db.withTransactionG((tran) => + this.withVaultsG(vaultIds, g, tran), + ); + } + + // Obtaining locks + const vaultLocks: Array> = vaultIds.map( + (vaultId) => { + return [vaultId.toString(), RWLockWriter, 'read']; + }, + ); + // Running the function with locking + return yield* this.vaultLocks.withG( + ...vaultLocks, + async function* (): AsyncGenerator { + // Getting the vaults while locked + const vaults = await Promise.all( + vaultIds.map(async (vaultId) => { + return await this.getVault(vaultId, tran); + }), + ); + return yield* g(...vaults); + }, + ); + } + protected async setupKey(tran: DBTransaction): Promise { let key: Buffer | undefined; key = await tran.get([...this.vaultsDbPath, 'key'], true); diff --git a/src/vaults/fileTree.ts b/src/vaults/fileTree.ts new file mode 100644 index 000000000..7180adaf6 --- /dev/null +++ b/src/vaults/fileTree.ts @@ -0,0 +1,651 @@ +import type { Stat } from 'encryptedfs'; +import type { FileSystem } from '../types'; +import type { + ContentNode, + DoneMessage, + FileSystemReadable, + INode, + StatEncoded, + TreeNode, + Parsed, + HeaderGeneric, + HeaderContent, +} from './types'; +import path from 'path'; +import { ReadableStream, TransformStream } from 'stream/web'; +import { minimatch } from 'minimatch'; +import { JSONParser, TokenizerError } from '@streamparser/json'; +import * as vaultsUtils from './utils'; +import { HeaderSize, HeaderType, HeaderMagic } from './types'; +import * as utils from '../utils'; +import * as utilsErrors from '../utils/errors'; +import * as validationErrors from '../validation/errors'; + 
+/** + * Generates a serializable format of file stats + */ +function generateStats(stat: Stat): StatEncoded { + return { + isSymbolicLink: stat.isSymbolicLink(), + dev: stat.dev, + ino: stat.ino, + mode: stat.mode, + nlink: stat.nlink, + uid: stat.uid, + gid: stat.gid, + rdev: stat.rdev, + size: stat.size, + blksize: stat.blksize, + blocks: stat.blocks, + atime: stat.atime.getTime(), + mtime: stat.mtime.getTime(), + ctime: stat.ctime.getTime(), + birthtime: stat.birthtime.getTime(), + }; +} + +/** + * This is a utility for walking a file tree while matching a file path globstar pattern. + * @param fs - file system to work against, supports nodes `fs` and our `FileSystemReadable` provided by vaults. + * @param basePath - The path to start walking from. + * @param pattern - The pattern to match against, defaults to everything + * @param yieldRoot - toggles yielding details of the basePath. Defaults to true. + * @param yieldParents - Toggles yielding details about parents of pattern matched paths. Defaults to false. + * @param yieldDirectories - Toggles yielding directories that match the pattern. Defaults to true. + * @param yieldFiles - Toggles yielding files that match the pattern. Defaults to true. + * @param yieldStats - Toggles including stats in file and directory details. Defaults to false. 
+ */ +async function* globWalk({ + fs, + basePath = '.', + pattern = '**/*', + yieldRoot = true, + yieldParents = false, + yieldDirectories = true, + yieldFiles = true, + yieldStats = false, +}: { + fs: FileSystem | FileSystemReadable; + basePath?: string; + pattern?: string; + yieldRoot?: boolean; + yieldParents?: boolean; + yieldDirectories?: boolean; + yieldFiles?: boolean; + yieldStats?: boolean; +}): AsyncGenerator { + const directoryMap: Map = new Map(); + // Array<[Path, node, parent]> + const queue: Array<[string, INode, INode]> = []; + let iNode = 1; + const basePathNormalised = path.normalize(basePath); + let current: [string, INode, INode] | undefined = [basePathNormalised, 1, 0]; + + // Generates a list of parent nodes in order of parent to child + function* getParents(parentINode: INode): Generator { + const parents: Array = []; + let currentParent = parentINode; + while (true) { + const directory = directoryMap.get(currentParent); + directoryMap.delete(currentParent); + if (directory == null) break; + parents.unshift(directory); + currentParent = directory.parent; + } + for (const parent of parents) { + yield parent; + } + } + + // Wrapper function to handle compatibility between fs and efs. + // Exists as a placeholder for better handling if needed. + async function readDir(path: string): Promise> { + // @ts-ignore: While the types don't fully match, it matches enough for our usage. 
+ return await fs.promises.readdir(path); + } + + // Iterate over tree + const patternPath = path.join(basePathNormalised, pattern); + while (current != null) { + const [currentPath, node, parentINode] = current; + const stat = await fs.promises.stat(currentPath); + if (stat.isDirectory()) { + // `.` and `./` will not partially match the pattern, so we exclude the initial path + // We're doing a partial match to skip directories that can't contain our pattern + if ( + !minimatch(currentPath, patternPath, { partial: true }) && + currentPath !== basePathNormalised + ) { + current = queue.shift(); + continue; + } + const childrenPaths: Array = await readDir(currentPath); + // Filling in iNode details for adding to queue + const children = childrenPaths.map( + (v) => + [path.join(currentPath!, v.toString()), ++iNode, node] as [ + string, + INode, + INode, + ], + ); + queue.push(...children); + // Only yield root if enabled + if (yieldRoot || node !== 1) { + directoryMap.set(node, { + type: 'DIRECTORY', + path: currentPath, + iNode: node, + parent: parentINode, + stat: yieldStats ? generateStats(stat) : undefined, + }); + } + // Wildcards can find directories so we need yield them too + if (!minimatch(currentPath, patternPath)) { + current = queue.shift(); + continue; + } + // Yield directory if enabled + if (yieldDirectories) { + // Yield parents only if enabled and the child is yielded + if (yieldParents) { + yield* getParents(parentINode); + } + // Remove current from parent map since we yielded it already + directoryMap.delete(node); + yield { + type: 'DIRECTORY', + path: currentPath, + iNode: node, + parent: parentINode, + stat: yieldStats ? 
generateStats(stat) : undefined, + }; + } + } else if (stat.isFile()) { + // Check if the file matches the pattern + if (!minimatch(currentPath, patternPath)) { + current = queue.shift(); + continue; + } + // If enabled, yield the directories in order of parent to child + if (yieldParents) { + yield* getParents(parentINode); + } + // Yield file if it is enabled + if (yieldFiles) { + yield { + type: 'FILE', + path: currentPath, + iNode: node, + parent: parentINode, + stat: yieldStats ? generateStats(stat) : undefined, + }; + } + } + current = queue.shift(); + } +} + +/** + * Creates the base header with extra bytes to fill in extra header data for sub headers. + * Formatted as... + * 'H'(1) | HEADER_TYPE(1) + */ +function generateGenericHeader(headerData: HeaderGeneric): Uint8Array { + const header = new Uint8Array(HeaderSize.GENERIC); + const dataView = new DataView( + header.buffer, + header.byteOffset, + header.byteLength, + ); + dataView.setInt8(0, HeaderMagic.START); + dataView.setInt8(1, headerData.type); + return header; +} + +/** + * Creates the content header which identifies the content with the length. + * Data should follow this header. + * Formatted as... 
+ * generic_header(2) | data_size(8) | i_node(4) | 'D'(1) + */ +function generateContentHeader(headerData: HeaderContent): Uint8Array { + const contentHeader = new Uint8Array(HeaderSize.CONTENT); + const dataView = new DataView( + contentHeader.buffer, + contentHeader.byteOffset, + contentHeader.byteLength, + ); + dataView.setBigUint64(0, headerData.dataSize, false); + dataView.setUint32(8, headerData.iNode, false); + dataView.setUint8(12, HeaderMagic.END); + return contentHeader; +} + +// Parsers + +function parseGenericHeader(data: Uint8Array): Parsed { + const dataView = new DataView(data.buffer, data.byteOffset, data.byteLength); + if (data.byteLength < HeaderSize.GENERIC) return { remainder: data }; + const magicByte = dataView.getUint8(0); + if (magicByte !== HeaderMagic.START) { + throw new validationErrors.ErrorParse( + `invalid magic byte, should be "${HeaderMagic.START}", found "${magicByte}"`, + ); + } + const headerType = dataView.getUint8(1); + if (!(headerType in HeaderType)) { + throw new validationErrors.ErrorParse( + `header type was not valid type in 'HeaderType', found "${headerType}")`, + ); + } + return { + data: { + type: headerType, + }, + remainder: data.subarray(HeaderSize.GENERIC), + }; +} + +function parseContentHeader(data: Uint8Array): Parsed { + const dataView = new DataView(data.buffer, data.byteOffset, data.byteLength); + if (data.byteLength < HeaderSize.CONTENT) return { remainder: data }; + const dataSize = dataView.getBigUint64(0, false); + const iNode = dataView.getUint32(8, false); + const magicByte = dataView.getUint8(12); + if (magicByte !== HeaderMagic.END) { + throw new validationErrors.ErrorParse( + `invalid magic byte, should be "${HeaderMagic.END}", found "${magicByte}"`, + ); + } + return { + data: { + dataSize, + iNode, + }, + remainder: data.subarray(HeaderSize.CONTENT), + }; +} + +/** + * Takes a filesystem and filePath and generates a content header with the contents of the file in chunks. 
+ * The chunk size is specified by the `chunkSize` parameter. + * @param fs - File system used to access files. + * @param path - filePath for the file to serialize. + * @param iNode - file identifier number to be included in the header. + * @param chunkSize - Maximum chunk sized used when sending file data, defaults to 4kB + */ +async function* encodeContent( + fs: FileSystem | FileSystemReadable, + path: string, + iNode: number, + chunkSize: number = 1024 * 4, +): AsyncGenerator { + const fd = await fs.promises.open(path, 'r'); + async function read(buffer: Uint8Array): Promise<{ + bytesRead: number; + buffer: Uint8Array; + }> { + if (typeof fd === 'number') { + // Handle as an EFS fd + const fsr = fs as FileSystemReadable; + const bytesRead = await fsr.promises.read( + fd, + buffer, + undefined, + buffer.byteLength, + ); + return { bytesRead, buffer }; + } else { + // Handle as an FS fd + return fd.read({ buffer }); + } + } + async function close(): Promise { + if (typeof fd === 'number') { + // Handle as an EFS fd + const fsr = fs as FileSystemReadable; + return await fsr.close(fd); + } else { + // Handle as an FS fd + return await fd.close(); + } + } + try { + const stats = await fs.promises.stat(path); + yield vaultsUtils.uint8ArrayConcat([ + generateGenericHeader({ + type: HeaderType.CONTENT, + }), + generateContentHeader({ + dataSize: BigInt(stats.size), + iNode, + }), + ]); + while (true) { + const readResult = await read(new Uint8Array(chunkSize)); + if (readResult.bytesRead === 0) break; + yield readResult.buffer.subarray(0, readResult.bytesRead); + if (readResult.bytesRead < chunkSize) break; + } + } finally { + await close(); + } +} + +/** + * Takes an AsyncGenerator and serializes it into a `ReadableStream` + * @param fs + * @param treeGen - An AsyncGenerator that yields the files and directories of a file tree. + * @param yieldContents - Toggles sending the contents of files after the file tree. 
+ */ +function serializerStreamFactory( + fs: FileSystem | FileSystemReadable, + treeGen: AsyncGenerator, + yieldContents: boolean = true, +): ReadableStream { + const files: Array<[number, string]> = []; + let treeDataGen: AsyncGenerator | undefined = treeGen; + let contentsGen: AsyncGenerator | undefined = + undefined; + // Will get the next content chunk or return undefined if there is no more data to send + async function getNextContentChunk(): Promise { + if (!yieldContents) return undefined; + while (true) { + if (contentsGen == null) { + const next = files.shift(); + // No more files means we're done + if (next == null) return undefined; + const [iNode, path] = next; + contentsGen = encodeContent(fs, path, iNode); + } + const result = await contentsGen.next(); + if (!result.done) return result.value; + else contentsGen = undefined; + } + } + async function cleanup(reason: unknown) { + await treeDataGen?.throw(reason).catch(() => {}); + await contentsGen?.throw(reason).catch(() => {}); + } + return new ReadableStream({ + start: (controller) => { + controller.enqueue(generateGenericHeader({ type: HeaderType.TREE })); + }, + pull: async (controller) => { + try { + if (treeDataGen != null) { + const result = await treeGen.next(); + if (!result.done) { + // If a file, add to the file list to encode contents later + if (result.value.type === 'FILE') { + files.push([result.value.iNode, result.value.path]); + } + // Normal tree nodes are just serialized and converted to `UInt8Array` + const jsonSerialized = JSON.stringify(result.value); + controller.enqueue( + vaultsUtils.bufferToUint8ArrayCopyless( + Buffer.from(jsonSerialized, 'utf-8'), + ), + ); + } else { + const treeDoneMessage = JSON.stringify({ type: 'DONE' }); + controller.enqueue( + vaultsUtils.bufferToUint8ArrayCopyless( + Buffer.from(treeDoneMessage, 'utf-8'), + ), + ); + treeDataGen = undefined; + } + } else { + const contentDataChunk = await getNextContentChunk(); + if (contentDataChunk == null) return 
controller.close(); + controller.enqueue(contentDataChunk); + } + } catch (e) { + await cleanup(e); + return controller.error(e); + } + }, + cancel: async (reason) => { + await cleanup(reason); + }, + }); +} + +/** + * Type-guard for checking if an object is structured as a `DoneMessage`. + */ +function isDoneMessage(data: unknown): data is DoneMessage { + if (!utils.isObject(data)) return false; + if (Array.isArray(data)) return false; + return 'type' in data && data.type === 'DONE'; +} + +/** + * Type-guard assertion for checking if an object is structured as a `TreeNode`. + */ +function parseTreeNode(data: unknown): asserts data is TreeNode { + if (!utils.isObject(data)) { + throw new validationErrors.ErrorParse('Must be an object'); + } + if (Array.isArray(data)) { + throw new validationErrors.ErrorParse("Can't be an array"); + } + if (!('type' in data)) { + throw new validationErrors.ErrorParse("'type' parameter must be defined"); + } + const type = data.type; + if (typeof type !== 'string') { + throw new validationErrors.ErrorParse("'type' parameter must be a string"); + } + if (!(type === 'FILE' || type === 'DIRECTORY')) { + throw new validationErrors.ErrorParse( + "'type' parameter must be either 'FILE' or 'DIRECTORY'", + ); + } + if (!('path' in data && typeof data.path == 'string')) { + throw new validationErrors.ErrorParse( + "'path' parameter must be defined and a string", + ); + } + if (!('iNode' in data && typeof data.iNode == 'number')) { + throw new validationErrors.ErrorParse( + "'iNode' parameter must be defined and a number", + ); + } + if (!('parent' in data && typeof data.parent == 'number')) { + throw new validationErrors.ErrorParse( + "'parent' parameter must be defined and a number", + ); + } +} + +/** + * Creates a TransformStream webStream to transform a binary `UInt8Array` stream into a parsed file tree stream. + * Will yield `TreeNode`s defining the file tree. 
+ * If file contents are included in the stream it will yield `ContentNode`s defining the file metadata and raw binary + * `Uint8Array` chunks of the contents. + */ +function parserTransformStreamFactory(): TransformStream< + Uint8Array, + TreeNode | ContentNode | Uint8Array +> { + let workingBuffer: Uint8Array = new Uint8Array(0); + let phase: 'START' | 'TREE' | 'CONTENT' = 'START'; + let jsonParser: JSONParser | undefined = undefined; + let lastChunk: Uint8Array | undefined; + let contentLength: bigint | undefined = undefined; + const enterTreeState = ( + controller: TransformStreamDefaultController< + TreeNode | ContentNode | Uint8Array + >, + initialChunk: Uint8Array, + ) => { + let done = false; + phase = 'TREE'; + workingBuffer = new Uint8Array(0); + // Setting up the JSON stream parser + jsonParser = new JSONParser({ + separator: '', + paths: ['$'], + }); + const handleEnd = (e?: unknown) => { + if (e != null && !(done && e instanceof TokenizerError)) { + controller.error(e); + return; + } + if (e instanceof TokenizerError) { + // Extracting error position. 
+ const match = e.message.match(/at position "(.*)" in state/); + if (match == null) { + controller.error( + new utilsErrors.ErrorUtilsUndefinedBehaviour( + 'failed to match for buffer index', + ), + ); + return; + } + const bufferIndex = parseInt(match[1]); + if (isNaN(bufferIndex)) { + controller.error( + new utilsErrors.ErrorUtilsUndefinedBehaviour( + 'failed to parse buffer index', + ), + ); + return; + } + if (lastChunk == null) { + controller.error( + new utilsErrors.ErrorUtilsUndefinedBehaviour( + 'lastChunk was undefined', + ), + ); + return; + } + workingBuffer = lastChunk.subarray(bufferIndex); + } + jsonParser = undefined; + }; + jsonParser.onEnd = handleEnd; + jsonParser.onError = handleEnd; + jsonParser.onValue = (value) => { + const message = value.value; + if (isDoneMessage(message)) { + done = true; + jsonParser?.end(); + phase = 'CONTENT'; + return; + } + parseTreeNode(message); + controller.enqueue(message); + }; + jsonParser.write(initialChunk); + }; + return new TransformStream({ + transform: (chunk, controller) => { + switch (phase) { + case 'START': { + workingBuffer = vaultsUtils.uint8ArrayConcat([workingBuffer, chunk]); + // Start phase expects a TREE header to indicate start of TREE data + const { data, remainder } = parseGenericHeader(workingBuffer); + if (data == null) { + // Wait for more data + workingBuffer = remainder; + return; + } + if (data.type !== HeaderType.TREE) { + controller.error( + new validationErrors.ErrorParse( + `expected TREE header, got "${HeaderType[data.type]}"`, + ), + ); + return; + } + // We have the tree header, so we switch to tree mode + enterTreeState(controller, remainder); + lastChunk = remainder; + return; + } + case 'TREE': + { + // Tree needs to parse a JSON stream + lastChunk = chunk; + jsonParser?.write(chunk); + } + return; + case 'CONTENT': + { + workingBuffer = vaultsUtils.uint8ArrayConcat([ + workingBuffer, + chunk, + ]); + if (contentLength == null) { + const genericHeader = 
parseGenericHeader(workingBuffer); + if (genericHeader.data == null) return; + if (genericHeader.data.type === HeaderType.TREE) { + enterTreeState(controller, genericHeader.remainder); + lastChunk = genericHeader.remainder; + return; + } + if (genericHeader.data.type !== HeaderType.CONTENT) { + controller.error( + new validationErrors.ErrorParse( + `expected CONTENT or TREE message, got "${genericHeader.data.type}"`, + ), + ); + return; + } + const contentHeader = parseContentHeader(genericHeader.remainder); + if (contentHeader.data == null) return; + + const { dataSize, iNode } = contentHeader.data; + controller.enqueue({ type: 'CONTENT', dataSize, iNode }); + contentLength = dataSize; + workingBuffer = contentHeader.remainder; + } + // We yield the whole buffer, or split it for the next header + if (workingBuffer.byteLength === 0) return; + if (workingBuffer.byteLength <= contentLength) { + contentLength -= BigInt(workingBuffer.byteLength); + controller.enqueue(workingBuffer); + workingBuffer = new Uint8Array(0); + if (contentLength === 0n) contentLength = undefined; + return; + } else { + controller.enqueue( + workingBuffer.subarray(0, Number(contentLength)), + ); + workingBuffer = workingBuffer.subarray(Number(contentLength)); + contentLength = undefined; + } + } + return; + default: + controller.error( + new utilsErrors.ErrorUtilsUndefinedBehaviour( + `invalid state "${phase}"`, + ), + ); + return; + } + }, + }); +} + +export { + HeaderSize, + HeaderType, + HeaderMagic, + generateStats, + globWalk, + generateGenericHeader, + generateContentHeader, + parseGenericHeader, + parseContentHeader, + encodeContent, + serializerStreamFactory, + isDoneMessage, + parseTreeNode, + parserTransformStreamFactory, +}; diff --git a/src/vaults/index.ts b/src/vaults/index.ts index 26906d291..b03de1f7c 100644 --- a/src/vaults/index.ts +++ b/src/vaults/index.ts @@ -6,3 +6,4 @@ export * as types from './types'; export * as errors from './errors'; export * as events from './events'; 
export * as vaultOps from './VaultOps'; +export * as fileTree from './fileTree'; diff --git a/src/vaults/types.ts b/src/vaults/types.ts index 6511e5ff0..a4609718c 100644 --- a/src/vaults/types.ts +++ b/src/vaults/types.ts @@ -135,14 +135,26 @@ type VaultName = string; type VaultActions = Partial>; type FileTree = Array; -type TreeNode = DirectoryNode | FileNode; +type TreeNode = { + type: 'DIRECTORY' | 'FILE'; + iNode: INode; + path: FilePath; + parent: INode; + stat?: StatEncoded; +}; + +type ContentNode = { + type: 'CONTENT'; + iNode: number; + dataSize: bigint; +}; +type DoneMessage = { type: 'DONE' }; + type FilePath = string; type INode = number; -type CNode = number; type StatEncoded = { isSymbolicLink: boolean; - type: 'FILE' | 'DIRECTORY' | 'OTHER'; dev: number; ino: number; mode: number; @@ -159,25 +171,35 @@ type StatEncoded = { birthtime: number; }; -type DirectoryNode = { - type: 'directory'; - path: FilePath; - iNode: INode; - parent: INode; - children: Array; - stat?: StatEncoded; -}; +interface Parsed { + data?: T; + remainder: Uint8Array; +} -type FileNode = { - type: 'file'; - path: FilePath; - iNode: INode; - parent: INode; - cNode: CNode; - stat?: StatEncoded; +type HeaderGeneric = { + type: HeaderType; +}; +type HeaderContent = { + dataSize: bigint; + iNode: number; }; -export { vaultActions }; +enum HeaderSize { + GENERIC = 2, + CONTENT = 13, +} + +enum HeaderType { + CONTENT = 0x43, // 'C' 67 + TREE = 0x54, // 'T' 84 +} + +enum HeaderMagic { + START = 0x48, // 'H' 72 + END = 0x44, // 'D' 68 +} + +export {}; export type { VaultId, @@ -193,12 +215,14 @@ export type { VaultActions, FileTree, TreeNode, + ContentNode, + DoneMessage, FilePath, INode, - CNode, StatEncoded, - DirectoryNode, - FileNode, + Parsed, + HeaderGeneric, + HeaderContent, }; -export { tagLast, refs }; +export { vaultActions, tagLast, refs, HeaderSize, HeaderType, HeaderMagic }; diff --git a/src/vaults/utils.ts b/src/vaults/utils.ts index 340879fbb..a946127b1 100644 --- 
a/src/vaults/utils.ts +++ b/src/vaults/utils.ts @@ -1,20 +1,14 @@ -import type { EncryptedFS, Stat } from 'encryptedfs'; -import type { FileSystem } from '../types'; +import type { EncryptedFS } from 'encryptedfs'; import type { VaultRef, VaultAction, CommitId, FileSystemReadable, FileSystemWritable, - TreeNode, - DirectoryNode, - INode, - StatEncoded, } from './types'; import type { NodeId } from '../ids/types'; import type { Path } from 'encryptedfs/dist/types'; import path from 'path'; -import { minimatch } from 'minimatch'; import { pathJoin } from 'encryptedfs/dist/utils'; import * as vaultsErrors from './errors'; import { tagLast, refs, vaultActions } from './types'; @@ -129,159 +123,25 @@ async function mkdirExists(efs: FileSystemWritable, directory: string) { } } -function genStat(stat: Stat): StatEncoded { - return { - isSymbolicLink: stat.isSymbolicLink(), - type: stat.isFile() ? 'FILE' : stat.isDirectory() ? 'DIRECTORY' : 'OTHER', - dev: stat.dev, - ino: stat.ino, - mode: stat.mode, - nlink: stat.nlink, - uid: stat.uid, - gid: stat.gid, - rdev: stat.rdev, - size: stat.size, - blksize: stat.blksize, - blocks: stat.blocks, - atime: stat.atime.getTime(), - mtime: stat.mtime.getTime(), - ctime: stat.ctime.getTime(), - birthtime: stat.birthtime.getTime(), - }; +/** + * Converts a `Buffer` to a `Uint8Array` without copying the contents + */ +function bufferToUint8ArrayCopyless(data: Buffer): Uint8Array { + return new Uint8Array(data.buffer, data.byteOffset, data.byteLength); } /** - * This is a utility for walking a file tree while matching a file path globstar pattern. - * @param fs - file system to work against, supports nodes `fs` and our `FileSystemReadable` provided by vaults. - * @param basePath - The path to start walking from. - * @param pattern - The pattern to match against, defaults to everything - * @param yieldRoot - toggles yielding details of the basePath. Defaults to true. 
- * @param yieldParents - Toggles yielding details about parents of pattern matched paths. Defaults to false. - * @param yieldDirectories - Toggles yielding directories that match the pattern. Defaults to true. - * @param yieldFiles - Toggles yielding files that match the pattern. Defaults to true. - * @param yieldStats - Toggles including stats in file and directory details. Defaults to false. + * Converts a `Uint8Array` to a `Buffer` without copying the contents */ -async function* globWalk({ - fs, - basePath = '.', - pattern = '**/*', - yieldRoot = true, - yieldParents = false, - yieldDirectories = true, - yieldFiles = true, - yieldStats = false, -}: { - fs: FileSystem | FileSystemReadable; - basePath?: string; - pattern?: string; - yieldRoot?: boolean; - yieldParents?: boolean; - yieldDirectories?: boolean; - yieldFiles?: boolean; - yieldStats?: boolean; -}): AsyncGenerator { - const files: Array = []; - const directoryMap: Map = new Map(); - // Path, node, parent - const queue: Array<[string, INode, INode]> = []; - let iNode = 1; - const basePathNormalised = path.normalize(basePath); - let current: [string, INode, INode] | undefined = [basePathNormalised, 1, 0]; - - const getParents = (parentINode: INode) => { - const parents: Array = []; - let currentParent = parentINode; - while (true) { - const directory = directoryMap.get(currentParent); - directoryMap.delete(currentParent); - if (directory == null) break; - parents.unshift(directory); - currentParent = directory.parent; - } - return parents; - }; - - // Iterate over tree - const patternPath = path.join(basePathNormalised, pattern); - while (current != null) { - const [currentPath, node, parentINode] = current; +function uint8ArrayToBufferCopyless(data: Uint8Array): Buffer { + return Buffer.from(data.buffer, data.byteOffset, data.byteLength); +} - const stat = await fs.promises.stat(currentPath); - if (stat.isDirectory()) { - // `.` and `./` will not partially match the pattern, so we exclude the initial 
path - if ( - !minimatch(currentPath, patternPath, { partial: true }) && - currentPath !== basePathNormalised - ) { - current = queue.shift(); - continue; - } - // @ts-ignore: While the types don't fully match, it matches enough for our usage. - const childrenPaths = await fs.promises.readdir(currentPath); - const children = childrenPaths.map( - (v) => - [path.join(currentPath!, v.toString()), ++iNode, node] as [ - string, - INode, - INode, - ], - ); - queue.push(...children); - // Only yield root if we specify it - if (yieldRoot || node !== 1) { - directoryMap.set(node, { - type: 'directory', - path: currentPath, - iNode: node, - parent: parentINode, - children: children.map((v) => v[1]), - stat: yieldStats ? genStat(stat) : undefined, - }); - } - // Wildcards can find directories so we need yield them too - if (minimatch(currentPath, patternPath)) { - // Remove current from parent list - directoryMap.delete(node); - // Yield parents - if (yieldParents) { - for (const parent of getParents(parentINode)) yield parent; - } - // Yield directory - if (yieldDirectories) { - yield { - type: 'directory', - path: currentPath, - iNode: node, - parent: parentINode, - children: children.map((v) => v[1]), - stat: yieldStats ? genStat(stat) : undefined, - }; - } - } - } else if (stat.isFile()) { - if (!minimatch(currentPath, patternPath)) { - current = queue.shift(); - continue; - } - // Get the directories in order - if (yieldParents) { - for (const parent of getParents(parentINode)) yield parent; - } - // Yield file. - if (yieldFiles) { - yield { - type: 'file', - path: currentPath, - iNode: node, - parent: parentINode, - cNode: files.length, - stat: yieldStats ? 
genStat(stat) : undefined, - }; - } - files.push(currentPath); - } - current = queue.shift(); - } +/** + * Concatenates `Buffers` or `Uint8Array`s into a `Uint8Array` + */ +function uint8ArrayConcat(list: Array): Uint8Array { + return bufferToUint8ArrayCopyless(Buffer.concat(list)); } export { @@ -299,7 +159,9 @@ export { walkFs, deleteObject, mkdirExists, - globWalk, + bufferToUint8ArrayCopyless, + uint8ArrayToBufferCopyless, + uint8ArrayConcat, }; export { createVaultIdGenerator, encodeVaultId, decodeVaultId } from '../ids'; diff --git a/tests/vaults/VaultOps.test.ts b/tests/vaults/VaultOps.test.ts index 6b7a9bb7a..0d04713e8 100644 --- a/tests/vaults/VaultOps.test.ts +++ b/tests/vaults/VaultOps.test.ts @@ -3,6 +3,7 @@ import type { Vault } from '@/vaults/Vault'; import type KeyRing from '@/keys/KeyRing'; import type { LevelPath } from '@matrixai/db'; import type { FileTree } from '@/vaults/types'; +import type { ContentNode, TreeNode } from '@/vaults/types'; import fs from 'fs'; import path from 'path'; import os from 'os'; @@ -10,6 +11,7 @@ import { EncryptedFS, Stat } from 'encryptedfs'; import Logger, { LogLevel, StreamHandler } from '@matrixai/logger'; import { DB } from '@matrixai/db'; import VaultInternal from '@/vaults/VaultInternal'; +import * as fileTree from '@/vaults/fileTree'; import * as vaultOps from '@/vaults/VaultOps'; import * as vaultsErrors from '@/vaults/errors'; import * as vaultsUtils from '@/vaults/utils'; @@ -527,7 +529,7 @@ describe('VaultOps', () => { globalThis.defaultTimeout * 4, ); - describe('globWalk', () => { + describe('fileTree', () => { const relativeBase = '.'; const dir1: string = 'dir1'; const dir11: string = path.join(dir1, 'dir11'); @@ -549,10 +551,10 @@ describe('VaultOps', () => { }); }); - test('Works with efs', async () => { + test('globWalk works with efs', async () => { const files = await vault.readF(async (fs) => { const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ + for await (const 
treeNode of fileTree.globWalk({ fs: fs, basePath: '.', yieldDirectories: true, @@ -575,5 +577,45 @@ describe('VaultOps', () => { file4b, ]); }); + test('serializer with content works with efs', async () => { + const data = await vault.readF(async (fs) => { + const fileTreeGen = fileTree.globWalk({ + fs, + yieldStats: false, + yieldRoot: false, + yieldFiles: true, + yieldParents: true, + yieldDirectories: true, + }); + const data: Array = []; + const parserTransform = fileTree.parserTransformStreamFactory(); + const serializedStream = fileTree.serializerStreamFactory( + fs, + fileTreeGen, + true, + ); + const outputStream = serializedStream.pipeThrough(parserTransform); + for await (const output of outputStream) { + data.push(output); + } + return data; + }); + const contents = data + .filter((v) => v instanceof Uint8Array) + .map((v) => Buffer.from(v as Uint8Array).toString()); + const contentHeaders = data.filter( + (v) => !(v instanceof Uint8Array) && v.type === 'CONTENT', + ) as Array; + expect(contents).toIncludeAllMembers([ + 'content-file0', + 'content-file1', + 'content-file2', + 'content-file3', + 'content-file4', + ]); + for (const contentHeader of contentHeaders) { + expect(contentHeader.dataSize).toBe(13n); + } + }); }); }); diff --git a/tests/vaults/fileTree.test.ts b/tests/vaults/fileTree.test.ts new file mode 100644 index 000000000..f47831c72 --- /dev/null +++ b/tests/vaults/fileTree.test.ts @@ -0,0 +1,722 @@ +import type { ContentNode, FileTree, TreeNode } from '@/vaults/types'; +import fs from 'fs'; +import os from 'os'; +import path from 'path'; +import { ReadableStream } from 'stream/web'; +import { test } from '@fast-check/jest'; +import fc from 'fast-check'; +import * as fileTree from '@/vaults/fileTree'; +import * as vaultsTestUtils from './utils'; + +describe('fileTree', () => { + let dataDir: string; + + beforeEach(async () => { + dataDir = await fs.promises.mkdtemp( + path.join(os.tmpdir(), 'polykey-test-'), + ); + }); + afterEach(async () 
=> { + await fs.promises.rm(dataDir, { + force: true, + recursive: true, + }); + }); + + describe('globWalk', () => { + let cwd: string; + + const relativeBase = '.'; + const dir1: string = 'dir1'; + const dir2: string = 'dir2'; + const dir11: string = path.join(dir1, 'dir11'); + const dir12: string = path.join(dir1, 'dir12'); + const dir21: string = path.join(dir2, 'dir21'); + const dir22: string = path.join(dir2, 'dir22'); + const file0b: string = 'file0.b'; + const file1a: string = path.join(dir11, 'file1.a'); + const file2b: string = path.join(dir11, 'file2.b'); + const file3a: string = path.join(dir12, 'file3.a'); + const file4b: string = path.join(dir12, 'file4.b'); + const file5a: string = path.join(dir21, 'file5.a'); + const file6b: string = path.join(dir21, 'file6.b'); + const file7a: string = path.join(dir22, 'file7.a'); + const file8b: string = path.join(dir22, 'file8.b'); + const file9a: string = path.join(dir22, 'file9.a'); + + beforeEach(async () => { + await fs.promises.mkdir(path.join(dataDir, dir1)); + await fs.promises.mkdir(path.join(dataDir, dir11)); + await fs.promises.mkdir(path.join(dataDir, dir12)); + await fs.promises.mkdir(path.join(dataDir, dir2)); + await fs.promises.mkdir(path.join(dataDir, dir21)); + await fs.promises.mkdir(path.join(dataDir, dir22)); + await fs.promises.writeFile(path.join(dataDir, file0b), 'content-file0'); + await fs.promises.writeFile(path.join(dataDir, file1a), 'content-file1'); + await fs.promises.writeFile(path.join(dataDir, file2b), 'content-file2'); + await fs.promises.writeFile(path.join(dataDir, file3a), 'content-file3'); + await fs.promises.writeFile(path.join(dataDir, file4b), 'content-file4'); + await fs.promises.writeFile(path.join(dataDir, file5a), 'content-file5'); + await fs.promises.writeFile(path.join(dataDir, file6b), 'content-file6'); + await fs.promises.writeFile(path.join(dataDir, file7a), 'content-file7'); + await fs.promises.writeFile(path.join(dataDir, file8b), 'content-file8'); + await 
fs.promises.writeFile(path.join(dataDir, file9a), 'content-file9'); + cwd = process.cwd(); + process.chdir(dataDir); + }); + afterEach(async () => { + process.chdir(cwd); + }); + + test('Works with relative base path `.`', async () => { + const tree: FileTree = []; + for await (const treeNode of fileTree.globWalk({ + fs: fs, + basePath: relativeBase, + yieldDirectories: true, + yieldFiles: true, + yieldParents: true, + yieldRoot: true, + })) { + tree.push(treeNode); + } + const files = tree.map((v) => v.path); + expect(files).toContainAllValues([ + relativeBase, + dir1, + dir2, + dir11, + dir12, + dir21, + dir22, + file0b, + file1a, + file2b, + file3a, + file4b, + file5a, + file6b, + file7a, + file8b, + file9a, + ]); + }); + test('Works with relative base path `./`', async () => { + const tree: FileTree = []; + for await (const treeNode of fileTree.globWalk({ + fs: fs, + basePath: './', + yieldDirectories: true, + yieldFiles: true, + yieldParents: true, + yieldRoot: true, + })) { + tree.push(treeNode); + } + const files = tree.map((v) => v.path); + expect(files).toContainAllValues([ + './', + dir1, + dir2, + dir11, + dir12, + dir21, + dir22, + file0b, + file1a, + file2b, + file3a, + file4b, + file5a, + file6b, + file7a, + file8b, + file9a, + ]); + }); + test('Works with relative base path `./dir1`', async () => { + const tree: FileTree = []; + for await (const treeNode of fileTree.globWalk({ + fs: fs, + basePath: './dir1', + yieldDirectories: true, + yieldFiles: true, + yieldParents: true, + yieldRoot: true, + })) { + tree.push(treeNode); + } + const files = tree.map((v) => v.path); + expect(files).toContainAllValues([ + dir1, + dir11, + dir12, + file1a, + file2b, + file3a, + file4b, + ]); + }); + test('Works with absolute base path', async () => { + const tree: FileTree = []; + for await (const treeNode of fileTree.globWalk({ + fs: fs, + basePath: dataDir, + yieldDirectories: true, + yieldFiles: true, + yieldParents: true, + yieldRoot: true, + })) { + 
tree.push(treeNode); + } + const files = tree.map((v) => v.path); + expect(files).toContainAllValues( + [ + relativeBase, + dir1, + dir2, + dir11, + dir12, + dir21, + dir22, + file0b, + file1a, + file2b, + file3a, + file4b, + file5a, + file6b, + file7a, + file8b, + file9a, + ].map((v) => path.join(dataDir, v)), + ); + }); + test('Yields parent directories with `yieldParents`', async () => { + const tree: FileTree = []; + for await (const treeNode of fileTree.globWalk({ + fs: fs, + basePath: relativeBase, + yieldParents: true, + yieldFiles: false, + })) { + tree.push(treeNode); + } + const files = tree.map((v) => v.path); + expect(files).toContainAllValues([ + relativeBase, + dir2, + dir1, + dir11, + dir12, + dir21, + dir22, + ]); + }); + test('Does not yield the base path with `yieldParents` and `yieldRoot`', async () => { + const tree: FileTree = []; + for await (const treeNode of fileTree.globWalk({ + fs: fs, + basePath: relativeBase, + yieldRoot: false, + yieldParents: true, + yieldFiles: false, + })) { + tree.push(treeNode); + } + const files = tree.map((v) => v.path); + expect(files).not.toInclude(relativeBase); + expect(files).toContainAllValues([ + dir2, + dir1, + dir11, + dir12, + dir21, + dir22, + ]); + }); + test('Does not yield the base path with `yieldParents` and `yieldRoot` and absolute paths', async () => { + const tree: FileTree = []; + for await (const treeNode of fileTree.globWalk({ + fs: fs, + basePath: dataDir, + yieldRoot: false, + yieldParents: true, + yieldFiles: false, + })) { + tree.push(treeNode); + } + const files = tree.map((v) => v.path); + expect(files).not.toInclude(dataDir); + expect(files).toContainAllValues( + [dir2, dir1, dir11, dir12, dir21, dir22].map((v) => + path.join(dataDir, v), + ), + ); + }); + test('Yields stats with `yieldStats`', async () => { + const tree: FileTree = []; + for await (const treeNode of fileTree.globWalk({ + fs: fs, + basePath: relativeBase, + yieldStats: true, + yieldFiles: true, + yieldDirectories: 
true, + })) { + tree.push(treeNode); + } + tree.forEach((v) => expect(v.stat).toBeDefined()); + }); + // Globbing examples + test('glob with wildcard', async () => { + const tree: FileTree = []; + for await (const treeNode of fileTree.globWalk({ + fs: fs, + basePath: relativeBase, + pattern: '*', + yieldFiles: true, + yieldDirectories: true, + yieldParents: false, + })) { + tree.push(treeNode); + } + const files = tree.map((v) => v.path); + expect(files).toContainAllValues([dir1, dir2, file0b]); + }); + test('glob with wildcard ignores directories with `yieldDirectories: false`', async () => { + const tree: FileTree = []; + for await (const treeNode of fileTree.globWalk({ + fs: fs, + basePath: relativeBase, + pattern: '*', + yieldFiles: true, + yieldDirectories: false, + yieldParents: false, + })) { + tree.push(treeNode); + } + const files = tree.map((v) => v.path); + expect(files).not.toContainAllValues([relativeBase, dir1, dir2]); + expect(files).toContainAllValues([file0b]); + }); + test('glob with wildcard ignores files with `yieldFiles: false`', async () => { + const tree: FileTree = []; + for await (const treeNode of fileTree.globWalk({ + fs: fs, + basePath: relativeBase, + pattern: '*', + yieldFiles: false, + yieldDirectories: true, + yieldParents: false, + })) { + tree.push(treeNode); + } + const files = tree.map((v) => v.path); + expect(files).not.toContainAllValues([file0b]); + expect(files).toContainAllValues([dir1, dir2]); + }); + test('glob with globstar', async () => { + const tree: FileTree = []; + for await (const treeNode of fileTree.globWalk({ + fs: fs, + basePath: relativeBase, + pattern: '**', + yieldFiles: true, + yieldDirectories: true, + yieldParents: false, + })) { + tree.push(treeNode); + } + const files = tree.map((v) => v.path); + expect(files).not.toInclude(relativeBase); + expect(files).toContainAllValues([ + dir1, + dir2, + file0b, + dir11, + dir12, + dir21, + dir22, + file1a, + file2b, + file3a, + file4b, + file5a, + file6b, + file7a, 
+ file8b, + file9a, + ]); + }); + test('glob with globstar and directory pattern', async () => { + const tree: FileTree = []; + for await (const treeNode of fileTree.globWalk({ + fs: fs, + basePath: relativeBase, + pattern: '**/dir2/**', + yieldFiles: true, + yieldDirectories: true, + yieldParents: false, + })) { + tree.push(treeNode); + } + const files = tree.map((v) => v.path); + expect(files).not.toContainAllValues([ + relativeBase, + dir1, + dir2, + file0b, + dir11, + dir12, + file1a, + file2b, + file3a, + file4b, + ]); + expect(files).toContainAllValues([ + dir21, + dir22, + file5a, + file6b, + file7a, + file8b, + file9a, + ]); + }); + test('glob with globstar and wildcard', async () => { + const tree: FileTree = []; + for await (const treeNode of fileTree.globWalk({ + fs: fs, + basePath: relativeBase, + pattern: '**/*.a', + yieldFiles: true, + yieldDirectories: true, + yieldParents: false, + })) { + tree.push(treeNode); + } + const files = tree.map((v) => v.path); + expect(files).not.toContainAllValues([ + relativeBase, + dir1, + dir2, + file0b, + dir11, + dir12, + dir21, + dir22, + file2b, + file4b, + file6b, + file8b, + ]); + expect(files).toContainAllValues([ + file1a, + file3a, + file5a, + file7a, + file9a, + ]); + }); + }); + describe('parsers and generators', () => { + test.prop([vaultsTestUtils.headerGenericArb])( + 'generic header', + async (genericHeader) => { + const data = fileTree.generateGenericHeader(genericHeader); + const result = fileTree.parseGenericHeader(data); + expect(result.data).toMatchObject(genericHeader); + }, + ); + test.prop([vaultsTestUtils.headerContentArb])( + 'content header', + async (contentHeader) => { + const data = fileTree.generateContentHeader(contentHeader); + const result = fileTree.parseContentHeader(data); + expect(result.data).toMatchObject(contentHeader); + expect(result.remainder.byteLength).toBe(0); + }, + ); + }); + describe('serializer', () => { + let cwd: string; + + const dir1: string = 'dir1'; + const dir2: 
string = 'dir2'; + const dir11: string = path.join(dir1, 'dir11'); + const dir12: string = path.join(dir1, 'dir12'); + const dir21: string = path.join(dir2, 'dir21'); + const dir22: string = path.join(dir2, 'dir22'); + const file0b: string = 'file0.b'; + const file1a: string = path.join(dir11, 'file1.a'); + const file2b: string = path.join(dir11, 'file2.b'); + const file3a: string = path.join(dir12, 'file3.a'); + const file4b: string = path.join(dir12, 'file4.b'); + const file5a: string = path.join(dir21, 'file5.a'); + const file6b: string = path.join(dir21, 'file6.b'); + const file7a: string = path.join(dir22, 'file7.a'); + const file8b: string = path.join(dir22, 'file8.b'); + const file9a: string = path.join(dir22, 'file9.a'); + + beforeEach(async () => { + await fs.promises.mkdir(path.join(dataDir, dir1)); + await fs.promises.mkdir(path.join(dataDir, dir11)); + await fs.promises.mkdir(path.join(dataDir, dir12)); + await fs.promises.mkdir(path.join(dataDir, dir2)); + await fs.promises.mkdir(path.join(dataDir, dir21)); + await fs.promises.mkdir(path.join(dataDir, dir22)); + await fs.promises.writeFile(path.join(dataDir, file0b), 'content-file0'); + await fs.promises.writeFile(path.join(dataDir, file1a), 'content-file1'); + await fs.promises.writeFile(path.join(dataDir, file2b), 'content-file2'); + await fs.promises.writeFile(path.join(dataDir, file3a), 'content-file3'); + await fs.promises.writeFile(path.join(dataDir, file4b), 'content-file4'); + await fs.promises.writeFile(path.join(dataDir, file5a), 'content-file5'); + await fs.promises.writeFile(path.join(dataDir, file6b), 'content-file6'); + await fs.promises.writeFile(path.join(dataDir, file7a), 'content-file7'); + await fs.promises.writeFile(path.join(dataDir, file8b), 'content-file8'); + await fs.promises.writeFile(path.join(dataDir, file9a), 'content-file9'); + cwd = process.cwd(); + process.chdir(dataDir); + }); + afterEach(async () => { + process.chdir(cwd); + }); + + // TODO: + // - Add test for testing 
serializer on vaults fs. + + test('sends single tree', async () => { + const fileTreeGen = fileTree.globWalk({ + fs, + yieldStats: false, + yieldRoot: false, + yieldFiles: true, + yieldParents: true, + yieldDirectories: true, + }); + const data: Array = []; + const parserTransform = fileTree.parserTransformStreamFactory(); + const serializedStream = fileTree.serializerStreamFactory( + fs, + fileTreeGen, + false, + ); + const outputStream = serializedStream.pipeThrough(parserTransform); + for await (const output of outputStream) { + data.push(output); + } + const paths = data.map((v) => { + fileTree.parseTreeNode(v); + return v.path; + }); + expect(paths).toIncludeAllMembers([ + dir1, + dir2, + dir11, + dir12, + dir21, + dir22, + file0b, + file1a, + file2b, + file3a, + file4b, + file5a, + file6b, + file7a, + file8b, + file9a, + ]); + }); + test('sends tree with randomly sized chunks', async () => { + const fileTreeGen = fileTree.globWalk({ + fs, + yieldStats: false, + yieldRoot: false, + yieldFiles: true, + yieldParents: true, + yieldDirectories: true, + }); + const data: Array = []; + const snipperTransform = vaultsTestUtils.binaryStreamToSnippedStream([ + 5, 7, 11, 13, + ]); + const parserTransform = fileTree.parserTransformStreamFactory(); + const serializedStream = fileTree.serializerStreamFactory( + fs, + fileTreeGen, + false, + ); + const outputStream = serializedStream + .pipeThrough(snipperTransform) + .pipeThrough(parserTransform); + for await (const output of outputStream) { + data.push(output); + } + const paths = data.map((v) => { + fileTree.parseTreeNode(v); + return v.path; + }); + expect(paths).toIncludeAllMembers([ + dir1, + dir2, + dir11, + dir12, + dir21, + dir22, + file0b, + file1a, + file2b, + file3a, + file4b, + file5a, + file6b, + file7a, + file8b, + file9a, + ]); + }); + test('sends multiple trees', async () => { + function doubleWalkFactory() { + const stream1 = fileTree.serializerStreamFactory( + fs, + fileTree.globWalk({ + fs, + yieldStats: 
false, + yieldRoot: false, + yieldFiles: true, + yieldParents: true, + yieldDirectories: true, + }), + false, + ); + const stream2 = fileTree.serializerStreamFactory( + fs, + fileTree.globWalk({ + fs, + yieldStats: false, + yieldRoot: false, + yieldFiles: true, + yieldParents: true, + yieldDirectories: true, + }), + false, + ); + return new ReadableStream({ + start: async (controller) => { + for await (const data of stream1) controller.enqueue(data); + for await (const data of stream2) controller.enqueue(data); + controller.close(); + }, + }); + } + const data: Array = []; + const parserTransform = fileTree.parserTransformStreamFactory(); + // Const serializedStream = fileTree.serializerStreamFactory(fileTreeGen); + const serializedStream = doubleWalkFactory(); + const outputStream = serializedStream.pipeThrough(parserTransform); + for await (const output of outputStream) { + data.push(output); + } + const paths = data.map((v) => { + fileTree.parseTreeNode(v); + return v.path; + }); + expect(paths).toIncludeAllMembers([ + dir1, + dir2, + dir11, + dir12, + dir21, + dir22, + file0b, + file1a, + file2b, + file3a, + file4b, + file5a, + file6b, + file7a, + file8b, + file9a, + ]); + const dupes = paths.reduce((previous, value) => { + previous.set(value, (previous.get(value) ?? 
0) + 1); + return previous; + }, new Map()); + for (const dupe of dupes.values()) { + expect(dupe).toBe(2); + } + }); + test('file contents are sent and are correct', async () => { + const fileTreeGen = fileTree.globWalk({ + fs, + yieldStats: false, + yieldRoot: false, + yieldFiles: true, + yieldParents: false, + yieldDirectories: false, + }); + const data: Array = []; + const parserTransform = fileTree.parserTransformStreamFactory(); + const serializedStream = fileTree.serializerStreamFactory( + fs, + fileTreeGen, + true, + ); + const outputStream = serializedStream.pipeThrough(parserTransform); + for await (const output of outputStream) { + data.push(output); + } + const contents = data + .filter((v) => v instanceof Uint8Array) + .map((v) => Buffer.from(v as Uint8Array).toString()); + const contentHeaders = data.filter( + (v) => !(v instanceof Uint8Array) && v.type === 'CONTENT', + ) as Array; + expect(contents).toIncludeAllMembers([ + 'content-file0', + 'content-file1', + 'content-file2', + 'content-file3', + 'content-file4', + 'content-file5', + 'content-file6', + 'content-file7', + 'content-file8', + 'content-file9', + ]); + for (const contentHeader of contentHeaders) { + expect(contentHeader.dataSize).toBe(13n); + } + }); + test.prop( + [ + fc + .uint8Array({ size: 'large' }) + .noShrink() + .map((v) => Buffer.from(v)), + ], + { numRuns: 20 }, + )('handles invalid data', async (data) => { + let limit = 100; + const dataStream = new ReadableStream({ + pull: (controller) => + limit-- > 0 ? 
controller.enqueue(data) : controller.close(), + }); + const parserTransform = fileTree.parserTransformStreamFactory(); + const outputStream = dataStream.pipeThrough(parserTransform); + try { + for await (const _ of outputStream) { + // Only consume + } + } catch { + return; + } + throw Error('Should have thrown an error when parsing'); + }); + // TODO: tests for + // - empty files + // - files larger than content chunks + }); +}); diff --git a/tests/vaults/utils.test.ts b/tests/vaults/utils.test.ts index e9d989848..e0725455d 100644 --- a/tests/vaults/utils.test.ts +++ b/tests/vaults/utils.test.ts @@ -1,4 +1,4 @@ -import type { FileTree, VaultId } from '@/vaults/types'; +import type { VaultId } from '@/vaults/types'; import fs from 'fs'; import os from 'os'; import path from 'path'; @@ -83,405 +83,4 @@ describe('Vaults utils', () => { ).toBeUndefined(); expect(vaultsUtils.decodeVaultId('zF4VfxTOOSHORTxTV9')).toBeUndefined(); }); - - describe('globWalk', () => { - let cwd: string; - - const relativeBase = '.'; - const dir1: string = 'dir1'; - const dir2: string = 'dir2'; - const dir11: string = path.join(dir1, 'dir11'); - const dir12: string = path.join(dir1, 'dir12'); - const dir21: string = path.join(dir2, 'dir21'); - const dir22: string = path.join(dir2, 'dir22'); - const file0b: string = 'file0.b'; - const file1a: string = path.join(dir11, 'file1.a'); - const file2b: string = path.join(dir11, 'file2.b'); - const file3a: string = path.join(dir12, 'file3.a'); - const file4b: string = path.join(dir12, 'file4.b'); - const file5a: string = path.join(dir21, 'file5.a'); - const file6b: string = path.join(dir21, 'file6.b'); - const file7a: string = path.join(dir22, 'file7.a'); - const file8b: string = path.join(dir22, 'file8.b'); - const file9a: string = path.join(dir22, 'file9.a'); - - beforeEach(async () => { - await fs.promises.mkdir(path.join(dataDir, dir1)); - await fs.promises.mkdir(path.join(dataDir, dir11)); - await fs.promises.mkdir(path.join(dataDir, dir12)); 
- await fs.promises.mkdir(path.join(dataDir, dir2)); - await fs.promises.mkdir(path.join(dataDir, dir21)); - await fs.promises.mkdir(path.join(dataDir, dir22)); - await fs.promises.writeFile(path.join(dataDir, file0b), 'content-file0'); - await fs.promises.writeFile(path.join(dataDir, file1a), 'content-file1'); - await fs.promises.writeFile(path.join(dataDir, file2b), 'content-file2'); - await fs.promises.writeFile(path.join(dataDir, file3a), 'content-file3'); - await fs.promises.writeFile(path.join(dataDir, file4b), 'content-file4'); - await fs.promises.writeFile(path.join(dataDir, file5a), 'content-file5'); - await fs.promises.writeFile(path.join(dataDir, file6b), 'content-file6'); - await fs.promises.writeFile(path.join(dataDir, file7a), 'content-file7'); - await fs.promises.writeFile(path.join(dataDir, file8b), 'content-file8'); - await fs.promises.writeFile(path.join(dataDir, file9a), 'content-file9'); - cwd = process.cwd(); - process.chdir(dataDir); - }); - afterEach(async () => { - process.chdir(cwd); - }); - - test('Works with relative base path `.`', async () => { - const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ - fs: fs, - basePath: relativeBase, - yieldDirectories: true, - yieldFiles: true, - yieldParents: true, - yieldRoot: true, - })) { - tree.push(treeNode); - } - const files = tree.map((v) => v.path ?? ''); - expect(files).toContainAllValues([ - relativeBase, - dir1, - dir2, - dir11, - dir12, - dir21, - dir22, - file0b, - file1a, - file2b, - file3a, - file4b, - file5a, - file6b, - file7a, - file8b, - file9a, - ]); - }); - test('Works with relative base path `./`', async () => { - const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ - fs: fs, - basePath: './', - yieldDirectories: true, - yieldFiles: true, - yieldParents: true, - yieldRoot: true, - })) { - tree.push(treeNode); - } - const files = tree.map((v) => v.path ?? 
''); - expect(files).toContainAllValues([ - './', - dir1, - dir2, - dir11, - dir12, - dir21, - dir22, - file0b, - file1a, - file2b, - file3a, - file4b, - file5a, - file6b, - file7a, - file8b, - file9a, - ]); - }); - test('Works with relative base path `./dir1`', async () => { - const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ - fs: fs, - basePath: './dir1', - yieldDirectories: true, - yieldFiles: true, - yieldParents: true, - yieldRoot: true, - })) { - tree.push(treeNode); - } - const files = tree.map((v) => v.path ?? ''); - expect(files).toContainAllValues([ - dir1, - dir11, - dir12, - file1a, - file2b, - file3a, - file4b, - ]); - }); - test('Works with absolute base path', async () => { - const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ - fs: fs, - basePath: dataDir, - yieldDirectories: true, - yieldFiles: true, - yieldParents: true, - yieldRoot: true, - })) { - tree.push(treeNode); - } - const files = tree.map((v) => v.path ?? 
''); - expect(files).toContainAllValues( - [ - relativeBase, - dir1, - dir2, - dir11, - dir12, - dir21, - dir22, - file0b, - file1a, - file2b, - file3a, - file4b, - file5a, - file6b, - file7a, - file8b, - file9a, - ].map((v) => path.join(dataDir, v)), - ); - }); - test('Yields parent directories with `yieldParents`', async () => { - const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ - fs: fs, - basePath: relativeBase, - yieldParents: true, - yieldFiles: false, - })) { - tree.push(treeNode); - } - const files = tree.map((v) => v.path); - expect(files).toContainAllValues([ - relativeBase, - dir2, - dir1, - dir11, - dir12, - dir21, - dir22, - ]); - }); - test('Does not yield the base path with `yieldParents` and `yieldRoot`', async () => { - const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ - fs: fs, - basePath: relativeBase, - yieldRoot: false, - yieldParents: true, - yieldFiles: false, - })) { - tree.push(treeNode); - } - const files = tree.map((v) => v.path); - expect(files).not.toInclude(relativeBase); - expect(files).toContainAllValues([ - dir2, - dir1, - dir11, - dir12, - dir21, - dir22, - ]); - }); - test('Does not yield the base path with `yieldParents` and `yieldRoot` and absolute paths', async () => { - const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ - fs: fs, - basePath: dataDir, - yieldRoot: false, - yieldParents: true, - yieldFiles: false, - })) { - tree.push(treeNode); - } - const files = tree.map((v) => v.path); - expect(files).not.toInclude(dataDir); - expect(files).toContainAllValues( - [dir2, dir1, dir11, dir12, dir21, dir22].map((v) => - path.join(dataDir, v), - ), - ); - }); - test('Yields stats with `yieldStats`', async () => { - const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ - fs: fs, - basePath: relativeBase, - yieldStats: true, - yieldFiles: true, - yieldDirectories: true, - })) { - tree.push(treeNode); - } - 
tree.forEach((v) => - v.type === 'directory' || v.type === 'file' - ? expect(v.stat).toBeDefined() - : '', - ); - }); - // Globbing examples - test('glob with wildcard', async () => { - const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ - fs: fs, - basePath: relativeBase, - pattern: '*', - yieldFiles: true, - yieldDirectories: true, - yieldParents: false, - })) { - tree.push(treeNode); - } - const files = tree.map((v) => v.path); - expect(files).toContainAllValues([dir1, dir2, file0b]); - }); - test('glob with wildcard ignores directories with `yieldDirectories: false`', async () => { - const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ - fs: fs, - basePath: relativeBase, - pattern: '*', - yieldFiles: true, - yieldDirectories: false, - yieldParents: false, - })) { - tree.push(treeNode); - } - const files = tree.map((v) => v.path); - expect(files).not.toContainAllValues([relativeBase, dir1, dir2]); - expect(files).toContainAllValues([file0b]); - }); - test('glob with wildcard ignores files with `yieldFiles: false`', async () => { - const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ - fs: fs, - basePath: relativeBase, - pattern: '*', - yieldFiles: false, - yieldDirectories: true, - yieldParents: false, - })) { - tree.push(treeNode); - } - const files = tree.map((v) => v.path); - expect(files).not.toContainAllValues([file0b]); - expect(files).toContainAllValues([dir1, dir2]); - }); - test('glob with globstar', async () => { - const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ - fs: fs, - basePath: relativeBase, - pattern: '**', - yieldFiles: true, - yieldDirectories: true, - yieldParents: false, - })) { - tree.push(treeNode); - } - const files = tree.map((v) => v.path); - expect(files).not.toInclude(relativeBase); - expect(files).toContainAllValues([ - dir1, - dir2, - file0b, - dir11, - dir12, - dir21, - dir22, - file1a, - file2b, - file3a, - file4b, 
- file5a, - file6b, - file7a, - file8b, - file9a, - ]); - }); - test('glob with globstar and directory pattern', async () => { - const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ - fs: fs, - basePath: relativeBase, - pattern: '**/dir2/**', - yieldFiles: true, - yieldDirectories: true, - yieldParents: false, - })) { - tree.push(treeNode); - } - const files = tree.map((v) => v.path); - expect(files).not.toContainAllValues([ - relativeBase, - dir1, - dir2, - file0b, - dir11, - dir12, - file1a, - file2b, - file3a, - file4b, - ]); - expect(files).toContainAllValues([ - dir21, - dir22, - file5a, - file6b, - file7a, - file8b, - file9a, - ]); - }); - test('glob with globstar and wildcard', async () => { - const tree: FileTree = []; - for await (const treeNode of vaultsUtils.globWalk({ - fs: fs, - basePath: relativeBase, - pattern: '**/*.a', - yieldFiles: true, - yieldDirectories: true, - yieldParents: false, - })) { - tree.push(treeNode); - } - const files = tree.map((v) => v.path); - expect(files).not.toContainAllValues([ - relativeBase, - dir1, - dir2, - file0b, - dir11, - dir12, - dir21, - dir22, - file2b, - file4b, - file6b, - file8b, - ]); - expect(files).toContainAllValues([ - file1a, - file3a, - file5a, - file7a, - file9a, - ]); - }); - }); }); diff --git a/tests/vaults/utils.ts b/tests/vaults/utils.ts index 76125efa1..797374ae3 100644 --- a/tests/vaults/utils.ts +++ b/tests/vaults/utils.ts @@ -1,6 +1,12 @@ -import type { VaultActions } from '@/vaults/types'; +import type { + VaultActions, + HeaderContent, + HeaderGeneric, +} from '@/vaults/types'; +import { TransformStream } from 'stream/web'; import fc from 'fast-check'; import { vaultActions } from '@/vaults/types'; +import { HeaderType } from '@/vaults/fileTree'; const vaultActionArb = fc.constantFrom(...vaultActions); @@ -9,4 +15,51 @@ const vaultActionsArb = fc.dictionary(vaultActionArb, fc.constant(null), { maxKeys: vaultActions.length, }) as fc.Arbitrary; -export { 
vaultActionArb, vaultActionsArb }; +const headerTypeArb: fc.Arbitrary = fc.oneof( + fc.constant(HeaderType.CONTENT), + fc.constant(HeaderType.TREE), +); +const headerGenericArb = fc.record({ + type: headerTypeArb, +}); +const headerContentArb = fc.record({ + dataSize: fc.bigUint({ max: 2n ** 63n }), + iNode: fc.nat(), +}); + +/** + * This is used to convert regular chunks into randomly sized chunks based on + * a provided pattern. This is to replicate randomness introduced by packets + * splitting up the data. + */ +function binaryStreamToSnippedStream( + snippingPattern: Array, +): TransformStream { + let buffer = Buffer.alloc(0); + let iteration = 0; + return new TransformStream({ + transform: (chunk, controller) => { + buffer = Buffer.concat([buffer, chunk]); + while (true) { + const snipAmount = snippingPattern[iteration % snippingPattern.length]; + if (snipAmount > buffer.length) break; + iteration += 1; + const returnBuffer = buffer.subarray(0, snipAmount); + controller.enqueue(returnBuffer); + buffer = buffer.subarray(snipAmount); + } + }, + flush: (controller) => { + controller.enqueue(buffer); + }, + }); +} + +export { + vaultActionArb, + vaultActionsArb, + headerTypeArb, + headerGenericArb, + headerContentArb, + binaryStreamToSnippedStream, +};