From 8aeeebab8042fd3e6d70fbcb2f3698fca5511719 Mon Sep 17 00:00:00 2001
From: Wil Wade
Date: Mon, 1 Jul 2024 08:42:32 -0400
Subject: [PATCH 1/5] Add linting and formatting

---
 .github/ISSUE_TEMPLATE/config.yml  |    4 +-
 .github/workflows/main.yml         |    1 +
 .github/workflows/publish-next.yml |    1 +
 .github/workflows/release.yml      |    1 +
 .prettierignore                    |    1 +
 .prettierrc.json                   |    8 +
 eslint.config.mjs                  |   10 +
 package-lock.json                  | 1338 +++++++++++++++++++++++++++-
 package.json                       |   11 +-
 9 files changed, 1345 insertions(+), 30 deletions(-)
 create mode 100644 .prettierignore
 create mode 100644 .prettierrc.json
 create mode 100644 eslint.config.mjs

diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index 1d37178c..6551e0f6 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,4 +1,2 @@
+# yaml-language-server: $schema=https://json.schemastore.org/github-issue-config.json
 blank_issues_enabled: true
-contact_links:
-  - name: Issue Template
-
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index e6085c2e..35e87f43 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -1,3 +1,4 @@
+# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
 name: Tests CI
 on:
   push:
diff --git a/.github/workflows/publish-next.yml b/.github/workflows/publish-next.yml
index 43bf47b4..d6098ce5 100644
--- a/.github/workflows/publish-next.yml
+++ b/.github/workflows/publish-next.yml
@@ -1,3 +1,4 @@
+# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
 name: Publish NPM @next
 on:
   push:
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index e586df01..5f84e706 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -1,3 +1,4 @@
+# yaml-language-server: $schema=https://json.schemastore.org/github-workflow.json
 name: Release Package
 on:
   release:
diff --git a/.prettierignore b/.prettierignore
new file mode 100644
index 00000000..de953861
--- /dev/null
+++ b/.prettierignore
@@ -0,0 +1 @@
+gen-nodejs/
diff --git a/.prettierrc.json b/.prettierrc.json
new file mode 100644
index 00000000..cd87a91c
--- /dev/null
+++ b/.prettierrc.json
@@ -0,0 +1,8 @@
+{
+  "trailingComma": "es5",
+  "tabWidth": 2,
+  "semi": true,
+  "singleQuote": true,
+  "useTabs": false,
+  "printWidth": 120
+}
diff --git a/eslint.config.mjs b/eslint.config.mjs
new file mode 100644
index 00000000..5f5eedb2
--- /dev/null
+++ b/eslint.config.mjs
@@ -0,0 +1,10 @@
+// @ts-check
+
+import eslint from '@eslint/js';
+import tseslint from 'typescript-eslint';
+
+export default tseslint.config(
+  eslint.configs.recommended,
+  ...tseslint.configs.strict,
+  ...tseslint.configs.stylistic,
+);
diff --git a/package-lock.json b/package-lock.json
index 970e520e..b93c4916 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -25,8 +25,10 @@
         "xxhash-wasm": "^1.0.2"
       },
       "devDependencies": {
+        "@eslint/js": "^9.6.0",
         "@smithy/util-stream": "^3.0.0",
         "@types/chai": "^4.3.16",
+        "@types/eslint__js": "^8.42.3",
         "@types/json-schema": "^7.0.15",
         "@types/mocha": "^10.0.6",
         "@types/node": "^20.12.12",
@@ -39,10 +41,12 @@
         "chai": "4.4.1",
         "core-js": "^3.37.1",
         "esbuild": "^0.21.2",
+        "eslint": "^8.57.0",
         "events": "^3.3.0",
         "mocha": "^10.4.0",
         "msw": "^2.3.0",
         "object-stream": "^0.0.1",
+        "prettier": "3.3.2",
         "process": "^0.11.10",
         "regenerator-runtime": "^0.14.1",
         "sinon": "^17.0.2",
@@ -50,7 +54,8 @@
         "sinon-chai-in-order": "^0.1.0",
         "stream-browserify": "^3.0.0",
         "tsx": "^4.10.2",
-        "typescript": 
"^5.4.5" + "typescript": "^5.5.2", + "typescript-eslint": "^7.14.1" }, "engines": { "node": ">=18.18.2" @@ -1245,6 +1250,141 @@ "node": ">=12" } }, + "node_modules/@eslint-community/eslint-utils": { + "version": "4.4.0", + "resolved": "https://registry.npmjs.org/@eslint-community/eslint-utils/-/eslint-utils-4.4.0.tgz", + "integrity": "sha512-1/sA4dwrzBAyeUoQ6oxahHKmrZvsnLCg4RfxW3ZFGGmQkSNQPFNLV9CUEFQP1x9EYXHTo5p6xdhZM1Ne9p/AfA==", + "dev": true, + "dependencies": { + "eslint-visitor-keys": "^3.3.0" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "peerDependencies": { + "eslint": "^6.0.0 || ^7.0.0 || >=8.0.0" + } + }, + "node_modules/@eslint-community/regexpp": { + "version": "4.11.0", + "resolved": "https://registry.npmjs.org/@eslint-community/regexpp/-/regexpp-4.11.0.tgz", + "integrity": "sha512-G/M/tIiMrTAxEWRfLfQJMmGNX28IxBg4PBz8XqQhqUHLFI6TL2htpIB1iQCj144V5ee/JaKyT9/WZ0MGZWfA7A==", + "dev": true, + "engines": { + "node": "^12.0.0 || ^14.0.0 || >=16.0.0" + } + }, + "node_modules/@eslint/eslintrc": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/@eslint/eslintrc/-/eslintrc-2.1.4.tgz", + "integrity": "sha512-269Z39MS6wVJtsoUl10L60WdkhJVdPG24Q4eZTH3nnF6lpvSShEK3wQjDX9JRWAUPvPh7COouPpU9IrqaZFvtQ==", + "dev": true, + "dependencies": { + "ajv": "^6.12.4", + "debug": "^4.3.2", + "espree": "^9.6.0", + "globals": "^13.19.0", + "ignore": "^5.2.0", + "import-fresh": "^3.2.1", + "js-yaml": "^4.1.0", + "minimatch": "^3.1.2", + "strip-json-comments": "^3.1.1" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/@eslint/eslintrc/node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/@eslint/eslintrc/node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/@eslint/js": { + "version": "9.6.0", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-9.6.0.tgz", + "integrity": "sha512-D9B0/3vNg44ZeWbYMpBoXqNP4j6eQD5vNwIlGAuFRRzK/WtT/jvDQW3Bi9kkf3PMDMlM7Yi+73VLUsn5bJcl8A==", + "dev": true, + "engines": { + "node": "^18.18.0 || ^20.9.0 || >=21.1.0" + } + }, + "node_modules/@humanwhocodes/config-array": { + "version": "0.11.14", + "resolved": "https://registry.npmjs.org/@humanwhocodes/config-array/-/config-array-0.11.14.tgz", + "integrity": "sha512-3T8LkOmg45BV5FICb15QQMsyUSWrQ8AygVfC7ZG32zOalnqrilm018ZVCw0eapXux8FtA33q8PSRSstjee3jSg==", + "deprecated": "Use @eslint/config-array instead", + "dev": true, + "dependencies": { + "@humanwhocodes/object-schema": "^2.0.2", + "debug": "^4.3.1", + "minimatch": "^3.0.5" + }, + "engines": { + "node": ">=10.10.0" + } + }, + "node_modules/@humanwhocodes/config-array/node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": 
"sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/@humanwhocodes/config-array/node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/@humanwhocodes/module-importer": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/@humanwhocodes/module-importer/-/module-importer-1.0.1.tgz", + "integrity": "sha512-bxveV4V8v5Yb4ncFTT3rPSgZBOpCkjfK0y4oVVVJwIuDVBRMDXrPyXRL988i5ap9m9bnyEEjWfm5WkBmtffLfA==", + "dev": true, + "engines": { + "node": ">=12.22" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/nzakas" + } + }, + "node_modules/@humanwhocodes/object-schema": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/@humanwhocodes/object-schema/-/object-schema-2.0.3.tgz", + "integrity": "sha512-93zYdMES/c1D69yZiKDBj0V24vqNzB/koF26KPaagAfd3P/4gUlh3Dys5ogAK+Exi9QyzlD8x/08Zt7wIKcDcA==", + "deprecated": "Use @eslint/object-schema instead", + "dev": true + }, "node_modules/@inquirer/confirm": { "version": "3.1.6", "resolved": "https://registry.npmjs.org/@inquirer/confirm/-/confirm-3.1.6.tgz", @@ -1340,6 +1480,41 @@ "node": ">=18" } }, + "node_modules/@nodelib/fs.scandir": { + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.scandir/-/fs.scandir-2.1.5.tgz", + "integrity": "sha512-vq24Bq3ym5HEQm2NKCr3yXDwjc7vTsEThRDnkp2DK9p1uqLR+DHurm/NOTo0KG7HYHU7eppKZj3MyqYuMBf62g==", + "dev": true, + "dependencies": { + "@nodelib/fs.stat": "2.0.5", + "run-parallel": "^1.1.9" + }, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.stat": { + "version": "2.0.5", + "resolved": "https://registry.npmjs.org/@nodelib/fs.stat/-/fs.stat-2.0.5.tgz", + "integrity": "sha512-RkhPPp2zrqDAQA/2jNhnztcPAlv64XdhIp7a7454A5ovI7Bukxgt7MX7udwAu3zg1DcpPU0rz3VV1SeaqvY4+A==", + "dev": true, + "engines": { + "node": ">= 8" + } + }, + "node_modules/@nodelib/fs.walk": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/@nodelib/fs.walk/-/fs.walk-1.2.8.tgz", + "integrity": "sha512-oGB+UxlgWcgQkgwo8GcEGwemoTFt3FIO9ababBmaGwXIoBKZ+GTy0pP185beGg7Llih/NSHSV2XAs1lnznocSg==", + "dev": true, + "dependencies": { + "@nodelib/fs.scandir": "2.1.5", + "fastq": "^1.6.0" + }, + "engines": { + "node": ">= 8" + } + }, "node_modules/@open-draft/deferred-promise": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/@open-draft/deferred-promise/-/deferred-promise-2.2.0.tgz", @@ -2065,6 +2240,31 @@ "integrity": "sha512-4Kh9a6B2bQciAhf7FSuMRRkUWecJgJu9nPnx3yzpsfXX/c50REIqpHY4C82bXP90qrLtXtkDxTZosYO3UpOwlA==", "dev": true }, + "node_modules/@types/eslint": { + "version": "8.56.10", + "resolved": "https://registry.npmjs.org/@types/eslint/-/eslint-8.56.10.tgz", + "integrity": "sha512-Shavhk87gCtY2fhXDctcfS3e6FdxWkCx1iUZ9eEUbh7rTqlZT0/IzOkCOVt0fCjcFuZ9FPYfuezTBImfHCDBGQ==", + "dev": true, + "dependencies": { + "@types/estree": "*", + "@types/json-schema": "*" + } + }, + "node_modules/@types/eslint__js": { + "version": "8.42.3", + "resolved": "https://registry.npmjs.org/@types/eslint__js/-/eslint__js-8.42.3.tgz", + "integrity": 
"sha512-alfG737uhmPdnvkrLdZLcEKJ/B8s9Y4hrZ+YAdzUeoArBlSUERA2E87ROfOaS4jd/C45fzOoZzidLc1IPwLqOw==", + "dev": true, + "dependencies": { + "@types/eslint": "*" + } + }, + "node_modules/@types/estree": { + "version": "1.0.5", + "resolved": "https://registry.npmjs.org/@types/estree/-/estree-1.0.5.tgz", + "integrity": "sha512-/kYRxGDLWzHOB7q+wtSUQlFrtcdUccpfy+X+9iMBpHK8QLLhx2wIPYuS5DYtR9Wa/YlZAbIovy7qVdB1Aq6Lyw==", + "dev": true + }, "node_modules/@types/json-schema": { "version": "7.0.15", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", @@ -2158,6 +2358,249 @@ "integrity": "sha512-ltIpx+kM7g/MLRZfkbL7EsCEjfzCcScLpkg37eXEtx5kmrAKBkTJwd1GIAjDSL8wTpM6Hzn5YO4pSb91BEwu1g==", "dev": true }, + "node_modules/@typescript-eslint/eslint-plugin": { + "version": "7.14.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/eslint-plugin/-/eslint-plugin-7.14.1.tgz", + "integrity": "sha512-aAJd6bIf2vvQRjUG3ZkNXkmBpN+J7Wd0mfQiiVCJMu9Z5GcZZdcc0j8XwN/BM97Fl7e3SkTXODSk4VehUv7CGw==", + "dev": true, + "dependencies": { + "@eslint-community/regexpp": "^4.10.0", + "@typescript-eslint/scope-manager": "7.14.1", + "@typescript-eslint/type-utils": "7.14.1", + "@typescript-eslint/utils": "7.14.1", + "@typescript-eslint/visitor-keys": "7.14.1", + "graphemer": "^1.4.0", + "ignore": "^5.3.1", + "natural-compare": "^1.4.0", + "ts-api-utils": "^1.3.0" + }, + "engines": { + "node": "^18.18.0 || >=20.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "@typescript-eslint/parser": "^7.0.0", + "eslint": "^8.56.0" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/@typescript-eslint/parser": { + "version": "7.14.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/parser/-/parser-7.14.1.tgz", + "integrity": "sha512-8lKUOebNLcR0D7RvlcloOacTOWzOqemWEWkKSVpMZVF/XVcwjPR+3MD08QzbW9TCGJ+DwIc6zUSGZ9vd8cO1IA==", + "dev": true, + "dependencies": { + "@typescript-eslint/scope-manager": "7.14.1", + "@typescript-eslint/types": "7.14.1", + "@typescript-eslint/typescript-estree": "7.14.1", + "@typescript-eslint/visitor-keys": "7.14.1", + "debug": "^4.3.4" + }, + "engines": { + "node": "^18.18.0 || >=20.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.56.0" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/@typescript-eslint/scope-manager": { + "version": "7.14.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/scope-manager/-/scope-manager-7.14.1.tgz", + "integrity": "sha512-gPrFSsoYcsffYXTOZ+hT7fyJr95rdVe4kGVX1ps/dJ+DfmlnjFN/GcMxXcVkeHDKqsq6uAcVaQaIi3cFffmAbA==", + "dev": true, + "dependencies": { + "@typescript-eslint/types": "7.14.1", + "@typescript-eslint/visitor-keys": "7.14.1" + }, + "engines": { + "node": "^18.18.0 || >=20.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@typescript-eslint/type-utils": { + "version": "7.14.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/type-utils/-/type-utils-7.14.1.tgz", + "integrity": "sha512-/MzmgNd3nnbDbOi3LfasXWWe292+iuo+umJ0bCCMCPc1jLO/z2BQmWUUUXvXLbrQey/JgzdF/OV+I5bzEGwJkQ==", + "dev": true, + "dependencies": { + "@typescript-eslint/typescript-estree": "7.14.1", + "@typescript-eslint/utils": "7.14.1", + "debug": "^4.3.4", + "ts-api-utils": 
"^1.3.0" + }, + "engines": { + "node": "^18.18.0 || >=20.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.56.0" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/@typescript-eslint/types": { + "version": "7.14.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/types/-/types-7.14.1.tgz", + "integrity": "sha512-mL7zNEOQybo5R3AavY+Am7KLv8BorIv7HCYS5rKoNZKQD9tsfGUpO4KdAn3sSUvTiS4PQkr2+K0KJbxj8H9NDg==", + "dev": true, + "engines": { + "node": "^18.18.0 || >=20.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@typescript-eslint/typescript-estree": { + "version": "7.14.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/typescript-estree/-/typescript-estree-7.14.1.tgz", + "integrity": "sha512-k5d0VuxViE2ulIO6FbxxSZaxqDVUyMbXcidC8rHvii0I56XZPv8cq+EhMns+d/EVIL41sMXqRbK3D10Oza1bbA==", + "dev": true, + "dependencies": { + "@typescript-eslint/types": "7.14.1", + "@typescript-eslint/visitor-keys": "7.14.1", + "debug": "^4.3.4", + "globby": "^11.1.0", + "is-glob": "^4.0.3", + "minimatch": "^9.0.4", + "semver": "^7.6.0", + "ts-api-utils": "^1.3.0" + }, + "engines": { + "node": "^18.18.0 || >=20.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/@typescript-eslint/typescript-estree/node_modules/minimatch": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", + "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", + "dev": true, + "dependencies": { + "brace-expansion": "^2.0.1" + }, + "engines": { + "node": ">=16 || 14 >=14.17" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/@typescript-eslint/utils": { + "version": "7.14.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/utils/-/utils-7.14.1.tgz", + "integrity": "sha512-CMmVVELns3nak3cpJhZosDkm63n+DwBlDX8g0k4QUa9BMnF+lH2lr3d130M1Zt1xxmB3LLk3NV7KQCq86ZBBhQ==", + "dev": true, + "dependencies": { + "@eslint-community/eslint-utils": "^4.4.0", + "@typescript-eslint/scope-manager": "7.14.1", + "@typescript-eslint/types": "7.14.1", + "@typescript-eslint/typescript-estree": "7.14.1" + }, + "engines": { + "node": "^18.18.0 || >=20.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.56.0" + } + }, + "node_modules/@typescript-eslint/visitor-keys": { + "version": "7.14.1", + "resolved": "https://registry.npmjs.org/@typescript-eslint/visitor-keys/-/visitor-keys-7.14.1.tgz", + "integrity": "sha512-Crb+F75U1JAEtBeQGxSKwI60hZmmzaqA3z9sYsVm8X7W5cwLEm5bRe0/uXS6+MR/y8CVpKSR/ontIAIEPFcEkA==", + "dev": true, + "dependencies": { + "@typescript-eslint/types": "7.14.1", + "eslint-visitor-keys": "^3.4.3" + }, + "engines": { + "node": "^18.18.0 || >=20.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + } + }, + "node_modules/@ungap/structured-clone": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@ungap/structured-clone/-/structured-clone-1.2.0.tgz", + "integrity": 
"sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==", + "dev": true + }, + "node_modules/acorn": { + "version": "8.12.0", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.12.0.tgz", + "integrity": "sha512-RTvkC4w+KNXrM39/lWCUaG0IbRkWdCv7W/IOW9oU6SawyxulvkQy5HQPVTKxEjczcUvapcrw3cFx/60VN/NRNw==", + "dev": true, + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-jsx": { + "version": "5.3.2", + "resolved": "https://registry.npmjs.org/acorn-jsx/-/acorn-jsx-5.3.2.tgz", + "integrity": "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==", + "dev": true, + "peerDependencies": { + "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" + } + }, + "node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dev": true, + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, "node_modules/ansi-colors": { "version": "4.1.1", "resolved": "https://registry.npmjs.org/ansi-colors/-/ansi-colors-4.1.1.tgz", @@ -2244,6 +2687,15 @@ "dev": true, "peer": true }, + "node_modules/array-union": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/array-union/-/array-union-2.1.0.tgz", + "integrity": "sha512-HGyxoOTYUyCM6stUe6EJgnd4EoewAI7zMdfqO+kGjnlZmBDz/cR5pf8r/cR4Wq60sL/p0IkcjUEEPwS3GFrIyw==", + "dev": true, + "engines": { + "node": ">=8" + } + }, "node_modules/assert": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/assert/-/assert-2.1.0.tgz", @@ -2401,12 +2853,12 @@ } }, "node_modules/braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", "dev": true, "dependencies": { - "fill-range": "^7.0.1" + "fill-range": "^7.1.1" }, "engines": { "node": ">=8" @@ -2506,6 +2958,15 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/callsites": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz", + "integrity": "sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==", + "dev": true, + "engines": { + "node": ">=6" + } + }, "node_modules/camelcase": { "version": "6.3.0", "resolved": "https://registry.npmjs.org/camelcase/-/camelcase-6.3.0.tgz", @@ -2653,6 +3114,12 @@ "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", "dev": true }, + "node_modules/concat-map": { + "version": "0.0.1", + "resolved": "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz", + "integrity": "sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==", + "dev": true + }, "node_modules/cookie": { "version": "0.5.0", "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.5.0.tgz", @@ -2681,6 +3148,20 @@ "node-fetch": "^2.6.12" } }, + "node_modules/cross-spawn": { + 
"version": "7.0.3", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", + "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "dev": true, + "dependencies": { + "path-key": "^3.1.0", + "shebang-command": "^2.0.0", + "which": "^2.0.1" + }, + "engines": { + "node": ">= 8" + } + }, "node_modules/debug": { "version": "4.3.4", "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", @@ -2728,6 +3209,12 @@ "node": ">=6" } }, + "node_modules/deep-is": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", + "integrity": "sha512-oIPzksmTg4/MriiaYGO+okXDT7ztn/w3Eptv/+gSIdMdKsJo0u4CfYNFJPy+4SKMuCqGw2wxnA+URMg3t8a/bQ==", + "dev": true + }, "node_modules/define-data-property": { "version": "1.1.4", "resolved": "https://registry.npmjs.org/define-data-property/-/define-data-property-1.1.4.tgz", @@ -2771,19 +3258,43 @@ "node": ">=0.3.1" } }, - "node_modules/emoji-regex": { - "version": "8.0.0", - "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", - "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true - }, - "node_modules/es-define-property": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz", - "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==", + "node_modules/dir-glob": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/dir-glob/-/dir-glob-3.0.1.tgz", + "integrity": "sha512-WkrWp9GR4KXfKGYzOLmTuGVi1UWFfws377n9cc55/tb6DuqyF6pcQ5AbiHEshaDpY9v6oaSr2XCDidGmMwdzIA==", "dev": true, "dependencies": { - "get-intrinsic": "^1.2.4" + "path-type": "^4.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/doctrine": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/doctrine/-/doctrine-3.0.0.tgz", + "integrity": "sha512-yS+Q5i3hBf7GBkd4KG8a7eBNNWNGLTaEwwYWUijIYM7zrlYDM0BFXHjjPWlWZ1Rg7UaddZeIDmi9jF3HmqiQ2w==", + "dev": true, + "dependencies": { + "esutils": "^2.0.2" + }, + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", + "dev": true + }, + "node_modules/es-define-property": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.0.tgz", + "integrity": "sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==", + "dev": true, + "dependencies": { + "get-intrinsic": "^1.2.4" }, "engines": { "node": ">= 0.4" @@ -2857,6 +3368,191 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/eslint": { + "version": "8.57.0", + "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.57.0.tgz", + "integrity": "sha512-dZ6+mexnaTIbSBZWgou51U6OmzIhYM2VcNdtiTtI7qPNZm35Akpr0f6vtw3w1Kmn5PYo+tZVfh13WrhpS6oLqQ==", + "dev": true, + "dependencies": { + "@eslint-community/eslint-utils": "^4.2.0", + "@eslint-community/regexpp": "^4.6.1", + "@eslint/eslintrc": "^2.1.4", + "@eslint/js": "8.57.0", + "@humanwhocodes/config-array": "^0.11.14", + "@humanwhocodes/module-importer": "^1.0.1", + "@nodelib/fs.walk": "^1.2.8", + "@ungap/structured-clone": "^1.2.0", + "ajv": "^6.12.4", + "chalk": 
"^4.0.0", + "cross-spawn": "^7.0.2", + "debug": "^4.3.2", + "doctrine": "^3.0.0", + "escape-string-regexp": "^4.0.0", + "eslint-scope": "^7.2.2", + "eslint-visitor-keys": "^3.4.3", + "espree": "^9.6.1", + "esquery": "^1.4.2", + "esutils": "^2.0.2", + "fast-deep-equal": "^3.1.3", + "file-entry-cache": "^6.0.1", + "find-up": "^5.0.0", + "glob-parent": "^6.0.2", + "globals": "^13.19.0", + "graphemer": "^1.4.0", + "ignore": "^5.2.0", + "imurmurhash": "^0.1.4", + "is-glob": "^4.0.0", + "is-path-inside": "^3.0.3", + "js-yaml": "^4.1.0", + "json-stable-stringify-without-jsonify": "^1.0.1", + "levn": "^0.4.1", + "lodash.merge": "^4.6.2", + "minimatch": "^3.1.2", + "natural-compare": "^1.4.0", + "optionator": "^0.9.3", + "strip-ansi": "^6.0.1", + "text-table": "^0.2.0" + }, + "bin": { + "eslint": "bin/eslint.js" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/eslint-scope": { + "version": "7.2.2", + "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.2.2.tgz", + "integrity": "sha512-dOt21O7lTMhDM+X9mB4GX+DZrZtCUJPL/wlcTqxyrx5IvO0IYtILdtrQGQp+8n5S0gwSVmOf9NQrjMOgfQZlIg==", + "dev": true, + "dependencies": { + "esrecurse": "^4.3.0", + "estraverse": "^5.2.0" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/eslint-visitor-keys": { + "version": "3.4.3", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz", + "integrity": "sha512-wpc+LXeiyiisxPlEkUzU6svyS1frIO3Mgxj1fdy7Pm8Ygzguax2N3Fa/D/ag1WqbOprdI+uY6wMUl8/a2G+iag==", + "dev": true, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/eslint/node_modules/@eslint/js": { + "version": "8.57.0", + "resolved": "https://registry.npmjs.org/@eslint/js/-/js-8.57.0.tgz", + "integrity": "sha512-Ys+3g2TaW7gADOJzPt83SJtCDhMjndcDMFVQ/Tj9iA1BfJzFKD9mAUXT3OenpuPHbI6P/myECxRJrofUsDx/5g==", + "dev": true, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + } + }, + "node_modules/eslint/node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/eslint/node_modules/glob-parent": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", + "integrity": "sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==", + "dev": true, + "dependencies": { + "is-glob": "^4.0.3" + }, + "engines": { + "node": ">=10.13.0" + } + }, + "node_modules/eslint/node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/espree": { + "version": "9.6.1", + "resolved": "https://registry.npmjs.org/espree/-/espree-9.6.1.tgz", + "integrity": "sha512-oruZaFkjorTpF32kDSI5/75ViwGeZginGGy2NoOSg3Q9bnwlnmDm4HLnkl0RE3n+njDXR037aY1+x58Z/zFdwQ==", + 
"dev": true, + "dependencies": { + "acorn": "^8.9.0", + "acorn-jsx": "^5.3.2", + "eslint-visitor-keys": "^3.4.1" + }, + "engines": { + "node": "^12.22.0 || ^14.17.0 || >=16.0.0" + }, + "funding": { + "url": "https://opencollective.com/eslint" + } + }, + "node_modules/esquery": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/esquery/-/esquery-1.5.0.tgz", + "integrity": "sha512-YQLXUplAwJgCydQ78IMJywZCceoqk1oH01OERdSAJc/7U2AylwjhSCLDEtqwg811idIS/9fIU5GjG73IgjKMVg==", + "dev": true, + "dependencies": { + "estraverse": "^5.1.0" + }, + "engines": { + "node": ">=0.10" + } + }, + "node_modules/esrecurse": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/esrecurse/-/esrecurse-4.3.0.tgz", + "integrity": "sha512-KmfKL3b6G+RXvP8N1vr3Tq1kL/oCFgn2NYXEtqP8/L3pKapUA4G8cFVaoF3SU323CD4XypR/ffioHmkti6/Tag==", + "dev": true, + "dependencies": { + "estraverse": "^5.2.0" + }, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/estraverse": { + "version": "5.3.0", + "resolved": "https://registry.npmjs.org/estraverse/-/estraverse-5.3.0.tgz", + "integrity": "sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==", + "dev": true, + "engines": { + "node": ">=4.0" + } + }, + "node_modules/esutils": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/esutils/-/esutils-2.0.3.tgz", + "integrity": "sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/events": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/events/-/events-3.3.0.tgz", @@ -2866,6 +3562,40 @@ "node": ">=0.8.x" } }, + "node_modules/fast-deep-equal": { + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz", + "integrity": "sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q==", + "dev": true + }, + "node_modules/fast-glob": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/fast-glob/-/fast-glob-3.3.2.tgz", + "integrity": "sha512-oX2ruAFQwf/Orj8m737Y5adxDQO0LAB7/S5MnxCdTNDd4p6BsyIVsv9JQsATbTSq8KHRpLwIHbVlUNatxd+1Ow==", + "dev": true, + "dependencies": { + "@nodelib/fs.stat": "^2.0.2", + "@nodelib/fs.walk": "^1.2.3", + "glob-parent": "^5.1.2", + "merge2": "^1.3.0", + "micromatch": "^4.0.4" + }, + "engines": { + "node": ">=8.6.0" + } + }, + "node_modules/fast-json-stable-stringify": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz", + "integrity": "sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw==", + "dev": true + }, + "node_modules/fast-levenshtein": { + "version": "2.0.6", + "resolved": "https://registry.npmjs.org/fast-levenshtein/-/fast-levenshtein-2.0.6.tgz", + "integrity": "sha512-DCXu6Ifhqcks7TZKY3Hxp3y6qphY5SJZmrWMDrKcERSOXWQdMhU9Ig/PYrzyw/ul9jOIyh0N4M0tbC5hodg8dw==", + "dev": true + }, "node_modules/fast-xml-parser": { "version": "4.2.5", "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.2.5.tgz", @@ -2887,10 +3617,31 @@ "fxparser": "src/cli/cli.js" } }, + "node_modules/fastq": { + "version": "1.17.1", + "resolved": "https://registry.npmjs.org/fastq/-/fastq-1.17.1.tgz", + "integrity": "sha512-sRVD3lWVIXWg6By68ZN7vho9a1pQcN/WBFaAAsDDFzlJjvoGx0P8z7V1t72grFJfJhu3YPZBuu25f7Kaw2jN1w==", + "dev": true, + "dependencies": { + "reusify": "^1.0.4" + } + }, + 
"node_modules/file-entry-cache": { + "version": "6.0.1", + "resolved": "https://registry.npmjs.org/file-entry-cache/-/file-entry-cache-6.0.1.tgz", + "integrity": "sha512-7Gps/XWymbLk2QLYK4NzpMOrYjMhdIxXuIvy2QBsLE6ljuodKvdkWs/cpyJJ3CVIVpH0Oi1Hvg1ovbMzLdFBBg==", + "dev": true, + "dependencies": { + "flat-cache": "^3.0.4" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + } + }, "node_modules/fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", "dev": true, "dependencies": { "to-regex-range": "^5.0.1" @@ -2924,6 +3675,26 @@ "flat": "cli.js" } }, + "node_modules/flat-cache": { + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/flat-cache/-/flat-cache-3.2.0.tgz", + "integrity": "sha512-CYcENa+FtcUKLmhhqyctpclsq7QF38pKjZHsGNiSQF5r4FtoKDWabFDl3hzaEQMvT1LHEysw5twgLvpYYb4vbw==", + "dev": true, + "dependencies": { + "flatted": "^3.2.9", + "keyv": "^4.5.3", + "rimraf": "^3.0.2" + }, + "engines": { + "node": "^10.12.0 || >=12.0.0" + } + }, + "node_modules/flatted": { + "version": "3.3.1", + "resolved": "https://registry.npmjs.org/flatted/-/flatted-3.3.1.tgz", + "integrity": "sha512-X8cqMLLie7KsNUDSdzeN8FYK9rEt4Dt67OsG/DNGnYTSDBG4uFAJFBnUeiV+zCVAvwFy56IjM9sH51jVaEhNxw==", + "dev": true + }, "node_modules/for-each": { "version": "0.3.3", "resolved": "https://registry.npmjs.org/for-each/-/for-each-0.3.3.tgz", @@ -3042,6 +3813,53 @@ "node": ">= 6" } }, + "node_modules/globals": { + "version": "13.24.0", + "resolved": "https://registry.npmjs.org/globals/-/globals-13.24.0.tgz", + "integrity": "sha512-AhO5QUcj8llrbG09iWhPU2B204J1xnPeL8kQmVorSsy+Sjj1sk8gIyh6cUocGmH4L0UuhAJy+hJMRA4mgA4mFQ==", + "dev": true, + "dependencies": { + "type-fest": "^0.20.2" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/globals/node_modules/type-fest": { + "version": "0.20.2", + "resolved": "https://registry.npmjs.org/type-fest/-/type-fest-0.20.2.tgz", + "integrity": "sha512-Ne+eE4r0/iWnpAxD852z3A+N0Bt5RN//NjJwRd2VFHEmrywxf5vsZlh4R6lixl6B+wz/8d+maTSAkN1FIkI3LQ==", + "dev": true, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/globby": { + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/globby/-/globby-11.1.0.tgz", + "integrity": "sha512-jhIXaOzy1sb8IyocaruWSn1TjmnBVs8Ayhcy83rmxNJ8q2uWKCAj3CnJY+KpGSXCueAPc0i05kVvVKtP1t9S3g==", + "dev": true, + "dependencies": { + "array-union": "^2.1.0", + "dir-glob": "^3.0.1", + "fast-glob": "^3.2.9", + "ignore": "^5.2.0", + "merge2": "^1.4.1", + "slash": "^3.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/gopd": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/gopd/-/gopd-1.0.1.tgz", @@ -3054,6 +3872,12 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/graphemer": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/graphemer/-/graphemer-1.4.0.tgz", + "integrity": "sha512-EtKwoO6kxCL9WO5xipiHTZlSzBm7WLT627TqC/uVRd0HKmq8NXyebnNYxDoBi7wt8eTWrUrKXCOVaFq9x1kgag==", + "dev": true + }, 
"node_modules/graphql": { "version": "16.8.1", "resolved": "https://registry.npmjs.org/graphql/-/graphql-16.8.1.tgz", @@ -3170,6 +3994,40 @@ } ] }, + "node_modules/ignore": { + "version": "5.3.1", + "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.1.tgz", + "integrity": "sha512-5Fytz/IraMjqpwfd34ke28PTVMjZjJG2MPn5t7OE4eUCUNf8BAa7b5WUS9/Qvr6mwOQS7Mk6vdsMno5he+T8Xw==", + "dev": true, + "engines": { + "node": ">= 4" + } + }, + "node_modules/import-fresh": { + "version": "3.3.0", + "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.0.tgz", + "integrity": "sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==", + "dev": true, + "dependencies": { + "parent-module": "^1.0.0", + "resolve-from": "^4.0.0" + }, + "engines": { + "node": ">=6" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/imurmurhash": { + "version": "0.1.4", + "resolved": "https://registry.npmjs.org/imurmurhash/-/imurmurhash-0.1.4.tgz", + "integrity": "sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==", + "dev": true, + "engines": { + "node": ">=0.8.19" + } + }, "node_modules/inflight": { "version": "1.0.6", "resolved": "https://registry.npmjs.org/inflight/-/inflight-1.0.6.tgz", @@ -3307,6 +4165,15 @@ "node": ">=0.12.0" } }, + "node_modules/is-path-inside": { + "version": "3.0.3", + "resolved": "https://registry.npmjs.org/is-path-inside/-/is-path-inside-3.0.3.tgz", + "integrity": "sha512-Fd4gABb+ycGAmKou8eMftCupSir5lRxqf4aD/vd0cD2qc4HL07OjCeuHMr8Ro4CoMaeCKDB0/ECBOVWjTwUvPQ==", + "dev": true, + "engines": { + "node": ">=8" + } + }, "node_modules/is-plain-obj": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/is-plain-obj/-/is-plain-obj-2.1.0.tgz", @@ -3354,6 +4221,12 @@ "dev": true, "peer": true }, + "node_modules/isexe": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", + "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", + "dev": true + }, "node_modules/isomorphic-ws": { "version": "4.0.1", "resolved": "https://registry.npmjs.org/isomorphic-ws/-/isomorphic-ws-4.0.1.tgz", @@ -3374,6 +4247,24 @@ "js-yaml": "bin/js-yaml.js" } }, + "node_modules/json-buffer": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/json-buffer/-/json-buffer-3.0.1.tgz", + "integrity": "sha512-4bV5BfR2mqfQTJm+V5tPPdf+ZpuhiIvTuAB5g8kcrXOZpTT/QwwVRWBywX1ozr6lEuPdbHxwaJlm9G6mI2sfSQ==", + "dev": true + }, + "node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==", + "dev": true + }, + "node_modules/json-stable-stringify-without-jsonify": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/json-stable-stringify-without-jsonify/-/json-stable-stringify-without-jsonify-1.0.1.tgz", + "integrity": "sha512-Bdboy+l7tA3OGW6FjyFHWkP5LuByj1Tk33Ljyq0axyzdk9//JSi2u3fP1QSmd1KNwq6VOKYGlAu87CisVir6Pw==", + "dev": true + }, "node_modules/just-extend": { "version": "4.2.1", "resolved": "https://registry.npmjs.org/just-extend/-/just-extend-4.2.1.tgz", @@ -3381,6 +4272,28 @@ "dev": true, "peer": true }, + "node_modules/keyv": { + "version": "4.5.4", + "resolved": "https://registry.npmjs.org/keyv/-/keyv-4.5.4.tgz", + "integrity": 
"sha512-oxVHkHR/EJf2CNXnWxRLW6mg7JyCCUcG0DtEGmL2ctUo1PNTin1PUil+r/+4r5MpVgC/fn1kjsx7mjSujKqIpw==", + "dev": true, + "dependencies": { + "json-buffer": "3.0.1" + } + }, + "node_modules/levn": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/levn/-/levn-0.4.1.tgz", + "integrity": "sha512-+bT2uH4E5LGE7h/n3evcS/sQlJXCpIp6ym8OWJ5eV6+67Dsql/LaaT7qJBAt2rzfoa/5QBGBhxDix1dMt2kQKQ==", + "dev": true, + "dependencies": { + "prelude-ls": "^1.2.1", + "type-check": "~0.4.0" + }, + "engines": { + "node": ">= 0.8.0" + } + }, "node_modules/locate-path": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-6.0.0.tgz", @@ -3408,6 +4321,12 @@ "integrity": "sha512-z+Uw/vLuy6gQe8cfaFWD7p0wVv8fJl3mbzXh33RS+0oW2wvUqiRXiQ69gLWSLpgB5/6sU+r6BlQR0MBILadqTQ==", "dev": true }, + "node_modules/lodash.merge": { + "version": "4.6.2", + "resolved": "https://registry.npmjs.org/lodash.merge/-/lodash.merge-4.6.2.tgz", + "integrity": "sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==", + "dev": true + }, "node_modules/log-symbols": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/log-symbols/-/log-symbols-4.1.0.tgz", @@ -3445,6 +4364,28 @@ "get-func-name": "^2.0.1" } }, + "node_modules/merge2": { + "version": "1.4.1", + "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz", + "integrity": "sha512-8q7VEgMJW4J8tcfVPy8g09NcQwZdbwFEqhe/WZkoIzjn/3TGDwtOCYtXGxA3O8tPzpczCCDgv+P2P5y00ZJOOg==", + "dev": true, + "engines": { + "node": ">= 8" + } + }, + "node_modules/micromatch": { + "version": "4.0.7", + "resolved": "https://registry.npmjs.org/micromatch/-/micromatch-4.0.7.tgz", + "integrity": "sha512-LPP/3KorzCwBxfeUuZmaR6bG2kdeHSbe0P2tY3FLRU4vYrjYz5hI4QZwV0njUx3jeuKe67YukQ1LSPZBKDqO/Q==", + "dev": true, + "dependencies": { + "braces": "^3.0.3", + "picomatch": "^2.3.1" + }, + "engines": { + "node": ">=8.6" + } + }, "node_modules/minimatch": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-5.0.1.tgz", @@ -3591,6 +4532,12 @@ "node": "^14.17.0 || ^16.13.0 || >=18.0.0" } }, + "node_modules/natural-compare": { + "version": "1.4.0", + "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", + "integrity": "sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw==", + "dev": true + }, "node_modules/nise": { "version": "5.1.9", "resolved": "https://registry.npmjs.org/nise/-/nise-5.1.9.tgz", @@ -3713,6 +4660,23 @@ "wrappy": "1" } }, + "node_modules/optionator": { + "version": "0.9.4", + "resolved": "https://registry.npmjs.org/optionator/-/optionator-0.9.4.tgz", + "integrity": "sha512-6IpQ7mKUxRcZNLIObR0hz7lxsapSSIYNZJwXPGeF0mTVqGKFIXj1DQcMoT22S3ROcLyY/rz0PWaWZ9ayWmad9g==", + "dev": true, + "dependencies": { + "deep-is": "^0.1.3", + "fast-levenshtein": "^2.0.6", + "levn": "^0.4.1", + "prelude-ls": "^1.2.1", + "type-check": "^0.4.0", + "word-wrap": "^1.2.5" + }, + "engines": { + "node": ">= 0.8.0" + } + }, "node_modules/outvariant": { "version": "1.4.2", "resolved": "https://registry.npmjs.org/outvariant/-/outvariant-1.4.2.tgz", @@ -3754,6 +4718,18 @@ "resolved": "https://registry.npmjs.org/pako/-/pako-1.0.11.tgz", "integrity": "sha512-4hLB8Py4zZce5s4yd9XzopqwVv/yGNhV1Bl8NTmCq1763HeK2+EwVTv+leGeL13Dnh2wfbqowVPXCIO0z4taYw==" }, + "node_modules/parent-module": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/parent-module/-/parent-module-1.0.1.tgz", + "integrity": 
"sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==", + "dev": true, + "dependencies": { + "callsites": "^3.0.0" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/path-exists": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz", @@ -3763,12 +4739,39 @@ "node": ">=8" } }, + "node_modules/path-is-absolute": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/path-is-absolute/-/path-is-absolute-1.0.1.tgz", + "integrity": "sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, + "node_modules/path-key": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", + "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", + "dev": true, + "engines": { + "node": ">=8" + } + }, "node_modules/path-to-regexp": { "version": "6.2.1", "resolved": "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-6.2.1.tgz", "integrity": "sha512-JLyh7xT1kizaEvcaXOQwOc2/Yhw6KZOvPf1S8401UyLk86CU79LN3vl7ztXGm/pZ+YjoyAJ4rxmHwbkBXJX+yw==", "dev": true }, + "node_modules/path-type": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/path-type/-/path-type-4.0.0.tgz", + "integrity": "sha512-gDKb8aZMDeD/tZWs9P6+q0J9Mwkdl6xMV8TjnGP3qJVJ06bdMgkbBlLU8IdfOsIsFz2BW1rNVT3XuNEl8zPAvw==", + "dev": true, + "engines": { + "node": ">=8" + } + }, "node_modules/pathval": { "version": "1.1.1", "resolved": "https://registry.npmjs.org/pathval/-/pathval-1.1.1.tgz", @@ -3790,6 +4793,30 @@ "url": "https://github.com/sponsors/jonschlinkert" } }, + "node_modules/prelude-ls": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", + "integrity": "sha512-vkcDPrRZo1QZLbn5RLGPpg/WmIQ65qoWWhcGKf/b5eplkkarX0m9z8ppCat4mlOqUsWpyNuYgO3VRyrYHSzX5g==", + "dev": true, + "engines": { + "node": ">= 0.8.0" + } + }, + "node_modules/prettier": { + "version": "3.3.2", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.3.2.tgz", + "integrity": "sha512-rAVeHYMcv8ATV5d508CFdn+8/pHPpXeIid1DdrPwXnaAdH7cqjVbpJaT5eq4yRAFU/lsbwYwSF/n5iNrdJHPQA==", + "dev": true, + "bin": { + "prettier": "bin/prettier.cjs" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/prettier/prettier?sponsor=1" + } + }, "node_modules/process": { "version": "0.11.10", "resolved": "https://registry.npmjs.org/process/-/process-0.11.10.tgz", @@ -3799,6 +4826,15 @@ "node": ">= 0.6.0" } }, + "node_modules/punycode": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", + "integrity": "sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==", + "dev": true, + "engines": { + "node": ">=6" + } + }, "node_modules/q": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/q/-/q-1.5.1.tgz", @@ -3808,6 +4844,26 @@ "teleport": ">=0.2.0" } }, + "node_modules/queue-microtask": { + "version": "1.2.3", + "resolved": "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz", + "integrity": "sha512-NuaNSa6flKT5JaSYQzJok04JzTL1CA6aGhv5rfLW3PgqA+M2ChpZQnAC8h8i4ZFkBS8X5RqkDBHA7r4hej3K9A==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + 
"url": "https://feross.org/support" + } + ] + }, "node_modules/randombytes": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz", @@ -3858,6 +4914,15 @@ "node": ">=0.10.0" } }, + "node_modules/resolve-from": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-4.0.0.tgz", + "integrity": "sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==", + "dev": true, + "engines": { + "node": ">=4" + } + }, "node_modules/resolve-pkg-maps": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/resolve-pkg-maps/-/resolve-pkg-maps-1.0.0.tgz", @@ -3867,6 +4932,98 @@ "url": "https://github.com/privatenumber/resolve-pkg-maps?sponsor=1" } }, + "node_modules/reusify": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz", + "integrity": "sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw==", + "dev": true, + "engines": { + "iojs": ">=1.0.0", + "node": ">=0.10.0" + } + }, + "node_modules/rimraf": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz", + "integrity": "sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==", + "deprecated": "Rimraf versions prior to v4 are no longer supported", + "dev": true, + "dependencies": { + "glob": "^7.1.3" + }, + "bin": { + "rimraf": "bin.js" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/rimraf/node_modules/brace-expansion": { + "version": "1.1.11", + "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz", + "integrity": "sha512-iCuPHDFgrHX7H2vEI/5xpz07zSHB00TpugqhmYtVmMO6518mCuRMoOYFldEBl0g187ufozdaHgWKcYFb61qGiA==", + "dev": true, + "dependencies": { + "balanced-match": "^1.0.0", + "concat-map": "0.0.1" + } + }, + "node_modules/rimraf/node_modules/glob": { + "version": "7.2.3", + "resolved": "https://registry.npmjs.org/glob/-/glob-7.2.3.tgz", + "integrity": "sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==", + "deprecated": "Glob versions prior to v9 are no longer supported", + "dev": true, + "dependencies": { + "fs.realpath": "^1.0.0", + "inflight": "^1.0.4", + "inherits": "2", + "minimatch": "^3.1.1", + "once": "^1.3.0", + "path-is-absolute": "^1.0.0" + }, + "engines": { + "node": "*" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, + "node_modules/rimraf/node_modules/minimatch": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", + "integrity": "sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw==", + "dev": true, + "dependencies": { + "brace-expansion": "^1.1.7" + }, + "engines": { + "node": "*" + } + }, + "node_modules/run-parallel": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/run-parallel/-/run-parallel-1.2.0.tgz", + "integrity": "sha512-5l4VyZR86LZ/lDxZTR6jqL8AFE2S0IFLMP26AbjsLVADxHdhB/c0GUsH+y39UfCi3dzz8OlQuPmnaJOMoDHQBA==", + "dev": true, + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "dependencies": { + "queue-microtask": "^1.2.2" + } + }, "node_modules/safe-buffer": { "version": "5.2.1", "resolved": 
"https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", @@ -3895,6 +5052,18 @@ "dev": true, "peer": true }, + "node_modules/semver": { + "version": "7.6.2", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.2.tgz", + "integrity": "sha512-FNAIBWCx9qcRhoHcgcJ0gvU7SN1lYU2ZXuSfl04bSC5OpvDHFyJCjdNHomPXxjQlCBU67YW64PzY7/VIEH7F2w==", + "dev": true, + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/serialize-javascript": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/serialize-javascript/-/serialize-javascript-6.0.0.tgz", @@ -3921,6 +5090,27 @@ "node": ">= 0.4" } }, + "node_modules/shebang-command": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", + "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", + "dev": true, + "dependencies": { + "shebang-regex": "^3.0.0" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/shebang-regex": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", + "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", + "dev": true, + "engines": { + "node": ">=8" + } + }, "node_modules/signal-exit": { "version": "4.1.0", "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", @@ -4129,6 +5319,15 @@ "node": ">=8" } }, + "node_modules/slash": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", + "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", + "dev": true, + "engines": { + "node": ">=8" + } + }, "node_modules/snappyjs": { "version": "0.7.0", "resolved": "https://registry.npmjs.org/snappyjs/-/snappyjs-0.7.0.tgz", @@ -4226,6 +5425,12 @@ "url": "https://github.com/chalk/supports-color?sponsor=1" } }, + "node_modules/text-table": { + "version": "0.2.0", + "resolved": "https://registry.npmjs.org/text-table/-/text-table-0.2.0.tgz", + "integrity": "sha512-N+8UisAXDGk8PFXP4HAzVR9nbfmVJ3zYLAWiTIoqC5v5isinhr+r5uaO8+7r3BMfuNIufIsA7RdpVgacC2cSpw==", + "dev": true + }, "node_modules/thrift": { "version": "0.20.0", "resolved": "https://registry.npmjs.org/thrift/-/thrift-0.20.0.tgz", @@ -4258,6 +5463,18 @@ "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" }, + "node_modules/ts-api-utils": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/ts-api-utils/-/ts-api-utils-1.3.0.tgz", + "integrity": "sha512-UQMIo7pb8WRomKR1/+MFVLTroIvDVtMX3K6OUir8ynLyzB8Jeriont2bTAtmNPa1ekAgN7YPDyf6V+ygrdU+eQ==", + "dev": true, + "engines": { + "node": ">=16" + }, + "peerDependencies": { + "typescript": ">=4.2.0" + } + }, "node_modules/tslib": { "version": "2.6.2", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.6.2.tgz", @@ -4688,6 +5905,18 @@ "@esbuild/win32-x64": "0.20.2" } }, + "node_modules/type-check": { + "version": "0.4.0", + "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", + "integrity": "sha512-XleUoc9uwGXqjWwXaUTZAmzMcFZ5858QA2vvx1Ur5xIcixXIP+8LnFDgRplU30us6teqdlskFfu+ae4K79Ooew==", + "dev": true, + "dependencies": { + "prelude-ls": "^1.2.1" + }, + "engines": { + "node": ">= 0.8.0" + } + }, "node_modules/type-detect": { "version": "4.0.8", "resolved": 
"https://registry.npmjs.org/type-detect/-/type-detect-4.0.8.tgz", @@ -4710,9 +5939,9 @@ } }, "node_modules/typescript": { - "version": "5.4.5", - "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.4.5.tgz", - "integrity": "sha512-vcI4UpRgg81oIRUFwR0WSIHKt11nJ7SAVlYNIu+QpqeyXP+gpQJy/Z4+F0aGxSE4MqwjyXvW/TzgkLAx2AGHwQ==", + "version": "5.5.2", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.2.tgz", + "integrity": "sha512-NcRtPEOsPFFWjobJEtfihkLCZCXZt/os3zf8nTxjVH3RvTSxjrCamJpbExGvYOF+tFHc3pA65qpdwPbzjohhew==", "dev": true, "bin": { "tsc": "bin/tsc", @@ -4722,11 +5951,46 @@ "node": ">=14.17" } }, + "node_modules/typescript-eslint": { + "version": "7.14.1", + "resolved": "https://registry.npmjs.org/typescript-eslint/-/typescript-eslint-7.14.1.tgz", + "integrity": "sha512-Eo1X+Y0JgGPspcANKjeR6nIqXl4VL5ldXLc15k4m9upq+eY5fhU2IueiEZL6jmHrKH8aCfbIvM/v3IrX5Hg99w==", + "dev": true, + "dependencies": { + "@typescript-eslint/eslint-plugin": "7.14.1", + "@typescript-eslint/parser": "7.14.1", + "@typescript-eslint/utils": "7.14.1" + }, + "engines": { + "node": "^18.18.0 || >=20.0.0" + }, + "funding": { + "type": "opencollective", + "url": "https://opencollective.com/typescript-eslint" + }, + "peerDependencies": { + "eslint": "^8.56.0" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, "node_modules/undici-types": { "version": "5.26.5", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==" }, + "node_modules/uri-js": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/uri-js/-/uri-js-4.4.1.tgz", + "integrity": "sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg==", + "dev": true, + "dependencies": { + "punycode": "^2.1.0" + } + }, "node_modules/util": { "version": "0.12.5", "resolved": "https://registry.npmjs.org/util/-/util-0.12.5.tgz", @@ -4777,6 +6041,21 @@ "webidl-conversions": "^3.0.0" } }, + "node_modules/which": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", + "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", + "dev": true, + "dependencies": { + "isexe": "^2.0.0" + }, + "bin": { + "node-which": "bin/node-which" + }, + "engines": { + "node": ">= 8" + } + }, "node_modules/which-typed-array": { "version": "1.1.9", "resolved": "https://registry.npmjs.org/which-typed-array/-/which-typed-array-1.1.9.tgz", @@ -4797,6 +6076,15 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/word-wrap": { + "version": "1.2.5", + "resolved": "https://registry.npmjs.org/word-wrap/-/word-wrap-1.2.5.tgz", + "integrity": "sha512-BN22B5eaMMI9UMtjrGd5g5eCYPpCPDUy0FJXbYsaT5zYxjFOckS53SQDE3pWkVoWpHXVb3BrYcEN4Twa55B5cA==", + "dev": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/workerpool": { "version": "6.2.1", "resolved": "https://registry.npmjs.org/workerpool/-/workerpool-6.2.1.tgz", @@ -4827,9 +6115,9 @@ "dev": true }, "node_modules/ws": { - "version": "5.2.3", - "resolved": "https://registry.npmjs.org/ws/-/ws-5.2.3.tgz", - "integrity": "sha512-jZArVERrMsKUatIdnLzqvcfydI85dvd/Fp1u/VOpfdDWQ4c9qWXe+VIeAbQ5FrDwciAkr+lzofXLz3Kuf26AOA==", + "version": "5.2.4", + "resolved": "https://registry.npmjs.org/ws/-/ws-5.2.4.tgz", + "integrity": "sha512-fFCejsuC8f9kOSu9FYaOw8CdO68O3h5v0lg4p74o8JqWpwTf9tniOD+nOB78aWoVSS6WptVUmDrp/KPsMVBWFQ==", 
"dependencies": { "async-limiter": "~1.0.0" } diff --git a/package.json b/package.json index 0ecafd30..c021c25f 100644 --- a/package.json +++ b/package.json @@ -31,8 +31,10 @@ "xxhash-wasm": "^1.0.2" }, "devDependencies": { + "@eslint/js": "^9.6.0", "@smithy/util-stream": "^3.0.0", "@types/chai": "^4.3.16", + "@types/eslint__js": "^8.42.3", "@types/json-schema": "^7.0.15", "@types/mocha": "^10.0.6", "@types/node": "^20.12.12", @@ -45,10 +47,12 @@ "chai": "4.4.1", "core-js": "^3.37.1", "esbuild": "^0.21.2", + "eslint": "^8.57.0", "events": "^3.3.0", "mocha": "^10.4.0", "msw": "^2.3.0", "object-stream": "^0.0.1", + "prettier": "3.3.2", "process": "^0.11.10", "regenerator-runtime": "^0.14.1", "sinon": "^17.0.2", @@ -56,7 +60,8 @@ "sinon-chai-in-order": "^0.1.0", "stream-browserify": "^3.0.0", "tsx": "^4.10.2", - "typescript": "^5.4.5" + "typescript": "^5.5.2", + "typescript-eslint": "^7.14.1" }, "scripts": { "build": "npm run build:node && npm run build:types", @@ -64,7 +69,9 @@ "build:node": "tsc -b", "build:browser": "node esbuild.js", "type": "tsc --noEmit", - "lint": "echo 'Linting, it is on the TODO list...'", + "lint": "eslint && npx prettier . --check", + "lint:fix": "eslint --fix", + "format": "npx prettier . --write", "test": "mocha 'test/{,!(browser)/**}/*.{js,ts}'", "test:only": "mocha", "clean": "rm -Rf ./dist", From 872c6317a4880a6f9fbcced15207476931aa2333 Mon Sep 17 00:00:00 2001 From: Wil Wade Date: Mon, 1 Jul 2024 08:55:57 -0400 Subject: [PATCH 2/5] eslint setup Add ignore for dist --- eslint.config.mjs | 36 +++++++++++++++++++++++++++++++++ package-lock.json | 51 +++++++++++++++++++++++++++++++++++++++++++++++ package.json | 5 +++-- 3 files changed, 90 insertions(+), 2 deletions(-) diff --git a/eslint.config.mjs b/eslint.config.mjs index 5f5eedb2..7676fb11 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -2,9 +2,45 @@ import eslint from '@eslint/js'; import tseslint from 'typescript-eslint'; +import mochaPlugin from 'eslint-plugin-mocha'; +import globals from 'globals'; export default tseslint.config( eslint.configs.recommended, + mochaPlugin.configs.flat.recommended, ...tseslint.configs.strict, ...tseslint.configs.stylistic, + { + rules: { + // TODO: Fix/ignore in tests and remove + '@typescript-eslint/no-loss-of-precision': 'warn', + // TODO: Fix and remove + '@typescript-eslint/prefer-for-of': 'warn', + // Change back to an error (by removing) once we can + '@typescript-eslint/no-explicit-any': 'warn', + // Change back to an error (by removing) once we can + '@typescript-eslint/no-non-null-assertion': 'warn', + // Enable if we remove all cjs files + '@typescript-eslint/no-var-requires': 'off', + 'mocha/max-top-level-suites': 'off', + '@typescript-eslint/no-unused-vars': [ + 'error', + { + argsIgnorePattern: '^_', + varsIgnorePattern: '^_', + caughtErrorsIgnorePattern: '^_', + }, + ], + }, + }, + { + languageOptions: { + globals: { + ...globals.node, + }, + }, + }, + { + ignores: ['gen-nodejs/*', 'dist/**/*'], + } ); diff --git a/package-lock.json b/package-lock.json index b93c4916..bdd3dbb0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -42,6 +42,7 @@ "core-js": "^3.37.1", "esbuild": "^0.21.2", "eslint": "^8.57.0", + "eslint-plugin-mocha": "^10.4.3", "events": "^3.3.0", "mocha": "^10.4.0", "msw": "^2.3.0", @@ -3423,6 +3424,23 @@ "url": "https://opencollective.com/eslint" } }, + "node_modules/eslint-plugin-mocha": { + "version": "10.4.3", + "resolved": "https://registry.npmjs.org/eslint-plugin-mocha/-/eslint-plugin-mocha-10.4.3.tgz", + "integrity": 
"sha512-emc4TVjq5Ht0/upR+psftuz6IBG5q279p+1dSRDeHf+NS9aaerBi3lXKo1SEzwC29hFIW21gO89CEWSvRsi8IQ==", + "dev": true, + "dependencies": { + "eslint-utils": "^3.0.0", + "globals": "^13.24.0", + "rambda": "^7.4.0" + }, + "engines": { + "node": ">=14.0.0" + }, + "peerDependencies": { + "eslint": ">=7.0.0" + } + }, "node_modules/eslint-scope": { "version": "7.2.2", "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-7.2.2.tgz", @@ -3439,6 +3457,33 @@ "url": "https://opencollective.com/eslint" } }, + "node_modules/eslint-utils": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/eslint-utils/-/eslint-utils-3.0.0.tgz", + "integrity": "sha512-uuQC43IGctw68pJA1RgbQS8/NP7rch6Cwd4j3ZBtgo4/8Flj4eGE7ZYSZRN3iq5pVUv6GPdW5Z1RFleo84uLDA==", + "dev": true, + "dependencies": { + "eslint-visitor-keys": "^2.0.0" + }, + "engines": { + "node": "^10.0.0 || ^12.0.0 || >= 14.0.0" + }, + "funding": { + "url": "https://github.com/sponsors/mysticatea" + }, + "peerDependencies": { + "eslint": ">=5" + } + }, + "node_modules/eslint-utils/node_modules/eslint-visitor-keys": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-2.1.0.tgz", + "integrity": "sha512-0rSmRBzXgDzIsD6mGdJgevzgezI534Cer5L/vyMX0kHzT/jiB43jRhd9YUlMGYLQy2zprNmoT8qasCGtY+QaKw==", + "dev": true, + "engines": { + "node": ">=10" + } + }, "node_modules/eslint-visitor-keys": { "version": "3.4.3", "resolved": "https://registry.npmjs.org/eslint-visitor-keys/-/eslint-visitor-keys-3.4.3.tgz", @@ -4864,6 +4909,12 @@ } ] }, + "node_modules/rambda": { + "version": "7.5.0", + "resolved": "https://registry.npmjs.org/rambda/-/rambda-7.5.0.tgz", + "integrity": "sha512-y/M9weqWAH4iopRd7EHDEQQvpFPHj1AA3oHozE9tfITHUtTR7Z9PSlIRRG2l1GuW7sefC1cXFfIcF+cgnShdBA==", + "dev": true + }, "node_modules/randombytes": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/randombytes/-/randombytes-2.1.0.tgz", diff --git a/package.json b/package.json index c021c25f..8a2e84f4 100644 --- a/package.json +++ b/package.json @@ -48,6 +48,7 @@ "core-js": "^3.37.1", "esbuild": "^0.21.2", "eslint": "^8.57.0", + "eslint-plugin-mocha": "^10.4.3", "events": "^3.3.0", "mocha": "^10.4.0", "msw": "^2.3.0", @@ -69,8 +70,8 @@ "build:node": "tsc -b", "build:browser": "node esbuild.js", "type": "tsc --noEmit", - "lint": "eslint && npx prettier . --check", - "lint:fix": "eslint --fix", + "lint": "eslint . && npx prettier . --check", + "lint:fix": "eslint --fix .", "format": "npx prettier . 
--write", "test": "mocha 'test/{,!(browser)/**}/*.{js,ts}'", "test:only": "mocha", From 081b96897bacf81ed47157cd180240adc346b886 Mon Sep 17 00:00:00 2001 From: Wil Wade Date: Mon, 1 Jul 2024 08:50:32 -0400 Subject: [PATCH 3/5] Formatting ONLY --- .babelrc.js | 27 +- .github/ISSUE_TEMPLATE/BUG_Issue.md | 6 + .github/PULL_REQUEST_TEMPLATE.md | 18 +- .github/workflows/main.yml | 4 +- .github/workflows/publish-next.yml | 2 +- .github/workflows/release.yml | 2 +- README.md | 189 ++-- bootstrap.js | 6 +- browser/parquet.cjs.ts | 2 +- browser/parquet.esm.ts | 2 +- esbuild-plugins.js | 115 +- esbuild-serve.js | 38 +- esbuild-shims.js | 5 +- esbuild.js | 123 +- examples/nested.js | 26 +- examples/reader.js | 3 +- examples/server/README.md | 2 + examples/server/app.js | 22 +- examples/writer.js | 23 +- lib/bloom/sbbf.ts | 688 +++++------ lib/bloom/xxhasher.ts | 52 +- lib/bloomFilterIO/bloomFilterReader.ts | 80 +- lib/bloomFilterIO/bloomFilterWriter.ts | 16 +- lib/browser/compression.js | 72 +- lib/bufferReader.ts | 62 +- lib/codec/index.ts | 9 +- lib/codec/plain.ts | 102 +- lib/codec/plain_dictionary.ts | 8 +- lib/codec/rle.ts | 58 +- lib/codec/types.ts | 48 +- lib/compression.ts | 48 +- lib/custom.d.ts | 10 +- lib/declare.ts | 318 +++--- lib/fields.ts | 88 +- lib/jsonSchema.ts | 174 +-- lib/reader.ts | 503 ++++----- lib/schema.ts | 90 +- lib/shred.ts | 110 +- lib/types.ts | 207 ++-- lib/util.ts | 145 +-- lib/writer.ts | 302 +++-- parquet.ts | 18 +- test/bloomFilterIntegration.ts | 230 ++-- test/bloomFilterReader.test.ts | 50 +- test/bloomFilterWriter.test.ts | 56 +- test/browser/index.html | 20 +- test/browser/main.ts | 32 +- test/codec_plain.test.js | 838 +++++++++++--- test/codec_rle.js | 168 ++- test/decodeSchema.js | 299 +++-- test/dictionary.js | 13 +- test/fields.test.ts | 453 ++++---- test/integration.js | 276 +++-- test/jsonSchema.test.ts | 124 +- test/lib/bufferReader.test.js | 255 +++-- test/list.js | 42 +- test/metadata-cache.js | 33 +- test/mocks/handlers.js | 90 +- test/mocks/server.js | 4 +- test/reader.js | 202 ++-- test/readme-examples.test.ts | 14 +- test/reference-test/README.md | 1 - test/reference-test/read-all.test.ts | 20 +- test/sbbf.ts | 513 ++++----- test/schema.js | 105 +- test/shred.js | 276 ++--- test/statistics.js | 147 ++- test/test-files.js | 173 +-- test/test-files/address.schema.json | 68 +- test/test-files/address.schema.result.json | 56 +- test/test-files/array.schema.json | 30 +- test/test-files/array.schema.result.json | 94 +- test/test-files/customer.impala.json | 1003 ++++++++++++++++- .../json-schema-test-file.schema.result.json | 558 ++------- test/test-files/object-nested.schema.json | 116 +- .../object-nested.schema.result.json | 503 ++------- test/test-files/object.schema.json | 42 +- test/test-files/object.schema.result.json | 155 +-- test/thrift.js | 10 +- test/types.js | 267 ++--- test/util/assert_util.js | 4 +- tsconfig.json | 7 +- 82 files changed, 5856 insertions(+), 5314 deletions(-) diff --git a/.babelrc.js b/.babelrc.js index a932f90c..4c9ef314 100644 --- a/.babelrc.js +++ b/.babelrc.js @@ -1,14 +1,17 @@ module.exports = { - sourceType: "unambiguous", - plugins: ["babel-plugin-add-module-exports"], - presets: [ - ['@babel/preset-env', { - loose: true, - modules: "auto", - "useBuiltIns": "entry", // to ensure regeneratorRuntime is defined; see bootstrap.js - "corejs": 3, // use w/ "useBuiltIns", defaults=2, must match what is in package.json - // "targets": "> 0.25%, not dead" - }], - '@babel/preset-typescript' - ] + sourceType: 'unambiguous', + 
plugins: ['babel-plugin-add-module-exports'], + presets: [ + [ + '@babel/preset-env', + { + loose: true, + modules: 'auto', + useBuiltIns: 'entry', // to ensure regeneratorRuntime is defined; see bootstrap.js + corejs: 3, // use w/ "useBuiltIns", defaults=2, must match what is in package.json + // "targets": "> 0.25%, not dead" + }, + ], + '@babel/preset-typescript', + ], }; diff --git a/.github/ISSUE_TEMPLATE/BUG_Issue.md b/.github/ISSUE_TEMPLATE/BUG_Issue.md index 7e5dabd1..d11e21ed 100644 --- a/.github/ISSUE_TEMPLATE/BUG_Issue.md +++ b/.github/ISSUE_TEMPLATE/BUG_Issue.md @@ -5,19 +5,25 @@ labels: "" assignees: "" --- + Thanks for reporting an issue! ### Steps to reproduce + Tell us how to reproduce this issue. ### Expected behaviour + Tell us what should happen ### Actual behaviour + Tell us what happens instead ### Any logs, error output, etc? + ... ### Any other comments? + ... diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md index 5de7d446..3f55434e 100644 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ b/.github/PULL_REQUEST_TEMPLATE.md @@ -1,5 +1,5 @@ -Problem -======= +# Problem + problem statement Solution ======== @@ -7,14 +7,14 @@ What I/we did to solve this problem with @pairperson1 -Change summary: ---------------- -* Tidy, well formulated commit message -* Another great commit message -* Something else I/we did +## Change summary: + +- Tidy, well formulated commit message +- Another great commit message +- Something else I/we did + +## Steps to Verify: -Steps to Verify: ----------------- 1. A setup step / beginning state 1. What to do next 1. Any other instructions diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 35e87f43..5a6d9f0a 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -2,9 +2,9 @@ name: Tests CI on: push: - branches: [ main ] + branches: [main] pull_request: - branches: [ '**' ] + branches: ['**'] jobs: test: runs-on: ubuntu-latest diff --git a/.github/workflows/publish-next.yml b/.github/workflows/publish-next.yml index d6098ce5..01009acc 100644 --- a/.github/workflows/publish-next.yml +++ b/.github/workflows/publish-next.yml @@ -2,7 +2,7 @@ name: Publish NPM @next on: push: - branches: [ main ] + branches: [main] jobs: test: runs-on: ubuntu-latest diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 5f84e706..652a13eb 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -2,7 +2,7 @@ name: Release Package on: release: - types: [ released ] + types: [released] jobs: publish-to-npm: runs-on: ubuntu-latest diff --git a/README.md b/README.md index 2630056c..749e45a3 100644 --- a/README.md +++ b/README.md @@ -17,10 +17,12 @@ of it back out efficiently. 
The Parquet format is based on [Google's Dremel pape
## Forked Notice

This is a forked repository with code from various sources:
+
- Primary source [ironSource](https://github.com/ironSource/parquetjs) [npm: parquetjs](https://www.npmjs.com/package/parquetjs)
- Secondary source [ZJONSSON](https://github.com/ZJONSSON/parquetjs) [npm: parquetjs-lite](https://www.npmjs.com/package/parquetjs-lite)

## Installation
+
_parquet.js requires node.js >= 18.18.2_

```
@@ -28,58 +30,66 @@
```

### NodeJS
+
To use with nodejs:
+
```javascript
-import parquetjs from "@dsnp/parquetjs"
+import parquetjs from '@dsnp/parquetjs';
```

### Browser with Bundler
+
To use in a browser with a bundler, depending on your needs, write the appropriate plugin or resolver to point to either the Common JS or ES Module version:
+
```javascript
// Common JS
-"node_modules/@dsnp/parquetjs/dist/browser/parquetjs.cjs"
+'node_modules/@dsnp/parquetjs/dist/browser/parquetjs.cjs';
// ES Modules
-"node_modules/@dsnp/parquetjs/dist/browser/parquetjs.esm"
+'node_modules/@dsnp/parquetjs/dist/browser/parquetjs.esm';
```
+
or:
+
```javascript
// Common JS
-import parquetjs from "@dsnp/parquetjs/dist/browser/parquetjs.cjs"
+import parquetjs from '@dsnp/parquetjs/dist/browser/parquetjs.cjs';
// ES Modules
-import parquetjs from "@dsnp/parquetjs/dist/browser/parquetjs.esm"
+import parquetjs from '@dsnp/parquetjs/dist/browser/parquetjs.esm';
```

### Browser Direct: ES Modules
+
To use directly in the browser without a bundler using ES Modules:

1. Build the package: `npm install && npm run build:browser`
2. Copy `dist/browser/parquetjs.esm.js` to the server
3. Use it in your html or other ES Modules:
-   ```html
-   
-   ```
+   ```html
+   
+   ```

### Browser Direct: Plain Ol' JavaScript
+
To use directly in the browser without a bundler or ES Modules:

1. Build the package: `npm install && npm run build:browser`
2. Copy `dist/browser/parquetjs.js` to the server
-2. Use the global `parquetjs` variable to access parquetjs functions
+3. 
Use the global `parquetjs` variable to access parquetjs functions ```html - ``` + // console.log(parquetjs) + + ``` ## Usage: Writing files Once you have installed the parquet.js library, you can import it as a single module: -``` js +```js var parquet = require('@dsnp/parquetjs'); ``` @@ -89,14 +99,14 @@ is a simple example that shows how to instantiate a `ParquetSchema` object: ### Native Schema Definition -``` js +```js // declare a schema for the `fruits` table var schema = new parquet.ParquetSchema({ name: { type: 'UTF8' }, quantity: { type: 'INT64' }, price: { type: 'DOUBLE' }, date: { type: 'TIMESTAMP_MILLIS' }, - in_stock: { type: 'BOOLEAN' } + in_stock: { type: 'BOOLEAN' }, }); ``` @@ -108,39 +118,38 @@ var schema = new parquet.ParquetSchema({ quantity: parquet.ParquetFieldBuilder.createIntField(64), price: parquet.ParquetFieldBuilder.createDoubleField(), date: parquet.ParquetFieldBuilder.createTimestampField(), - in_stock: parquet.ParquetFieldBuilder.createBooleanField() + in_stock: parquet.ParquetFieldBuilder.createBooleanField(), }); ``` ### JSON Schema -``` js +```js // declare a schema for the `fruits` JSON Schema var schema = new parquet.ParquetSchema.fromJsonSchema({ - "type": "object", - "properties": { - "name": { - "type": "string" + type: 'object', + properties: { + name: { + type: 'string', }, - "quantity": { - "type": "integer" + quantity: { + type: 'integer', }, - "price": { - "type": "number" + price: { + type: 'number', }, - "date": { - "type": "string", - "format": "date-time" + date: { + type: 'string', + format: 'date-time', + }, + in_stock: { + type: 'boolean', }, - "in_stock": { - "type": "boolean" - } }, - "required": ["name", "quantity", "price", "date", "in_stock"] + required: ['name', 'quantity', 'price', 'date', 'in_stock'], }); ``` - Note that the Parquet schema supports nesting, so you can store complex, arbitrarily nested records into a single row (more on that later) while still maintaining good compression. @@ -149,13 +158,13 @@ Once we have a schema, we can create a `ParquetWriter` object. The writer will take input rows as JSON objects, convert them to the Parquet format and store them on disk. 
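One caveat on the JSON-Schema snippet above: `fromJsonSchema` is a static factory that already returns a `ParquetSchema`, so the leading `new` is unnecessary (it happens to work only because the factory returns an object). A minimal sketch of the plainer call, assuming the current parquetjs API:

```js
const parquet = require('@dsnp/parquetjs');

// call the factory directly on the class; no `new` needed
const schema = parquet.ParquetSchema.fromJsonSchema({
  type: 'object',
  properties: {
    name: { type: 'string' },
    quantity: { type: 'integer' },
  },
  required: ['name', 'quantity'],
});
```

Whichever of the three styles produced `schema`, the writer below is used the same way: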
-``` js
+```js
// create new ParquetWriter that writes to 'fruits.parquet`
var writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet');

// append a few rows to the file
-await writer.appendRow({name: 'apples', quantity: 10, price: 2.5, date: new Date(), in_stock: true});
-await writer.appendRow({name: 'oranges', quantity: 10, price: 2.5, date: new Date(), in_stock: true});
+await writer.appendRow({ name: 'apples', quantity: 10, price: 2.5, date: new Date(), in_stock: true });
+await writer.appendRow({ name: 'oranges', quantity: 10, price: 2.5, date: new Date(), in_stock: true });
```

Once we are finished adding rows to the file, we have to tell the writer object
@@ -165,32 +174,32 @@ to flush the metadata to disk and close the file by calling the `close()` method

Bloom filters can be added to multiple columns as demonstrated below:

-``` js
-  const options = {
-    bloomFilters: [
-      {
-        column: "name",
-        numFilterBytes: 1024,
-      },
-      {
-        column: "quantity",
-        numFilterBytes: 1024,
-      },
-    ]
-  };
+```js
+const options = {
+  bloomFilters: [
+    {
+      column: 'name',
+      numFilterBytes: 1024,
+    },
+    {
+      column: 'quantity',
+      numFilterBytes: 1024,
+    },
+  ],
+};

var writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet', options);
```

-By default, not passing any additional options calculates the optimal number of blocks according to the default number of distinct values (128*1024) and default false positive probability (0.001), which gives a filter byte size of 29,920.
+By default, not passing any additional options calculates the optimal number of blocks according to the default number of distinct values (128\*1024) and default false positive probability (0.001), which gives a filter byte size of 29,920.

The following options are provided to have the ability to adjust the split-block bloom filter settings.

-`numFilterBytes` - sets the desired size of the bloom filter in bytes. Defaults to 128 * 1024 * 1024 bits.
+`numFilterBytes` - sets the desired size of the bloom filter in bytes. Defaults to 128 \* 1024 \* 1024 bits.

`falsePositiveRate` - set the desired false positive percentage for bloom filter. Defaults to 0.001.

-`numDistinct` - sets the number of distinct values. Defaults to 128 * 1024 values.
+`numDistinct` - sets the number of distinct values. Defaults to 128 \* 1024 values.

Note that if numFilterBytes is provided then falsePositiveRate and numDistinct options are ignored.

@@ -204,7 +213,7 @@ You may open more than one cursor and use them concurrently. All cursors become
invalid once close() is called on the reader object.

-``` js
+```js
// create new ParquetReader that reads from 'fruits.parquet`
let reader = await parquet.ParquetReader.openFile('fruits.parquet');

@@ -213,7 +222,7 @@ let cursor = reader.getCursor();

// read all records from the file and print them
let record = null;
-while (record = await cursor.next()) {
+while ((record = await cursor.next())) {
  console.log(record);
}
```

When creating a cursor, you can optionally request that only a subset of the
columns should be read from disk. For example:

-``` js
+```js
// create a new cursor that will only return the `name` and `price` columns
let cursor = reader.getCursor(['name', 'price']);
```

It is important that you call close() after you are finished reading the file to
avoid leaking file descriptors. 
-``` js +```js await reader.close(); ``` @@ -237,7 +246,8 @@ await reader.close(); Bloom filters can be fetched from a parquet file by creating a reader and calling `getBloomFiltersFor`. -``` js + +```js // create new ParquetReader that reads from 'fruits.parquet` let reader = await parquet.ParquetReader.openFile('fruits.parquet'); @@ -255,26 +265,26 @@ const bloomFilters = reader.getBloomFiltersFor(['name']); } ``` + Calling `getBloomFiltersFor` on the reader returns an object with the keys being a column name and value being an array of length equal to the number of row groups that the column spans. Given the SplitBlockBloomFilter inclusion of a value in the filter can be checked as follows: -``` js +```js const sbbf = bloomFilters.name[0].ssbf; sbbf.check('apples') ===> true ``` - ### Reading data from a url Parquet files can be read from a url without having to download the whole file. You will have to supply the request library as a first argument and the request parameters as a second argument to the function `parquetReader.openUrl`. -``` js +```js const request = require('request'); -let reader = await parquet.ParquetReader.openUrl(request,'https://domain/fruits.parquet'); +let reader = await parquet.ParquetReader.openUrl(request, 'https://domain/fruits.parquet'); ``` ### Reading data from S3 @@ -283,26 +293,26 @@ Parquet files can be read from an S3 object without having to download the whole You will have to supply the aws-sdk client as first argument and the bucket/key information as second argument to the function `parquetReader.openS3`. -``` js +```js const AWS = require('aws-sdk'); const client = new AWS.S3({ accessKeyId: 'xxxxxxxxxxx', - secretAccessKey: 'xxxxxxxxxxx' + secretAccessKey: 'xxxxxxxxxxx', }); const params = { Bucket: 'xxxxxxxxxxx', - Key: 'xxxxxxxxxxx' + Key: 'xxxxxxxxxxx', }; -let reader = await parquet.ParquetReader.openS3(client,params); +let reader = await parquet.ParquetReader.openS3(client, params); ``` ### Reading data from a buffer If the complete parquet file is in buffer it can be read directly from memory without incurring any additional I/O. -``` js +```js const file = fs.readFileSync('fruits.parquet'); let reader = await parquet.ParquetReader.openBuffer(file); ``` @@ -318,7 +328,7 @@ The most simple encoding scheme is the PLAIN encoding. It simply stores the values as they are without any compression. The PLAIN encoding is currently the default for all types except `BOOLEAN`: -``` js +```js var schema = new parquet.ParquetSchema({ name: { type: 'UTF8', encoding: 'PLAIN' }, }); @@ -332,30 +342,28 @@ combination with the `BOOLEAN`, `INT32` and `INT64` types. The RLE encoding requires an additional `typeLength` parameter that contains the maximum number of bits required to store the largest value of the field. -``` js +```js var schema = new parquet.ParquetSchema({ age: { type: 'UINT_32', encoding: 'RLE', typeLength: 7 }, }); ``` - ### Optional Fields By default, all fields are required to be present in each row. 
You can also mark a field as 'optional' which will let you store rows with that field missing: -``` js +```js var schema = new parquet.ParquetSchema({ name: { type: 'UTF8' }, quantity: { type: 'INT64', optional: true }, }); var writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet'); -await writer.appendRow({name: 'apples', quantity: 10 }); -await writer.appendRow({name: 'banana' }); // not in stock +await writer.appendRow({ name: 'apples', quantity: 10 }); +await writer.appendRow({ name: 'banana' }); // not in stock ``` - ### Nested Rows & Arrays Parquet supports nested schemas that allow you to store rows that have a more @@ -366,7 +374,7 @@ list instead: Consider this example, which allows us to store a more advanced "fruits" table where each row contains a name, a list of colours and a list of "stock" objects. -``` js +```js // advanced fruits table var schema = new parquet.ParquetSchema({ name: { type: 'UTF8' }, @@ -376,8 +384,8 @@ var schema = new parquet.ParquetSchema({ fields: { price: { type: 'DOUBLE' }, quantity: { type: 'INT64' }, - } - } + }, + }, }); // the above schema allows us to store the following rows: @@ -388,17 +396,17 @@ await writer.appendRow({ colours: ['yellow'], stock: [ { price: 2.45, quantity: 16 }, - { price: 2.60, quantity: 420 } - ] + { price: 2.6, quantity: 420 }, + ], }); await writer.appendRow({ name: 'apple', colours: ['red', 'green'], stock: [ - { price: 1.20, quantity: 42 }, - { price: 1.30, quantity: 230 } - ] + { price: 1.2, quantity: 42 }, + { price: 1.3, quantity: 230 }, + ], }); await writer.close(); @@ -408,7 +416,7 @@ let reader = await parquet.ParquetReader.openFile('fruits.parquet'); let cursor = reader.getCursor([['name'], ['stock', 'price']]); let record = null; -while (record = await cursor.next()) { +while ((record = await cursor.next())) { console.log(record); } @@ -416,7 +424,7 @@ await reader.close(); ``` It might not be obvious why one would want to implement or use such a feature when -the same can - in principle - be achieved by serializing the record using JSON +the same can - in principle - be achieved by serializing the record using JSON (or a similar scheme) and then storing it into a UTF8 field: Putting aside the philosophical discussion on the merits of strict typing, @@ -425,11 +433,9 @@ have to duplicate this metadata (i.e. the field names) for every record. On top of that, knowing about the type of a field allows us to compress the remaining data more efficiently. - ### Nested Lists for Hive / Athena -Lists have to be annotated to be queriable with AWS Athena. See [parquet-format](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists) for more detail and a full working example with comments in the test directory ([`test/list.js`](test/list.js)) - +Lists have to be annotated to be queriable with AWS Athena. 
See [parquet-format](https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#lists) for more detail and a full working example with comments in the test directory ([`test/list.js`](test/list.js))

### List of Supported Types & Encodings

@@ -463,7 +469,6 @@ encodings:
UINT_64INT64PLAIN, RLE

-
## Buffering & Row Group Size

When writing a Parquet file, the `ParquetWriter` will buffer rows in memory
@@ -474,18 +479,16 @@ The size of a row group is configurable by the user and controls the maximum
number of rows that are buffered in memory at any given time as well as the
number of rows that are co-located on disk:

-``` js
+```js
var writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet');
writer.setRowGroupSize(8192);
```

-
## Dependencies

Parquet uses [thrift](https://thrift.apache.org/) to encode the schema and other
metadata, but the actual data does not use thrift.

-
## Notes

Currently parquet-cpp doesn't fully support DATA_PAGE_V2. You can work around this
diff --git a/bootstrap.js b/bootstrap.js
index 4e683190..56c2dde6 100644
--- a/bootstrap.js
+++ b/bootstrap.js
@@ -1,5 +1,5 @@
-import "regenerator-runtime/runtime";
-import "core-js/stable";
-const coreImportPromise = import('./parquet').catch(e => console.error('Error importing `parquet.js`:', e))
+import 'regenerator-runtime/runtime';
+import 'core-js/stable';
+const coreImportPromise = import('./parquet').catch((e) => console.error('Error importing `parquet.js`:', e));

export const core = coreImportPromise;
diff --git a/browser/parquet.cjs.ts b/browser/parquet.cjs.ts
index 94c24bbf..ee89badd 100644
--- a/browser/parquet.cjs.ts
+++ b/browser/parquet.cjs.ts
@@ -1,3 +1,3 @@
// Generates the types for the commonjs browser build

-export * from "../parquet";
+export * from '../parquet';
diff --git a/browser/parquet.esm.ts b/browser/parquet.esm.ts
index 71611a5f..82332b72 100644
--- a/browser/parquet.esm.ts
+++ b/browser/parquet.esm.ts
@@ -1,3 +1,3 @@
// Generates the types for the es modules browser build

-export * from "../parquet";
+export * from '../parquet';
diff --git a/esbuild-plugins.js b/esbuild-plugins.js
index 9ad62026..ef4f3932 100644
--- a/esbuild-plugins.js
+++ b/esbuild-plugins.js
@@ -3,72 +3,71 @@
 * does not include LZO or Brotli compression.
 */
const compressionBrowserPlugin = {
-    name: 'compressionBrowser',
-    setup(build) {
-        let path = require('path')
-        build.onResolve({filter: /^\.\/compression$/}, args => {
-            return {
-                path: path.resolve(__dirname, "lib","browser","compression.js")
-            }
-        })
-    }
-}
+  name: 'compressionBrowser',
+  setup(build) {
+    let path = require('path');
+    build.onResolve({ filter: /^\.\/compression$/ }, (args) => {
+      return {
+        path: path.resolve(__dirname, 'lib', 'browser', 'compression.js'),
+      };
+    });
+  },
+};

// Lifted from https://esbuild.github.io/plugins/#webassembly-plugin
const wasmPlugin = {
-    name: 'wasm',
-    setup(build) {
-        let path = require('path')
-        let fs = require('fs')
+  name: 'wasm',
+  setup(build) {
+    let path = require('path');
+    let fs = require('fs');

-        // Resolve ".wasm" files to a path with a namespace
-        build.onResolve({ filter: /\.wasm$/ }, args => {
-            // If this is the import inside the stub module, import the
-            // binary itself. Put the path in the "wasm-binary" namespace
-            // to tell our binary load callback to load the binary file. 
- if (args.namespace === 'wasm-stub') { - return { - path: args.path, - namespace: 'wasm-binary', - } - } + // Resolve ".wasm" files to a path with a namespace + build.onResolve({ filter: /\.wasm$/ }, (args) => { + // If this is the import inside the stub module, import the + // binary itself. Put the path in the "wasm-binary" namespace + // to tell our binary load callback to load the binary file. + if (args.namespace === 'wasm-stub') { + return { + path: args.path, + namespace: 'wasm-binary', + }; + } - // Otherwise, generate the JavaScript stub module for this - // ".wasm" file. Put it in the "wasm-stub" namespace to tell - // our stub load callback to fill it with JavaScript. - // - // Resolve relative paths to absolute paths here since this - // resolve callback is given "resolveDir", the directory to - // resolve imports against. - if (args.resolveDir === '') { - return // Ignore unresolvable paths - } - return { - path: path.isAbsolute(args.path) ? args.path : path.join(args.resolveDir, args.path), - namespace: 'wasm-stub', - } - }) + // Otherwise, generate the JavaScript stub module for this + // ".wasm" file. Put it in the "wasm-stub" namespace to tell + // our stub load callback to fill it with JavaScript. + // + // Resolve relative paths to absolute paths here since this + // resolve callback is given "resolveDir", the directory to + // resolve imports against. + if (args.resolveDir === '') { + return; // Ignore unresolvable paths + } + return { + path: path.isAbsolute(args.path) ? args.path : path.join(args.resolveDir, args.path), + namespace: 'wasm-stub', + }; + }); - // Virtual modules in the "wasm-stub" namespace are filled with - // the JavaScript code for compiling the WebAssembly binary. The - // binary itself is imported from a second virtual module. - build.onLoad({ filter: /.*/, namespace: 'wasm-stub' }, async (args) => ({ - contents: `import wasm from ${JSON.stringify(args.path)} + // Virtual modules in the "wasm-stub" namespace are filled with + // the JavaScript code for compiling the WebAssembly binary. The + // binary itself is imported from a second virtual module. + build.onLoad({ filter: /.*/, namespace: 'wasm-stub' }, async (args) => ({ + contents: `import wasm from ${JSON.stringify(args.path)} export default (imports) => WebAssembly.instantiate(wasm, imports).then( result => result.instance.exports)`, - })) + })); - // Virtual modules in the "wasm-binary" namespace contain the - // actual bytes of the WebAssembly file. This uses esbuild's - // built-in "binary" loader instead of manually embedding the - // binary data inside JavaScript code ourselves. - build.onLoad({ filter: /.*/, namespace: 'wasm-binary' }, async (args) => ({ - contents: await fs.promises.readFile(args.path), - loader: 'binary', - })) - }, -} - -module.exports = { compressionBrowserPlugin, wasmPlugin} + // Virtual modules in the "wasm-binary" namespace contain the + // actual bytes of the WebAssembly file. This uses esbuild's + // built-in "binary" loader instead of manually embedding the + // binary data inside JavaScript code ourselves. + build.onLoad({ filter: /.*/, namespace: 'wasm-binary' }, async (args) => ({ + contents: await fs.promises.readFile(args.path), + loader: 'binary', + })); + }, +}; +module.exports = { compressionBrowserPlugin, wasmPlugin }; diff --git a/esbuild-serve.js b/esbuild-serve.js index f9dd3167..343048aa 100644 --- a/esbuild-serve.js +++ b/esbuild-serve.js @@ -3,22 +3,26 @@ * It attaches the parquet.js exports to a "parquetjs" global variable. 
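 * (Assumed invocation, not spelled out in this file: run `node esbuild-serve.js`,
 * then open the example server pages, which load the bundle from this dev server.)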
* See the example server for how to use it. */ -const { compressionBrowserPlugin, wasmPlugin } = require("./esbuild-plugins"); +const { compressionBrowserPlugin, wasmPlugin } = require('./esbuild-plugins'); // esbuild has TypeScript support by default. It will use .tsconfig -require('esbuild').context({ - entryPoints: ['parquet.ts'], - outfile: 'main.js', - define: { "process.env.NODE_DEBUG": "false", "process.env.NODE_ENV": "\"production\"", global: "window" }, - platform: 'browser', - plugins: [compressionBrowserPlugin, wasmPlugin], - sourcemap: "external", - bundle: true, - globalName: 'parquetjs', - inject: ['./esbuild-shims.js'] -}).then(context => { - context.serve({ - servedir: __dirname, - }).then(server => { - console.log("serving parquetjs", server) +require('esbuild') + .context({ + entryPoints: ['parquet.ts'], + outfile: 'main.js', + define: { 'process.env.NODE_DEBUG': 'false', 'process.env.NODE_ENV': '"production"', global: 'window' }, + platform: 'browser', + plugins: [compressionBrowserPlugin, wasmPlugin], + sourcemap: 'external', + bundle: true, + globalName: 'parquetjs', + inject: ['./esbuild-shims.js'], }) -}) + .then((context) => { + context + .serve({ + servedir: __dirname, + }) + .then((server) => { + console.log('serving parquetjs', server); + }); + }); diff --git a/esbuild-shims.js b/esbuild-shims.js index 78c6eec8..c948dc27 100644 --- a/esbuild-shims.js +++ b/esbuild-shims.js @@ -1,3 +1,2 @@ -const buffer = require("buffer/").Buffer; -export let Buffer = buffer; - +const buffer = require('buffer/').Buffer; +export let Buffer = buffer; diff --git a/esbuild.js b/esbuild.js index a81513c0..bf2d5250 100644 --- a/esbuild.js +++ b/esbuild.js @@ -1,70 +1,67 @@ -const esbuild = require('esbuild') -const path = require("path") -const {compressionBrowserPlugin, wasmPlugin} = require("./esbuild-plugins"); +const esbuild = require('esbuild'); +const path = require('path'); +const { compressionBrowserPlugin, wasmPlugin } = require('./esbuild-plugins'); // esbuild has TypeScript support by default const baseConfig = { - bundle: true, - entryPoints: ['parquet.ts'], - define: { - "process.env.NODE_DEBUG": "false", - "process.env.NODE_ENV": "\"production\"", - global: "window" - }, - inject: ['./esbuild-shims.js'], - minify: true, - mainFields: ["browser", "module", "main"], - platform: 'browser', // default - plugins: [compressionBrowserPlugin, wasmPlugin], - target: "es2020" // default + bundle: true, + entryPoints: ['parquet.ts'], + define: { + 'process.env.NODE_DEBUG': 'false', + 'process.env.NODE_ENV': '"production"', + global: 'window', + }, + inject: ['./esbuild-shims.js'], + minify: true, + mainFields: ['browser', 'module', 'main'], + platform: 'browser', // default + plugins: [compressionBrowserPlugin, wasmPlugin], + target: 'es2020', // default }; // configuration for generating test code in browser const testConfig = { - bundle: true, - entryPoints: ['test/browser/main.ts'], - define: { - "process.env.NODE_DEBUG": "false", - "process.env.NODE_ENV": "\"production\"", - global: "window" - }, - inject: ['./esbuild-shims.js'], - minify: false, - mainFields: ["browser", "module", "main"], - platform: 'browser', // default - plugins: [compressionBrowserPlugin, wasmPlugin], - target: "es2020" // default -} + bundle: true, + entryPoints: ['test/browser/main.ts'], + define: { + 'process.env.NODE_DEBUG': 'false', + 'process.env.NODE_ENV': '"production"', + global: 'window', + }, + inject: ['./esbuild-shims.js'], + minify: false, + mainFields: ['browser', 'module', 'main'], + 
platform: 'browser', // default + plugins: [compressionBrowserPlugin, wasmPlugin], + target: 'es2020', // default +}; const targets = [ - { - ...baseConfig, - globalName: 'parquetjs', - outdir: path.resolve(__dirname, "dist","browser"), - }, - { - ...baseConfig, - format: "esm", - outfile: path.resolve(__dirname, "dist","browser","parquet.esm.js"), - }, - { - ...baseConfig, - format: "cjs", - outfile: path.resolve(__dirname, "dist","browser","parquet.cjs.js"), - }, - // Browser test code below - { - ...testConfig, - outfile: path.resolve(__dirname, "test","browser","main.js"), - } -] + { + ...baseConfig, + globalName: 'parquetjs', + outdir: path.resolve(__dirname, 'dist', 'browser'), + }, + { + ...baseConfig, + format: 'esm', + outfile: path.resolve(__dirname, 'dist', 'browser', 'parquet.esm.js'), + }, + { + ...baseConfig, + format: 'cjs', + outfile: path.resolve(__dirname, 'dist', 'browser', 'parquet.cjs.js'), + }, + // Browser test code below + { + ...testConfig, + outfile: path.resolve(__dirname, 'test', 'browser', 'main.js'), + }, +]; Promise.all(targets.map(esbuild.build)) - .then(results => { - if (results.reduce((m,r)=>m && !r.warnings.length, true)) { - console.log("built with no errors or warnings") - } - }) - .catch(e => { - console.error("Finished with errors: ", e.toString()); - process.exit(1); - }); - - - + .then((results) => { + if (results.reduce((m, r) => m && !r.warnings.length, true)) { + console.log('built with no errors or warnings'); + } + }) + .catch((e) => { + console.error('Finished with errors: ', e.toString()); + process.exit(1); + }); diff --git a/examples/nested.js b/examples/nested.js index 375a7ab4..929b5d95 100644 --- a/examples/nested.js +++ b/examples/nested.js @@ -1,7 +1,7 @@ 'use strict'; const parquet = require('..'); -process.on('unhandledRejection', r => console.error(r)); +process.on('unhandledRejection', (r) => console.error(r)); // write a new file 'fruits.parquet' async function example() { @@ -14,7 +14,7 @@ async function example() { fields: { quantity: { type: 'INT64', repeated: true }, warehouse: { type: 'UTF8' }, - } + }, }, }); @@ -23,27 +23,27 @@ async function example() { await writer.appendRow({ name: 'apples', price: 2.6, - colour: [ 'green', 'red' ], + colour: ['green', 'red'], stock: [ - { quantity: 10, warehouse: "A" }, - { quantity: 20, warehouse: "B" } - ] + { quantity: 10, warehouse: 'A' }, + { quantity: 20, warehouse: 'B' }, + ], }); await writer.appendRow({ name: 'oranges', price: 2.7, - colour: [ 'orange' ], + colour: ['orange'], stock: { quantity: [50, 75], - warehouse: "X" - } + warehouse: 'X', + }, }); await writer.appendRow({ name: 'kiwi', price: 4.2, - colour: [ 'green', 'brown' ] + colour: ['green', 'brown'], }); await writer.close(); @@ -53,7 +53,7 @@ async function example() { { let cursor = reader.getCursor(); let record = null; - while (record = await cursor.next()) { + while ((record = await cursor.next())) { console.log(record); } } @@ -61,14 +61,12 @@ async function example() { { let cursor = reader.getCursor([['name'], ['stock', 'warehouse']]); let record = null; - while (record = await cursor.next()) { + while ((record = await cursor.next())) { console.log(record); } } await reader.close(); - } example(); - diff --git a/examples/reader.js b/examples/reader.js index 0c11ce4b..91e72d29 100644 --- a/examples/reader.js +++ b/examples/reader.js @@ -6,7 +6,7 @@ async function example() { let cursor = reader.getCursor(); let record = null; - while (record = await cursor.next()) { + while ((record = await cursor.next())) { 
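    // cursor.next() resolves to null once every row has been read, which ends this loop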
console.log(record); } @@ -14,4 +14,3 @@ async function example() { } example(); - diff --git a/examples/server/README.md b/examples/server/README.md index 54cadde0..f406a067 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -1,6 +1,8 @@ # Example Server + This is a toy server that illustrates how to use the parquetjs library built with esbuild. To run it: + 1. npm install 1. View and edit the files in `views` to taste 1. node app.js diff --git a/examples/server/app.js b/examples/server/app.js index 149d7428..c7e26602 100644 --- a/examples/server/app.js +++ b/examples/server/app.js @@ -1,7 +1,7 @@ -const express = require('express') -const path = require("path") -const app = express() -const port = 3000 +const express = require('express'); +const path = require('path'); +const app = express(); +const port = 3000; app.use(express.static(path.join(__dirname, 'public'))); app.engine('ejs', require('ejs').__express); @@ -9,12 +9,12 @@ app.engine('ejs', require('ejs').__express); app.set('view engine', 'ejs'); app.get('/', (req, res) => { - res.render('parquetFiles', { - title: "Parquet Files", - port: port - }) -}) + res.render('parquetFiles', { + title: 'Parquet Files', + port: port, + }); +}); app.listen(port, () => { - console.log(`Example app listening at http://localhost:${port}`) -}) + console.log(`Example app listening at http://localhost:${port}`); +}); diff --git a/examples/writer.js b/examples/writer.js index d6563697..195ae7f9 100644 --- a/examples/writer.js +++ b/examples/writer.js @@ -4,13 +4,13 @@ const parquet = require('..'); // write a new file 'fruits.parquet' async function example() { let schema = new parquet.ParquetSchema({ - name: { type: 'UTF8' }, - quantity: { type: 'INT64', optional: true }, - price: { type: 'DOUBLE' }, - date: { type: 'TIMESTAMP_MICROS' }, - in_stock: { type: 'BOOLEAN' }, - colour: { type: 'UTF8', repeated: true }, - meta_json: { type: 'BSON', optional: true }, + name: { type: 'UTF8' }, + quantity: { type: 'INT64', optional: true }, + price: { type: 'DOUBLE' }, + date: { type: 'TIMESTAMP_MICROS' }, + in_stock: { type: 'BOOLEAN' }, + colour: { type: 'UTF8', repeated: true }, + meta_json: { type: 'BSON', optional: true }, }); let writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet'); @@ -21,7 +21,7 @@ async function example() { price: 2.6, date: new Date(), in_stock: true, - colour: [ 'green', 'red' ] + colour: ['green', 'red'], }); await writer.appendRow({ @@ -30,7 +30,7 @@ async function example() { price: 2.7, date: new Date(), in_stock: true, - colour: [ 'orange' ] + colour: ['orange'], }); await writer.appendRow({ @@ -38,12 +38,11 @@ async function example() { price: 4.2, date: new Date(), in_stock: false, - colour: [ 'green', 'brown' ], - meta_json: { expected_ship_date: new Date() } + colour: ['green', 'brown'], + meta_json: { expected_ship_date: new Date() }, }); await writer.close(); } example(); - diff --git a/lib/bloom/sbbf.ts b/lib/bloom/sbbf.ts index c87b40c0..0eb386a5 100644 --- a/lib/bloom/sbbf.ts +++ b/lib/bloom/sbbf.ts @@ -1,7 +1,7 @@ -import parquet_thrift from "../../gen-nodejs/parquet_types"; +import parquet_thrift from '../../gen-nodejs/parquet_types'; import Long from 'long'; -import XxHasher from "./xxhasher" -import {Block} from "../declare"; +import XxHasher from './xxhasher'; +import { Block } from '../declare'; /** * @class SplitBlockBloomFilter @@ -31,368 +31,374 @@ import {Block} from "../declare"; * .init() */ class SplitBlockBloomFilter { - private static readonly salt: Array = [ - 
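        // eight 32-bit salt constants from the Parquet split-block Bloom filter spec (one per block word)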
0x47b6137b, - 0x44974d91, - 0x8824ad5b, - 0xa2b7289d, - 0x705495c7, - 0x2df1424b, - 0x9efc4947, - 0x5c6bfb31 - ] - - // How many bits are in a single block: - // - Blocks are UInt32 arrays - // - There are 8 UInt32 words in each block. - private static readonly WORDS_PER_BLOCK = 8 - private static readonly WORD_SIZE = 32 - private static readonly BITS_PER_BLOCK: number = SplitBlockBloomFilter.WORDS_PER_BLOCK * SplitBlockBloomFilter.WORD_SIZE - - // Default number of blocks in a Split Block Bloom filter (SBBF) - private static readonly NUMBER_OF_BLOCKS: number = 32 - - // The lower bound of SBBF size in bytes. - // Currently this is 1024 - private static readonly LOWER_BOUND_BYTES = SplitBlockBloomFilter.NUMBER_OF_BLOCKS * SplitBlockBloomFilter.BITS_PER_BLOCK / 8; - - // The upper bound of SBBF size, set to default row group size in bytes. - // Note that the subsquent requirements for an effective bloom filter on a row group this size would mean this - // is unacceptably large for a lightweight client application. - public static readonly UPPER_BOUND_BYTES = 128 * 1024 * 1024; - - public static readonly DEFAULT_FALSE_POSITIVE_RATE = 0.001 - public static readonly DEFAULT_DISTINCT_VALUES = 128 * 1024 - - /** - * @function initBlock - * @description initializes a single block - */ - static initBlock(): Block { - return Uint32Array.from(Array(SplitBlockBloomFilter.WORDS_PER_BLOCK).fill(0)) + private static readonly salt: Array = [ + 0x47b6137b, 0x44974d91, 0x8824ad5b, 0xa2b7289d, 0x705495c7, 0x2df1424b, 0x9efc4947, 0x5c6bfb31, + ]; + + // How many bits are in a single block: + // - Blocks are UInt32 arrays + // - There are 8 UInt32 words in each block. + private static readonly WORDS_PER_BLOCK = 8; + private static readonly WORD_SIZE = 32; + private static readonly BITS_PER_BLOCK: number = + SplitBlockBloomFilter.WORDS_PER_BLOCK * SplitBlockBloomFilter.WORD_SIZE; + + // Default number of blocks in a Split Block Bloom filter (SBBF) + private static readonly NUMBER_OF_BLOCKS: number = 32; + + // The lower bound of SBBF size in bytes. + // Currently this is 1024 + private static readonly LOWER_BOUND_BYTES = + (SplitBlockBloomFilter.NUMBER_OF_BLOCKS * SplitBlockBloomFilter.BITS_PER_BLOCK) / 8; + + // The upper bound of SBBF size, set to default row group size in bytes. + // Note that the subsquent requirements for an effective bloom filter on a row group this size would mean this + // is unacceptably large for a lightweight client application. + public static readonly UPPER_BOUND_BYTES = 128 * 1024 * 1024; + + public static readonly DEFAULT_FALSE_POSITIVE_RATE = 0.001; + public static readonly DEFAULT_DISTINCT_VALUES = 128 * 1024; + + /** + * @function initBlock + * @description initializes a single block + */ + static initBlock(): Block { + return Uint32Array.from(Array(SplitBlockBloomFilter.WORDS_PER_BLOCK).fill(0)); + } + + /** + * @function from + * @description initialize a SplitBlockBloomFilter for a single column row group + * from the provided Buffer + * @param buffer a NodeJs Buffer containing bloom filter data for a row group. 
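+   * @param rowCount optional; unused in the body below, where the block count is derived from the buffer length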
+ */ + static from(buffer: Buffer, rowCount?: number): SplitBlockBloomFilter { + if (buffer.length === 0) { + throw new Error('buffer is empty'); } + const chunkSize = SplitBlockBloomFilter.WORDS_PER_BLOCK; + const uint32sFromBuf = new Uint32Array(buffer.buffer); + let result: Array = []; + const length = uint32sFromBuf.length; - /** - * @function from - * @description initialize a SplitBlockBloomFilter for a single column row group - * from the provided Buffer - * @param buffer a NodeJs Buffer containing bloom filter data for a row group. - */ - static from(buffer: Buffer, rowCount?: number): SplitBlockBloomFilter { - if (buffer.length === 0) { - throw new Error("buffer is empty") - } - const chunkSize = SplitBlockBloomFilter.WORDS_PER_BLOCK - const uint32sFromBuf = new Uint32Array(buffer.buffer) - let result: Array = []; - const length = uint32sFromBuf.length; - - for (let index = 0; index < length; index += chunkSize) { - result.push(uint32sFromBuf.subarray(index, index + chunkSize)); - } - let sb = new SplitBlockBloomFilter() - sb.splitBlockFilter = result - sb.numBlocks = result.length - // these will not be knowable when reading - sb.numDistinctValues = 0 - sb.desiredFalsePositiveRate = 0.0 - return sb; - }; - - /** - * @function getBlockIndex: get a block index to insert a hash value for - * @param h the hash from which to derive a block index (?) - * @param z the number of blocks in the filter - * - * @return a number from 0 to z-1, inclusive - */ - static getBlockIndex(h: Long, z: number): number { - const zLong = Long.fromNumber(z, true) - const hTopBits = Long.fromNumber(h.getHighBitsUnsigned(), true); - return hTopBits.mul(zLong).shiftRightUnsigned(32).getLowBitsUnsigned(); + for (let index = 0; index < length; index += chunkSize) { + result.push(uint32sFromBuf.subarray(index, index + chunkSize)); + } + let sb = new SplitBlockBloomFilter(); + sb.splitBlockFilter = result; + sb.numBlocks = result.length; + // these will not be knowable when reading + sb.numDistinctValues = 0; + sb.desiredFalsePositiveRate = 0.0; + return sb; + } + + /** + * @function getBlockIndex: get a block index to insert a hash value for + * @param h the hash from which to derive a block index (?) + * @param z the number of blocks in the filter + * + * @return a number from 0 to z-1, inclusive + */ + static getBlockIndex(h: Long, z: number): number { + const zLong = Long.fromNumber(z, true); + const hTopBits = Long.fromNumber(h.getHighBitsUnsigned(), true); + return hTopBits.mul(zLong).shiftRightUnsigned(32).getLowBitsUnsigned(); + } + + /** + * @function optimalNumOfBlocks + * + * @description Calculate optimal number of blocks, according to the number of distinct + * values and false positive probability. Using a Bloom filter calculator, the upper bound is + * far too large for client applications. Sourced from: + * https://github.com/apache/parquet-mr/blob/5608695f5777de1eb0899d9075ec9411cfdf31d3/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java#L285 + * + * @param numDistinct The number of distinct values. + * @param falsePositiveRate The false positive rate, a number between 0 and 1 exclusive + * + * @return number: number of bits of given n and p. 
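+   * In effect this computes m = -8n / ln(1 - p^(1/8)) bits (roughly 1.9 million bits
+   * with the default inputs), then rounds m, clamps it to the size bounds, and converts it to 256-bit blocks.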
+ */ + static optimalNumOfBlocks(numDistinct: number, falsePositiveRate: number): number { + let m = (-8 * numDistinct) / Math.log(1 - Math.pow(falsePositiveRate, 1.0 / 8)); + + m = (m + SplitBlockBloomFilter.NUMBER_OF_BLOCKS - 1) & ~SplitBlockBloomFilter.NUMBER_OF_BLOCKS; + + // Handle overflow: + const upperShiftL3 = SplitBlockBloomFilter.UPPER_BOUND_BYTES << 3; + if (m > upperShiftL3 || m < 0) { + m = upperShiftL3; } - /** - * @function optimalNumOfBlocks - * - * @description Calculate optimal number of blocks, according to the number of distinct - * values and false positive probability. Using a Bloom filter calculator, the upper bound is - * far too large for client applications. Sourced from: - * https://github.com/apache/parquet-mr/blob/5608695f5777de1eb0899d9075ec9411cfdf31d3/parquet-column/src/main/java/org/apache/parquet/column/values/bloomfilter/BlockSplitBloomFilter.java#L285 - * - * @param numDistinct The number of distinct values. - * @param falsePositiveRate The false positive rate, a number between 0 and 1 exclusive - * - * @return number: number of bits of given n and p. - */ - static optimalNumOfBlocks(numDistinct: number, falsePositiveRate: number): number { - let m = -8 * numDistinct / Math.log(1 - Math.pow(falsePositiveRate, 1.0 / 8)) - - m = (m + SplitBlockBloomFilter.NUMBER_OF_BLOCKS - 1) & (~SplitBlockBloomFilter.NUMBER_OF_BLOCKS); - - // Handle overflow: - const upperShiftL3 = SplitBlockBloomFilter.UPPER_BOUND_BYTES << 3 - if (m > upperShiftL3 || m < 0 ) { - m = upperShiftL3; - } - - // Round numBits up - m = (m + SplitBlockBloomFilter.BITS_PER_BLOCK -1) & ~SplitBlockBloomFilter.BITS_PER_BLOCK - - const lowerBoundShiftL3 = SplitBlockBloomFilter.LOWER_BOUND_BYTES << 3 - if (m < lowerBoundShiftL3 ) { - m = lowerBoundShiftL3; - } + // Round numBits up + m = (m + SplitBlockBloomFilter.BITS_PER_BLOCK - 1) & ~SplitBlockBloomFilter.BITS_PER_BLOCK; - return Math.ceil(m / this.BITS_PER_BLOCK) + const lowerBoundShiftL3 = SplitBlockBloomFilter.LOWER_BOUND_BYTES << 3; + if (m < lowerBoundShiftL3) { + m = lowerBoundShiftL3; } - /** - * @function mask - * @description generate a mask block for a bloom filter block - * @param hashValue: the hash value to generate the mask from - * @private - * - * @return mask Block - */ - static mask(hashValue: Long): Block { - let result: Block = SplitBlockBloomFilter.initBlock() - for (let i = 0; i < result.length; i++) { - const y = hashValue.getLowBitsUnsigned() * SplitBlockBloomFilter.salt[i] - result[i] = result[i] | (1 << (y >>> 27)) - } - return result + return Math.ceil(m / this.BITS_PER_BLOCK); + } + + /** + * @function mask + * @description generate a mask block for a bloom filter block + * @param hashValue: the hash value to generate the mask from + * @private + * + * @return mask Block + */ + static mask(hashValue: Long): Block { + let result: Block = SplitBlockBloomFilter.initBlock(); + for (let i = 0; i < result.length; i++) { + const y = hashValue.getLowBitsUnsigned() * SplitBlockBloomFilter.salt[i]; + result[i] = result[i] | (1 << (y >>> 27)); } - - /** - * @function blockInsert - * @description insert a hash into a Bloom filter Block - * @param b: the block to flip a bit for: is changed - * @param hashValue: the hash value to insert into b - * @private - * - * @return void - */ - static blockInsert(b: Block, hashValue: Long): void { - const masked: Block = this.mask(hashValue) - for (let i = 0; i < masked.length; i++) { - for (let j = 0; j < this.WORD_SIZE; j++) { - const isSet = masked[i] & (2 ** j) - if (isSet) { - b[i] = 
b[i] | (2 ** j) - } - } + return result; + } + + /** + * @function blockInsert + * @description insert a hash into a Bloom filter Block + * @param b: the block to flip a bit for: is changed + * @param hashValue: the hash value to insert into b + * @private + * + * @return void + */ + static blockInsert(b: Block, hashValue: Long): void { + const masked: Block = this.mask(hashValue); + for (let i = 0; i < masked.length; i++) { + for (let j = 0; j < this.WORD_SIZE; j++) { + const isSet = masked[i] & (2 ** j); + if (isSet) { + b[i] = b[i] | (2 ** j); } + } } - - /** - * @function blockCheck - * @description check if a hashValue exists for this filter - * @param b: the block to check for inclusion - * @param hashValue: the hash to check for should be long - * @private - * - * @return true if hashed item is __probably__ in the data set represented by this filter - * @return false if it is __definitely not__ in the data set. - */ - static blockCheck(b: Block, hashValue: Long): boolean { - const masked: Block = this.mask(hashValue) - for (let i = 0; i < masked.length; i++) { - for (let j = 0; j < this.WORD_SIZE; j++) { - const isSet = masked[i] & (2 ** j) - if (isSet) { - const match = b[i] & (2 ** j) - if (!match) { - return false - } - } - } + } + + /** + * @function blockCheck + * @description check if a hashValue exists for this filter + * @param b: the block to check for inclusion + * @param hashValue: the hash to check for should be long + * @private + * + * @return true if hashed item is __probably__ in the data set represented by this filter + * @return false if it is __definitely not__ in the data set. + */ + static blockCheck(b: Block, hashValue: Long): boolean { + const masked: Block = this.mask(hashValue); + for (let i = 0; i < masked.length; i++) { + for (let j = 0; j < this.WORD_SIZE; j++) { + const isSet = masked[i] & (2 ** j); + if (isSet) { + const match = b[i] & (2 ** j); + if (!match) { + return false; + } } - return true + } } - - /** - * Instance - */ - - private splitBlockFilter: Array = [] - private desiredFalsePositiveRate: number = SplitBlockBloomFilter.DEFAULT_FALSE_POSITIVE_RATE - private numBlocks: number = 0 - private numDistinctValues: number = SplitBlockBloomFilter.DEFAULT_DISTINCT_VALUES - private hashStrategy = new parquet_thrift.BloomFilterHash(new parquet_thrift.XxHash()) - private hasher = new XxHasher() - - private isInitialized(): boolean { return this.splitBlockFilter.length > 0 } - - getFalsePositiveRate(): number { return this.desiredFalsePositiveRate } - getNumDistinct(): number { return this.numDistinctValues } - getNumFilterBlocks(): number { return this.splitBlockFilter.length } - getFilter(): Array { return this.splitBlockFilter } - - /** - * @function optNumFilterBytes - * @description return the actual number of filter bytes set; if the option to numBytes - * was called, this value will be returned. If the options for preferred FPR - * and/or numDistinct were called, this function returns the calculated value. 
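 * For example, the default of 32 blocks works out to (32 * 256) / 8 = 1024 bytes.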
- */ - getNumFilterBytes(): number { - return this.numBlocks * SplitBlockBloomFilter.BITS_PER_BLOCK >>> 3 + return true; + } + + /** + * Instance + */ + + private splitBlockFilter: Array = []; + private desiredFalsePositiveRate: number = SplitBlockBloomFilter.DEFAULT_FALSE_POSITIVE_RATE; + private numBlocks: number = 0; + private numDistinctValues: number = SplitBlockBloomFilter.DEFAULT_DISTINCT_VALUES; + private hashStrategy = new parquet_thrift.BloomFilterHash(new parquet_thrift.XxHash()); + private hasher = new XxHasher(); + + private isInitialized(): boolean { + return this.splitBlockFilter.length > 0; + } + + getFalsePositiveRate(): number { + return this.desiredFalsePositiveRate; + } + getNumDistinct(): number { + return this.numDistinctValues; + } + getNumFilterBlocks(): number { + return this.splitBlockFilter.length; + } + getFilter(): Array { + return this.splitBlockFilter; + } + + /** + * @function optNumFilterBytes + * @description return the actual number of filter bytes set; if the option to numBytes + * was called, this value will be returned. If the options for preferred FPR + * and/or numDistinct were called, this function returns the calculated value. + */ + getNumFilterBytes(): number { + return (this.numBlocks * SplitBlockBloomFilter.BITS_PER_BLOCK) >>> 3; + } + + /** + * @function setOptionFalsePositiveRate + * @description set the desired false positive percentage for this Bloom filter. + * defaults to SplitBlockBLoomFilter.DEFAULT_FALSE_POSITIVE_RATE + * This function does nothing if the filter has already been allocated. + * @param proportion: number, between 0.0 and 1.0, exclusive + */ + setOptionFalsePositiveRate(proportion: number): SplitBlockBloomFilter { + if (this.isInitialized()) { + console.error('filter already initialized. options may no longer be changed.'); + return this; } - - /** - * @function setOptionFalsePositiveRate - * @description set the desired false positive percentage for this Bloom filter. - * defaults to SplitBlockBLoomFilter.DEFAULT_FALSE_POSITIVE_RATE - * This function does nothing if the filter has already been allocated. - * @param proportion: number, between 0.0 and 1.0, exclusive - */ - setOptionFalsePositiveRate(proportion: number): SplitBlockBloomFilter { - if (this.isInitialized()) { - console.error("filter already initialized. options may no longer be changed.") - return this - } - if (proportion <= 0.0 || proportion >= 1.0) { - console.error("falsePositiveProbability. Must be < 1.0 and > 0.0") - return this - } - this.desiredFalsePositiveRate = proportion - return this + if (proportion <= 0.0 || proportion >= 1.0) { + console.error('falsePositiveProbability. Must be < 1.0 and > 0.0'); + return this; } - - /** - * @function setOptionNumDistinct - * @description set the number of expected distinct values for the filter. - * this should generally be <= to the row group size. Defaults to - * SplitBlockBloomFilter.UPPER_BOUND_BYTES - * This function does nothing if the filter has already been allocated. - * @param numDistinct - */ - setOptionNumDistinct(numDistinct: number): SplitBlockBloomFilter { - if (this.isInitialized()) { - console.error("filter already initialized. options may no longer be changed.") - return this - } - if (numDistinct <= 0 || numDistinct > SplitBlockBloomFilter.UPPER_BOUND_BYTES) { - console.error(`invalid numDistinct. 
Must be > 0 and < ${SplitBlockBloomFilter.UPPER_BOUND_BYTES}`) - return this - } - this.numDistinctValues = numDistinct - return this + this.desiredFalsePositiveRate = proportion; + return this; + } + + /** + * @function setOptionNumDistinct + * @description set the number of expected distinct values for the filter. + * this should generally be <= to the row group size. Defaults to + * SplitBlockBloomFilter.UPPER_BOUND_BYTES + * This function does nothing if the filter has already been allocated. + * @param numDistinct + */ + setOptionNumDistinct(numDistinct: number): SplitBlockBloomFilter { + if (this.isInitialized()) { + console.error('filter already initialized. options may no longer be changed.'); + return this; } - - - /** - * @function nextPwr2 - * @description return the next highest power of 2 above v - * see https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 - * NOTE: cannot use values > 2**31. We are not expecting these values internally, - * so this works as intended. - */ - private static nextPwr2(v:number): number { - v--; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v++; - return v + if (numDistinct <= 0 || numDistinct > SplitBlockBloomFilter.UPPER_BOUND_BYTES) { + console.error(`invalid numDistinct. Must be > 0 and < ${SplitBlockBloomFilter.UPPER_BOUND_BYTES}`); + return this; } - - /** - * @function setOptionNumFilterBytes - * @description set the bytes for this Bloom filter. Set this if you don't want an - * optimal value calculated for you. Rounds up to nearest power of 2 - * This function does nothing if the filter has already been allocated. - * @param numBytes: number, the desired bit size. - */ - setOptionNumFilterBytes(numBytes: number): SplitBlockBloomFilter { - if (this.isInitialized()) { - console.error("filter already initialized. options may no longer be changed.") - return this - } - if (numBytes < SplitBlockBloomFilter.LOWER_BOUND_BYTES || numBytes > SplitBlockBloomFilter.UPPER_BOUND_BYTES) { - console.error(`invalid numBits. Must be > ${SplitBlockBloomFilter.LOWER_BOUND_BYTES} and < ${SplitBlockBloomFilter.UPPER_BOUND_BYTES}`) - return this - } - // numBlocks = Bytes * 8b/Byte * 1Block/256b - this.numBlocks = SplitBlockBloomFilter.nextPwr2(numBytes) * 8 / SplitBlockBloomFilter.BITS_PER_BLOCK - return this + this.numDistinctValues = numDistinct; + return this; + } + + /** + * @function nextPwr2 + * @description return the next highest power of 2 above v + * see https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2 + * NOTE: cannot use values > 2**31. We are not expecting these values internally, + * so this works as intended. + */ + private static nextPwr2(v: number): number { + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v++; + return v; + } + + /** + * @function setOptionNumFilterBytes + * @description set the bytes for this Bloom filter. Set this if you don't want an + * optimal value calculated for you. Rounds up to nearest power of 2 + * This function does nothing if the filter has already been allocated. + * @param numBytes: number, the desired bit size. + */ + setOptionNumFilterBytes(numBytes: number): SplitBlockBloomFilter { + if (this.isInitialized()) { + console.error('filter already initialized. options may no longer be changed.'); + return this; } - - /** - * @function initFilter - * @description initialize the Bloom filter using the options previously provided. 
+
+ /**
+ * @function nextPwr2
+ * @description return the next highest power of 2 above v
+ * see https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
+ * NOTE: cannot use values > 2**31. We are not expecting these values internally,
+ * so this works as intended.
+ */
+ private static nextPwr2(v: number): number {
+ v--;
+ v |= v >> 1;
+ v |= v >> 2;
+ v |= v >> 4;
+ v |= v >> 8;
+ v |= v >> 16;
+ v++;
+ return v;
+ }
+
+ /**
+ * @function setOptionNumFilterBytes
+ * @description set the bytes for this Bloom filter. Set this if you don't want an
+ * optimal value calculated for you. Rounds up to nearest power of 2
+ * This function does nothing if the filter has already been allocated.
+ * @param numBytes: number, the desired filter size in bytes.
+ */
+ setOptionNumFilterBytes(numBytes: number): SplitBlockBloomFilter {
+ if (this.isInitialized()) {
+ console.error('filter already initialized. options may no longer be changed.');
+ return this;
 }
-
- /**
- * @function initFilter
- * @description initialize the Bloom filter using the options previously provided.
- * If numBlocks has not been calculated and set via setOptionNumBytes, we calculate
- * the optimal filter size based on number of distinct values and
- * percent false positive rate. See setOptionNumDistinct and setOptionFalsePositiveRate
- *
- * Repeated calls to init do nothing to avoid multiple memory allocations or
- * accidental loss of filters.
- * @return void
- */
- init(): SplitBlockBloomFilter {
- if (this.isInitialized()) {
- console.error("filter already initialized.")
- return this
- }
-
- if (!this.hashStrategy.hasOwnProperty("XXHASH")) {
- throw new Error("unsupported hash strategy")
- }
-
- if (this.numBlocks === 0) {
- this.numBlocks = SplitBlockBloomFilter.optimalNumOfBlocks(
- this.numDistinctValues, this.desiredFalsePositiveRate) >>> 3
- }
-
- this.splitBlockFilter = Array(this.numBlocks).fill(SplitBlockBloomFilter.initBlock())
- return this
+ if (numBytes < SplitBlockBloomFilter.LOWER_BOUND_BYTES || numBytes > SplitBlockBloomFilter.UPPER_BOUND_BYTES) {
+ console.error(
+ `invalid numBytes. Must be > ${SplitBlockBloomFilter.LOWER_BOUND_BYTES} and < ${SplitBlockBloomFilter.UPPER_BOUND_BYTES}`
+ );
+ return this;
 }
-
- async hash(value: any): Promise<Long> {
- if (!this.hashStrategy.hasOwnProperty("XXHASH")) {
- throw new Error("unsupported hash strategy")
- }
- const hashed = await this.hasher.hash64(value)
- return Long.fromString(hashed, true, 16)
+ // numBlocks = Bytes * 8b/Byte * 1Block/256b
+ this.numBlocks = (SplitBlockBloomFilter.nextPwr2(numBytes) * 8) / SplitBlockBloomFilter.BITS_PER_BLOCK;
+ return this;
+ }
+
+ /**
+ * @function initFilter
+ * @description initialize the Bloom filter using the options previously provided.
+ * If numBlocks has not been calculated and set via setOptionNumFilterBytes, we calculate
+ * the optimal filter size based on number of distinct values and
+ * percent false positive rate. See setOptionNumDistinct and setOptionFalsePositiveRate
+ *
+ * Repeated calls to init do nothing to avoid multiple memory allocations or
+ * accidental loss of filters.
+ * @return this SplitBlockBloomFilter, for chaining
+ */
+ init(): SplitBlockBloomFilter {
+ if (this.isInitialized()) {
+ console.error('filter already initialized.');
+ return this;
 }
- private insertHash(hashValue: Long): void {
- if (!hashValue.unsigned) throw new Error("hashValue must be an unsigned Long")
- if (!this.isInitialized()) throw new Error("filter has not been initialized. call init() first")
- const i = SplitBlockBloomFilter.getBlockIndex(hashValue, this.splitBlockFilter.length)
- SplitBlockBloomFilter.blockInsert(this.splitBlockFilter[i], hashValue);
+ if (!this.hashStrategy.hasOwnProperty('XXHASH')) {
+ throw new Error('unsupported hash strategy');
 }
- /**
- * @function insert
- * @description add a hash value to this filter
- * @param value: an unsigned Long, the value to add. If not a string, will be JSON.stringified
- * @return void
- */
- async insert(value: any): Promise<void> {
call init() first") - this.insertHash(await this.hash(value)) + if (this.numBlocks === 0) { + this.numBlocks = + SplitBlockBloomFilter.optimalNumOfBlocks(this.numDistinctValues, this.desiredFalsePositiveRate) >>> 3; } - private checkHash(hashValue: Long): boolean { - if (!hashValue.unsigned) throw new Error("hashValue must be an unsigned Long") - if (!this.isInitialized()) throw new Error("filter has not been initialized") - const i = SplitBlockBloomFilter.getBlockIndex(hashValue, this.splitBlockFilter.length) - return SplitBlockBloomFilter.blockCheck(this.splitBlockFilter[i], hashValue); - } - /** - * @function check - * @description check if a hashValue exists for this filter - * @param value: the value to check for. If not a string, will be JSON.stringified - * @return true if hashed item is found in the data set represented by this filter - * @return false if it is __definitely not__ in the data set. - */ - async check(value: any): Promise { - if (!this.isInitialized()) throw new Error("filter has not been initialized") - return this.checkHash(await this.hash(value)) + this.splitBlockFilter = Array(this.numBlocks).fill(SplitBlockBloomFilter.initBlock()); + return this; + } + + async hash(value: any): Promise { + if (!this.hashStrategy.hasOwnProperty('XXHASH')) { + throw new Error('unsupported hash strategy'); } + const hashed = await this.hasher.hash64(value); + return Long.fromString(hashed, true, 16); + } + + private insertHash(hashValue: Long): void { + if (!hashValue.unsigned) throw new Error('hashValue must be an unsigned Long'); + if (!this.isInitialized()) throw new Error('filter has not been initialized. call init() first'); + const i = SplitBlockBloomFilter.getBlockIndex(hashValue, this.splitBlockFilter.length); + SplitBlockBloomFilter.blockInsert(this.splitBlockFilter[i], hashValue); + } + + /** + * @function insert + * @description add a hash value to this filter + * @param value: an unsigned Long, the value to add. If not a string, will be JSON.stringified + * @return void + */ + async insert(value: any): Promise { + if (!this.isInitialized()) throw new Error('filter has not been initialized. call init() first'); + this.insertHash(await this.hash(value)); + } + + private checkHash(hashValue: Long): boolean { + if (!hashValue.unsigned) throw new Error('hashValue must be an unsigned Long'); + if (!this.isInitialized()) throw new Error('filter has not been initialized'); + const i = SplitBlockBloomFilter.getBlockIndex(hashValue, this.splitBlockFilter.length); + return SplitBlockBloomFilter.blockCheck(this.splitBlockFilter[i], hashValue); + } + /** + * @function check + * @description check if a hashValue exists for this filter + * @param value: the value to check for. If not a string, will be JSON.stringified + * @return true if hashed item is found in the data set represented by this filter + * @return false if it is __definitely not__ in the data set. 
+ */
+ async check(value: any): Promise<boolean> {
+ if (!this.isInitialized()) throw new Error('filter has not been initialized');
+ return this.checkHash(await this.hash(value));
+ }
 }
-export default SplitBlockBloomFilter
+export default SplitBlockBloomFilter;
diff --git a/lib/bloom/xxhasher.ts b/lib/bloom/xxhasher.ts
index b61a3911..392baa74 100644
--- a/lib/bloom/xxhasher.ts
+++ b/lib/bloom/xxhasher.ts
@@ -1,7 +1,7 @@
-import xxhash from "xxhash-wasm";
-import Long from "long"
+import xxhash from 'xxhash-wasm';
+import Long from 'long';

-type HasherFunc = (input: string, seedHigh?: number, seedLow?: number) => string
+type HasherFunc = (input: string, seedHigh?: number, seedLow?: number) => string;

 /**
  * @class XxHasher
@@ -14,29 +14,31 @@ type HasherFunc = (input: string, seedHigh?: number, seedLow?: number) => string
  * [xxHash spec](https://github.com/Cyan4973/xxHash/blob/v0.7.0/doc/xxhash_spec.md)
  */
 export default class XxHasher {
- private static h64 = xxhash().then(x => x.h64ToString)
+ private static h64 = xxhash().then((x) => x.h64ToString);

- private async hashIt(value: string): Promise<string> {
- return (await XxHasher.h64)(value)
- }
+ private async hashIt(value: string): Promise<string> {
+ return (await XxHasher.h64)(value);
+ }

- /**
- * @function hash64
- * @description creates a hash for certain data types. All data is converted using toString()
- * prior to hashing.
- * @return the 64 big XXHash as a hex-encoded string.
- * @param value, must be of type string, Buffer, Uint8Array, Long, boolean, number, or bigint
- */
- async hash64(value: any): Promise<string> {
- if (typeof value === 'string') return this.hashIt(value)
- if (value instanceof Buffer ||
- value instanceof Uint8Array ||
- value instanceof Long ||
- typeof value === 'boolean' ||
- typeof value === 'number' ||
- typeof value === 'bigint') {
- return this.hashIt(value.toString())
- }
- throw new Error("unsupported type: " + value)
+ /**
+ * @function hash64
+ * @description creates a hash for certain data types. All data is converted using toString()
+ * prior to hashing.
+ * @return the 64 bit XXHash as a hex-encoded string.
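+ *
+ * @example
+ * // usage sketch (illustrative):
+ * const hex = await new XxHasher().hash64('some value'); // 16-character hex string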
+ * @param value, must be of type string, Buffer, Uint8Array, Long, boolean, number, or bigint + */ + async hash64(value: any): Promise { + if (typeof value === 'string') return this.hashIt(value); + if ( + value instanceof Buffer || + value instanceof Uint8Array || + value instanceof Long || + typeof value === 'boolean' || + typeof value === 'number' || + typeof value === 'bigint' + ) { + return this.hashIt(value.toString()); } + throw new Error('unsupported type: ' + value); + } } diff --git a/lib/bloomFilterIO/bloomFilterReader.ts b/lib/bloomFilterIO/bloomFilterReader.ts index c396b26e..a63a89c1 100644 --- a/lib/bloomFilterIO/bloomFilterReader.ts +++ b/lib/bloomFilterIO/bloomFilterReader.ts @@ -1,12 +1,10 @@ -import * as parquet_util from "../util"; -import parquet_thrift from "../../gen-nodejs/parquet_types"; -import sbbf from "../bloom/sbbf"; -import { ParquetEnvelopeReader } from "../reader" -import { ColumnChunkData } from "../declare"; - -const filterColumnChunksWithBloomFilters = ( - columnChunkDataCollection: Array -) => { +import * as parquet_util from '../util'; +import parquet_thrift from '../../gen-nodejs/parquet_types'; +import sbbf from '../bloom/sbbf'; +import { ParquetEnvelopeReader } from '../reader'; +import { ColumnChunkData } from '../declare'; + +const filterColumnChunksWithBloomFilters = (columnChunkDataCollection: Array) => { return columnChunkDataCollection.filter((columnChunk) => { return columnChunk.column.meta_data?.bloom_filter_offset; }); @@ -19,10 +17,10 @@ type bloomFilterOffsetData = { }; const toInteger = (buffer: Buffer) => { - const integer = parseInt(buffer.toString("hex"), 16); + const integer = parseInt(buffer.toString('hex'), 16); if (integer >= Number.MAX_VALUE) { - throw Error("Number exceeds Number.MAX_VALUE: Godspeed"); + throw Error('Number exceeds Number.MAX_VALUE: Godspeed'); } return integer; @@ -31,15 +29,12 @@ const toInteger = (buffer: Buffer) => { export const parseBloomFilterOffsets = ( ColumnChunkDataCollection: Array ): Array => { - return ColumnChunkDataCollection.map(({rowGroupIndex,column}) => { - const { - bloom_filter_offset: bloomOffset, - path_in_schema: pathInSchema, - } = column.meta_data || {}; + return ColumnChunkDataCollection.map(({ rowGroupIndex, column }) => { + const { bloom_filter_offset: bloomOffset, path_in_schema: pathInSchema } = column.meta_data || {}; return { offsetBytes: toInteger(bloomOffset!.buffer), - columnName: pathInSchema!.join(","), + columnName: pathInSchema!.join(','), rowGroupIndex, }; }); @@ -53,20 +48,14 @@ const getBloomFilterHeader = async ( let bloomFilterHeaderData; try { - bloomFilterHeaderData = await envelopeReader.read( - offsetBytes, - headerByteSizeEstimate - ); + bloomFilterHeaderData = await envelopeReader.read(offsetBytes, headerByteSizeEstimate); } catch (e) { if (typeof e === 'string') throw new Error(e); - else throw e + else throw e; } const bloomFilterHeader = new parquet_thrift.BloomFilterHeader(); - const sizeOfBloomFilterHeader = parquet_util.decodeThrift( - bloomFilterHeader, - bloomFilterHeaderData - ); + const sizeOfBloomFilterHeader = parquet_util.decodeThrift(bloomFilterHeader, bloomFilterHeaderData); return { bloomFilterHeader, @@ -78,24 +67,18 @@ const readFilterData = async ( offsetBytes: number, envelopeReader: InstanceType ): Promise => { - const { - bloomFilterHeader, - sizeOfBloomFilterHeader, - } = await getBloomFilterHeader(offsetBytes, envelopeReader); + const { bloomFilterHeader, sizeOfBloomFilterHeader } = await getBloomFilterHeader(offsetBytes, 
envelopeReader); const { numBytes: filterByteSize } = bloomFilterHeader; try { const filterBlocksOffset = offsetBytes + sizeOfBloomFilterHeader; - const buffer = await envelopeReader.read( - filterBlocksOffset, - filterByteSize - ); + const buffer = await envelopeReader.read(filterBlocksOffset, filterByteSize); return buffer; } catch (e) { if (typeof e === 'string') throw new Error(e); - else throw e + else throw e; } }; @@ -103,35 +86,22 @@ const readFilterDataFrom = ( offsets: Array, envelopeReader: InstanceType ): Promise> => { - return Promise.all( - offsets.map((offset) => readFilterData(offset, envelopeReader)) - ); + return Promise.all(offsets.map((offset) => readFilterData(offset, envelopeReader))); }; -export const siftAllByteOffsets = ( - columnChunkDataCollection: Array -): Array => { - return parseBloomFilterOffsets( - filterColumnChunksWithBloomFilters(columnChunkDataCollection) - ); +export const siftAllByteOffsets = (columnChunkDataCollection: Array): Array => { + return parseBloomFilterOffsets(filterColumnChunksWithBloomFilters(columnChunkDataCollection)); }; export const getBloomFiltersFor = async ( paths: Array, envelopeReader: InstanceType ) => { - const columnChunkDataCollection = envelopeReader.getAllColumnChunkDataFor( - paths - ); + const columnChunkDataCollection = envelopeReader.getAllColumnChunkDataFor(paths); const bloomFilterOffsetData = siftAllByteOffsets(columnChunkDataCollection); - const offsetByteValues = bloomFilterOffsetData.map( - ({ offsetBytes }) => offsetBytes - ); - - const filterBlocksBuffers: Array = await readFilterDataFrom( - offsetByteValues, - envelopeReader - ); + const offsetByteValues = bloomFilterOffsetData.map(({ offsetBytes }) => offsetBytes); + + const filterBlocksBuffers: Array = await readFilterDataFrom(offsetByteValues, envelopeReader); return filterBlocksBuffers.map((buffer, index) => { const { columnName, rowGroupIndex } = bloomFilterOffsetData[index]; diff --git a/lib/bloomFilterIO/bloomFilterWriter.ts b/lib/bloomFilterIO/bloomFilterWriter.ts index 34e676c5..c6ac0de1 100644 --- a/lib/bloomFilterIO/bloomFilterWriter.ts +++ b/lib/bloomFilterIO/bloomFilterWriter.ts @@ -1,9 +1,9 @@ -import * as parquet_util from "../util"; -import parquet_thrift from "../../gen-nodejs/parquet_types"; -import SplitBlockBloomFilter from "../bloom/sbbf"; +import * as parquet_util from '../util'; +import parquet_thrift from '../../gen-nodejs/parquet_types'; +import SplitBlockBloomFilter from '../bloom/sbbf'; -import { Block } from "../declare"; -import Int64 from 'node-int64' +import { Block } from '../declare'; +import Int64 from 'node-int64'; export type createSBBFParams = { numFilterBytes?: number; @@ -21,11 +21,9 @@ export const createSBBF = (params: createSBBFParams): SplitBlockBloomFilter => { if (!hasOptions) return bloomFilter.init(); - if (numFilterBytes) - return bloomFilter.setOptionNumFilterBytes(numFilterBytes).init(); + if (numFilterBytes) return bloomFilter.setOptionNumFilterBytes(numFilterBytes).init(); - if (falsePositiveRate) - bloomFilter.setOptionFalsePositiveRate(falsePositiveRate); + if (falsePositiveRate) bloomFilter.setOptionFalsePositiveRate(falsePositiveRate); if (numDistinct) bloomFilter.setOptionNumDistinct(numDistinct); diff --git a/lib/browser/compression.js b/lib/browser/compression.js index 85499fd2..dab9618b 100644 --- a/lib/browser/compression.js +++ b/lib/browser/compression.js @@ -3,76 +3,76 @@ const zlib = require('zlib'); const snappy = require('snappyjs'); const PARQUET_COMPRESSION_METHODS = { - 'UNCOMPRESSED': { - 
deflate: deflate_identity, - inflate: inflate_identity - }, - 'GZIP': { - deflate: deflate_gzip, - inflate: inflate_gzip - }, - 'SNAPPY': { - deflate: deflate_snappy, - inflate: inflate_snappy - }, + UNCOMPRESSED: { + deflate: deflate_identity, + inflate: inflate_identity, + }, + GZIP: { + deflate: deflate_gzip, + inflate: inflate_gzip, + }, + SNAPPY: { + deflate: deflate_snappy, + inflate: inflate_snappy, + }, }; /** * Deflate a value using compression method `method` */ async function deflate(method, value) { - if (!(method in PARQUET_COMPRESSION_METHODS)) { - throw 'invalid compression method: ' + method; - } + if (!(method in PARQUET_COMPRESSION_METHODS)) { + throw 'invalid compression method: ' + method; + } - return PARQUET_COMPRESSION_METHODS[method].deflate(value); + return PARQUET_COMPRESSION_METHODS[method].deflate(value); } function deflate_identity(value) { - return buffer_from_result(value); + return buffer_from_result(value); } function deflate_gzip(value) { - return zlib.gzipSync(value); + return zlib.gzipSync(value); } function deflate_snappy(value) { - const compressedValue = snappy.compress(value); - return buffer_from_result(compressedValue); + const compressedValue = snappy.compress(value); + return buffer_from_result(compressedValue); } /** * Inflate a value using compression method `method` */ async function inflate(method, value) { - if (!(method in PARQUET_COMPRESSION_METHODS)) { - throw 'invalid compression method: ' + method; - } + if (!(method in PARQUET_COMPRESSION_METHODS)) { + throw 'invalid compression method: ' + method; + } - return await PARQUET_COMPRESSION_METHODS[method].inflate(value); + return await PARQUET_COMPRESSION_METHODS[method].inflate(value); } function inflate_identity(value) { - return buffer_from_result(value); + return buffer_from_result(value); } function inflate_gzip(value) { - return zlib.gunzipSync(value); + return zlib.gunzipSync(value); } function inflate_snappy(value) { - const uncompressedValue = snappy.uncompress(value); - return buffer_from_result(uncompressedValue); + const uncompressedValue = snappy.uncompress(value); + return buffer_from_result(uncompressedValue); } function buffer_from_result(result) { - if (Buffer.isBuffer(result)) { - return result; - } else { - return Buffer.from(result); - } + if (Buffer.isBuffer(result)) { + return result; + } else { + return Buffer.from(result); + } } -exports.PARQUET_COMPRESSION_METHODS = PARQUET_COMPRESSION_METHODS -exports.deflate = deflate -exports.inflate = inflate +exports.PARQUET_COMPRESSION_METHODS = PARQUET_COMPRESSION_METHODS; +exports.deflate = deflate; +exports.inflate = inflate; diff --git a/lib/bufferReader.ts b/lib/bufferReader.ts index bf519dea..a29a3823 100644 --- a/lib/bufferReader.ts +++ b/lib/bufferReader.ts @@ -1,30 +1,30 @@ -import { Statistics } from "../gen-nodejs/parquet_types" -import { ParquetEnvelopeReader } from "./reader" -import { FileMetaDataExt } from "./declare" +import { Statistics } from '../gen-nodejs/parquet_types'; +import { ParquetEnvelopeReader } from './reader'; +import { FileMetaDataExt } from './declare'; export interface BufferReaderOptions { - maxSpan?: number, - maxLength?: number, - queueWait?: number + maxSpan?: number; + maxLength?: number; + queueWait?: number; default_dictionary_size?: number; - metadata?: FileMetaDataExt - rawStatistics?: Statistics + metadata?: FileMetaDataExt; + rawStatistics?: Statistics; } interface BufferReaderQueueRow { - offset: number, - length: number, - resolve: (buf: Buffer) => void - reject: unknown + 
offset: number; + length: number; + resolve: (buf: Buffer) => void; + reject: unknown; } export default class BufferReader { - maxSpan: number - maxLength: number - queueWait: number - scheduled?: boolean - queue: Array - envelopeReader: ParquetEnvelopeReader + maxSpan: number; + maxLength: number; + queueWait: number; + scheduled?: boolean; + queue: Array; + envelopeReader: ParquetEnvelopeReader; constructor(envelopeReader: ParquetEnvelopeReader, options: BufferReaderOptions) { options = options || {}; @@ -39,14 +39,14 @@ export default class BufferReader { read(offset: number, length: number): Promise { if (!this.scheduled) { this.scheduled = true; - setTimeout( () => { + setTimeout(() => { this.scheduled = false; this.processQueue(); - },this.queueWait); + }, this.queueWait); } - return new Promise( (resolve, reject) => { - this.queue.push({offset,length,resolve,reject}); + return new Promise((resolve, reject) => { + this.queue.push({ offset, length, resolve, reject }); }); } @@ -54,7 +54,7 @@ export default class BufferReader { const queue = this.queue; if (!queue.length) return; this.queue = []; - queue.sort( (a,b) => a.offset - b.offset); + queue.sort((a, b) => a.offset - b.offset); var subqueue: Array = []; @@ -66,21 +66,21 @@ export default class BufferReader { const processQueue = subqueue; subqueue = []; - const lastElement = processQueue[processQueue.length-1]; + const lastElement = processQueue[processQueue.length - 1]; const start = processQueue[0].offset; - const finish = lastElement.offset +lastElement.length; + const finish = lastElement.offset + lastElement.length; const buffer = await this.envelopeReader.readFn(start, finish - start); - processQueue.forEach(async d => { + processQueue.forEach(async (d) => { d.resolve(buffer.subarray(d.offset - start, d.offset + d.length - start)); }); }; - queue.forEach((d,i) => { - const prev = queue[i-1]; - if (!prev || (d.offset - (prev.offset + prev.length)) < this.maxSpan) { + queue.forEach((d, i) => { + const prev = queue[i - 1]; + if (!prev || d.offset - (prev.offset + prev.length) < this.maxSpan) { subqueue.push(d); - if ( (d.offset + d.length) - subqueue[0].offset > this.maxLength) { + if (d.offset + d.length - subqueue[0].offset > this.maxLength) { readSubqueue(); } } else { @@ -90,4 +90,4 @@ export default class BufferReader { }); readSubqueue(); } -}; +} diff --git a/lib/codec/index.ts b/lib/codec/index.ts index 85a1e507..f32072c6 100644 --- a/lib/codec/index.ts +++ b/lib/codec/index.ts @@ -1,5 +1,4 @@ -export * as PLAIN from './plain' -export * as RLE from './rle' -export * as PLAIN_DICTIONARY from './plain_dictionary' -export * as RLE_DICTIONARY from './plain_dictionary' - +export * as PLAIN from './plain'; +export * as RLE from './rle'; +export * as PLAIN_DICTIONARY from './plain_dictionary'; +export * as RLE_DICTIONARY from './plain_dictionary'; diff --git a/lib/codec/plain.ts b/lib/codec/plain.ts index a5fbb359..d950cf9c 100644 --- a/lib/codec/plain.ts +++ b/lib/codec/plain.ts @@ -1,5 +1,5 @@ -import INT53 from "int53"; -import { Cursor, Options } from "./types"; +import INT53 from 'int53'; +import { Cursor, Options } from './types'; function encodeValues_BOOLEAN(values: Array) { let buf = Buffer.alloc(Math.ceil(values.length / 8)); @@ -32,7 +32,7 @@ function encodeValues_INT32(values: Array, opts: Options) { let buf = Buffer.alloc(4 * values.length); for (let i = 0; i < values.length; i++) { if (isDecimal) { - buf.writeInt32LE(values[i] * Math.pow(10, scale), i * 4); + buf.writeInt32LE(values[i] * Math.pow(10, scale), i 
* 4); } else { buf.writeInt32LE(values[i], i * 4); } @@ -67,7 +67,7 @@ function encodeValues_INT64(values: Array, opts: Options) { let buf = Buffer.alloc(8 * values.length); for (let i = 0; i < values.length; i++) { if (isDecimal) { - buf.writeBigInt64LE(BigInt(Math.floor(values[i] * Math.pow(10, scale))), i * 8); + buf.writeBigInt64LE(BigInt(Math.floor(values[i] * Math.pow(10, scale))), i * 8); } else { buf.writeBigInt64LE(BigInt(values[i]), i * 8); } @@ -102,7 +102,7 @@ function decodeValues_DECIMAL(cursor: Cursor, count: number, opts: Options) { // Default scale to 0 per spec const scale = opts.scale || 0; - const name = opts.name || undefined + const name = opts.name || undefined; if (!precision) { throw `missing option: precision (required for DECIMAL) for column: ${name}`; } @@ -118,7 +118,7 @@ function decodeValues_DECIMAL(cursor: Cursor, count: number, opts: Options) { bufferFunction = (offset: number) => cursor.buffer.readBigInt64LE(offset); } for (let i = 0; i < count; ++i) { - const bufferSize = cursor.size || 0 + const bufferSize = cursor.size || 0; if (bufferSize === 0 || cursor.offset < bufferSize) { const fullValue = bufferFunction(cursor.offset); const valueWithDecimalApplied = Number(fullValue) / Math.pow(10, scale); @@ -237,12 +237,9 @@ function decodeValues_BYTE_ARRAY(cursor: Cursor, count: number) { return values; } -function encodeValues_FIXED_LEN_BYTE_ARRAY( - values: Array, - opts: Options -) { +function encodeValues_FIXED_LEN_BYTE_ARRAY(values: Array, opts: Options) { if (!opts.typeLength) { - throw "missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)"; + throw 'missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)'; } const returnedValues: Array = []; @@ -250,107 +247,96 @@ function encodeValues_FIXED_LEN_BYTE_ARRAY( returnedValues[i] = Buffer.from(values[i]); if (returnedValues[i].length !== opts.typeLength) { - throw "invalid value for FIXED_LEN_BYTE_ARRAY: " + returnedValues[i]; + throw 'invalid value for FIXED_LEN_BYTE_ARRAY: ' + returnedValues[i]; } } return Buffer.concat(returnedValues); } -function decodeValues_FIXED_LEN_BYTE_ARRAY( - cursor: Cursor, - count: number, - opts: Options -) { +function decodeValues_FIXED_LEN_BYTE_ARRAY(cursor: Cursor, count: number, opts: Options) { let values = []; - const typeLength = - opts.typeLength ?? (opts.column ? opts.column.typeLength : undefined); + const typeLength = opts.typeLength ?? (opts.column ? 
opts.column.typeLength : undefined); if (!typeLength) { - throw "missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)"; + throw 'missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)'; } for (let i = 0; i < count; ++i) { - values.push( - cursor.buffer.subarray(cursor.offset, cursor.offset + typeLength) - ); + values.push(cursor.buffer.subarray(cursor.offset, cursor.offset + typeLength)); cursor.offset += typeLength; } return values; } -type ValidValueTypes = "BOOLEAN" | "INT32" | "INT64" | "INT96" | "FLOAT" | "DOUBLE" | "BYTE_ARRAY" | "FIXED_LEN_BYTE_ARRAY" - -export const encodeValues = function ( - type: ValidValueTypes | string, - values: Array, - opts: Options -) { +type ValidValueTypes = + | 'BOOLEAN' + | 'INT32' + | 'INT64' + | 'INT96' + | 'FLOAT' + | 'DOUBLE' + | 'BYTE_ARRAY' + | 'FIXED_LEN_BYTE_ARRAY'; + +export const encodeValues = function (type: ValidValueTypes | string, values: Array, opts: Options) { switch (type) { - case "BOOLEAN": + case 'BOOLEAN': return encodeValues_BOOLEAN(values as Array); - case "INT32": + case 'INT32': return encodeValues_INT32(values as Array, opts); - case "INT64": + case 'INT64': return encodeValues_INT64(values as Array, opts); - case "INT96": + case 'INT96': return encodeValues_INT96(values as Array); - case "FLOAT": + case 'FLOAT': return encodeValues_FLOAT(values as Array); - case "DOUBLE": + case 'DOUBLE': return encodeValues_DOUBLE(values as Array); - case "BYTE_ARRAY": + case 'BYTE_ARRAY': return encodeValues_BYTE_ARRAY(values as Array); - case "FIXED_LEN_BYTE_ARRAY": - return encodeValues_FIXED_LEN_BYTE_ARRAY( - values as Array, - opts - ); + case 'FIXED_LEN_BYTE_ARRAY': + return encodeValues_FIXED_LEN_BYTE_ARRAY(values as Array, opts); default: - throw "unsupported type: " + type; + throw 'unsupported type: ' + type; } }; -export const decodeValues = function ( - type: ValidValueTypes | string, - cursor: Cursor, - count: number, - opts: Options -) { +export const decodeValues = function (type: ValidValueTypes | string, cursor: Cursor, count: number, opts: Options) { switch (type) { - case "BOOLEAN": + case 'BOOLEAN': return decodeValues_BOOLEAN(cursor, count); - case "INT32": + case 'INT32': return decodeValues_INT32(cursor, count, opts); - case "INT64": + case 'INT64': return decodeValues_INT64(cursor, count, opts); - case "INT96": + case 'INT96': return decodeValues_INT96(cursor, count); - case "FLOAT": + case 'FLOAT': return decodeValues_FLOAT(cursor, count); - case "DOUBLE": + case 'DOUBLE': return decodeValues_DOUBLE(cursor, count); - case "BYTE_ARRAY": + case 'BYTE_ARRAY': return decodeValues_BYTE_ARRAY(cursor, count); - case "FIXED_LEN_BYTE_ARRAY": + case 'FIXED_LEN_BYTE_ARRAY': return decodeValues_FIXED_LEN_BYTE_ARRAY(cursor, count, opts); default: - throw "unsupported type: " + type; + throw 'unsupported type: ' + type; } }; diff --git a/lib/codec/plain_dictionary.ts b/lib/codec/plain_dictionary.ts index 963baec8..916730b3 100644 --- a/lib/codec/plain_dictionary.ts +++ b/lib/codec/plain_dictionary.ts @@ -1,8 +1,8 @@ -import * as rle from './rle' -import { Cursor, Options } from './types' +import * as rle from './rle'; +import { Cursor, Options } from './types'; -export const decodeValues = function(type: string, cursor: Cursor, count: number, opts: Options) { - const bitWidth = cursor.buffer.subarray(cursor.offset, cursor.offset+1).readInt8(0); +export const decodeValues = function (type: string, cursor: Cursor, count: number, opts: Options) { + const bitWidth = cursor.buffer.subarray(cursor.offset, 
cursor.offset + 1).readInt8(0); cursor.offset += 1; return rle.decodeValues(type, cursor, count, Object.assign({}, opts, { disableEnvelope: true, bitWidth })); }; diff --git a/lib/codec/rle.ts b/lib/codec/rle.ts index 0e519587..6c2f5d87 100644 --- a/lib/codec/rle.ts +++ b/lib/codec/rle.ts @@ -2,8 +2,8 @@ // // https://github.com/apache/parquet-format/blob/master/Encodings.md -import varint from 'varint' -import { Cursor } from './types' +import varint from 'varint'; +import { Cursor } from './types'; function encodeRunBitpacked(values: Array, opts: { bitWidth: number }) { for (let i = 0; i < values.length % 8; i++) { @@ -13,19 +13,16 @@ function encodeRunBitpacked(values: Array, opts: { bitWidth: number }) { let buf = Buffer.alloc(Math.ceil(opts.bitWidth * (values.length / 8))); for (let b = 0; b < opts.bitWidth * values.length; ++b) { if ((values[Math.floor(b / opts.bitWidth)] & (1 << b % opts.bitWidth)) > 0) { - buf[Math.floor(b / 8)] |= (1 << (b % 8)); + buf[Math.floor(b / 8)] |= 1 << b % 8; } } - return Buffer.concat([ - Buffer.from(varint.encode(((values.length / 8) << 1) | 1)), - buf - ]); + return Buffer.concat([Buffer.from(varint.encode(((values.length / 8) << 1) | 1)), buf]); } function encodeRunRepeated(value: number, count: number, opts: { bitWidth: number }) { let buf = Buffer.alloc(Math.ceil(opts.bitWidth / 8)); - let remainingValue = value + let remainingValue = value; // This is encoded LSB to MSB, so we pick off the least // significant byte and shift to get the next one. @@ -34,27 +31,27 @@ function encodeRunRepeated(value: number, count: number, opts: { bitWidth: numbe remainingValue = remainingValue >> 8; } - return Buffer.concat([ - Buffer.from(varint.encode(count << 1)), - buf - ]); + return Buffer.concat([Buffer.from(varint.encode(count << 1)), buf]); } function unknownToParsedInt(value: string | number) { if (typeof value === 'string') { - return parseInt(value, 10) + return parseInt(value, 10); } else { - return value + return value; } } -export const encodeValues = function(type: string, values: Array, opts: { bitWidth: number, disableEnvelope?: boolean }) { +export const encodeValues = function ( + type: string, + values: Array, + opts: { bitWidth: number; disableEnvelope?: boolean } +) { if (!('bitWidth' in opts)) { throw 'bitWidth is required'; } switch (type) { - case 'BOOLEAN': case 'INT32': case 'INT64': @@ -72,19 +69,19 @@ export const encodeValues = function(type: string, values: Array, opts: for (let i = 0; i < values.length; i++) { // If we are at the beginning of a run and the next value is same we start // collecting repeated values - if ( repeats === 0 && run.length % 8 === 0 && values[i] === values[i+1]) { + if (repeats === 0 && run.length % 8 === 0 && values[i] === values[i + 1]) { // If we have any data in runs we need to encode them if (run.length) { buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]); run = []; } repeats = 1; - } else if (repeats > 0 && values[i] === values[i-1]) { - repeats += 1; + } else if (repeats > 0 && values[i] === values[i - 1]) { + repeats += 1; } else { // If values changes we need to post any previous repeated values if (repeats) { - buf = Buffer.concat([buf, encodeRunRepeated(values[i-1], repeats, opts)]); + buf = Buffer.concat([buf, encodeRunRepeated(values[i - 1], repeats, opts)]); repeats = 0; } run.push(values[i]); @@ -92,7 +89,7 @@ export const encodeValues = function(type: string, values: Array, opts: } if (repeats) { - buf = Buffer.concat([buf, encodeRunRepeated(values[values.length-1], repeats, opts)]); + 
buf = Buffer.concat([buf, encodeRunRepeated(values[values.length - 1], repeats, opts)]); } else if (run.length) { buf = Buffer.concat([buf, encodeRunBitpacked(run, opts)]); } @@ -108,15 +105,15 @@ export const encodeValues = function(type: string, values: Array, opts: return envelope; }; -function decodeRunBitpacked(cursor : Cursor, count: number, opts: { bitWidth: number }) { +function decodeRunBitpacked(cursor: Cursor, count: number, opts: { bitWidth: number }) { if (count % 8 !== 0) { throw 'must be a multiple of 8'; } let values = new Array(count).fill(0); for (let b = 0; b < opts.bitWidth * count; ++b) { - if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << (b % 8))) { - values[Math.floor(b / opts.bitWidth)] |= (1 << b % opts.bitWidth); + if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << b % 8)) { + values[Math.floor(b / opts.bitWidth)] |= 1 << b % opts.bitWidth; } } @@ -129,7 +126,7 @@ function decodeRunRepeated(cursor: Cursor, count: number, opts: { bitWidth: numb let value = 0; for (let i = 0; i < bytesNeededForFixedBitWidth; ++i) { - const byte = cursor.buffer[cursor.offset] + const byte = cursor.buffer[cursor.offset]; // Bytes are stored LSB to MSB, so we need to shift // each new byte appropriately. value += byte << (i * 8); @@ -139,7 +136,12 @@ function decodeRunRepeated(cursor: Cursor, count: number, opts: { bitWidth: numb return new Array(count).fill(value); } -export const decodeValues = function(_: string, cursor: Cursor, count: number, opts: { bitWidth: number, disableEnvelope?: boolean }) { +export const decodeValues = function ( + _: string, + cursor: Cursor, + count: number, + opts: { bitWidth: number; disableEnvelope?: boolean } +) { if (!('bitWidth' in opts)) { throw 'bitWidth is required'; } @@ -164,10 +166,10 @@ export const decodeValues = function(_: string, cursor: Cursor, count: number, o values.push(res[i]); } } - values = values.slice(0,count); + values = values.slice(0, count); if (values.length !== count) { - throw "invalid RLE encoding"; + throw 'invalid RLE encoding'; } return values; diff --git a/lib/codec/types.ts b/lib/codec/types.ts index c4b0ce1c..e91e5315 100644 --- a/lib/codec/types.ts +++ b/lib/codec/types.ts @@ -1,30 +1,30 @@ -import { PrimitiveType } from "../declare"; -import { ParquetCodec, OriginalType, ParquetField } from "../declare"; -import { Statistics } from "../../gen-nodejs/parquet_types"; +import { PrimitiveType } from '../declare'; +import { ParquetCodec, OriginalType, ParquetField } from '../declare'; +import { Statistics } from '../../gen-nodejs/parquet_types'; export interface Options { - typeLength: number, - bitWidth: number, - disableEnvelope?: boolean - primitiveType?: PrimitiveType; - originalType?: OriginalType; - encoding?: ParquetCodec; - compression?: string, - column?: ParquetField, - rawStatistics?: Statistics, - cache?: unknown, - dictionary?: Array - num_values?: number - rLevelMax?: number, - dLevelMax?: number, - type?: string, - name?: string, - precision?: number, - scale?: number + typeLength: number; + bitWidth: number; + disableEnvelope?: boolean; + primitiveType?: PrimitiveType; + originalType?: OriginalType; + encoding?: ParquetCodec; + compression?: string; + column?: ParquetField; + rawStatistics?: Statistics; + cache?: unknown; + dictionary?: Array; + num_values?: number; + rLevelMax?: number; + dLevelMax?: number; + type?: string; + name?: string; + precision?: number; + scale?: number; } export interface Cursor { - buffer: Buffer, - offset: number, - size?: number, + buffer: Buffer; + 
offset: number; + size?: number; } diff --git a/lib/compression.ts b/lib/compression.ts index ca9d63eb..1d19eb08 100644 --- a/lib/compression.ts +++ b/lib/compression.ts @@ -1,36 +1,36 @@ -import zlib from 'zlib' -import snappy from 'snappyjs' -import { compress as brotliCompress, decompress as brotliDecompress } from 'brotli-wasm' +import zlib from 'zlib'; +import snappy from 'snappyjs'; +import { compress as brotliCompress, decompress as brotliDecompress } from 'brotli-wasm'; -type d_identity = (value: ArrayBuffer | Buffer | Uint8Array ) => ArrayBuffer | Buffer | Uint8Array -type d_gzip = (value: ArrayBuffer | Buffer | string ) => Buffer -type d_snappy = (value: ArrayBuffer | Buffer | Uint8Array ) => ArrayBuffer | Buffer | Uint8Array -type d_brotli = (value: Uint8Array ) => Promise +type d_identity = (value: ArrayBuffer | Buffer | Uint8Array) => ArrayBuffer | Buffer | Uint8Array; +type d_gzip = (value: ArrayBuffer | Buffer | string) => Buffer; +type d_snappy = (value: ArrayBuffer | Buffer | Uint8Array) => ArrayBuffer | Buffer | Uint8Array; +type d_brotli = (value: Uint8Array) => Promise; interface PARQUET_COMPRESSION_METHODS { - [key:string]: { - deflate: (value: any) => Buffer | Promise - inflate: (value: any) => Buffer | Promise - } + [key: string]: { + deflate: (value: any) => Buffer | Promise; + inflate: (value: any) => Buffer | Promise; + }; } // LZO compression is disabled. See: https://github.com/LibertyDSNP/parquetjs/issues/18 export const PARQUET_COMPRESSION_METHODS: PARQUET_COMPRESSION_METHODS = { - 'UNCOMPRESSED': { + UNCOMPRESSED: { deflate: deflate_identity, - inflate: inflate_identity + inflate: inflate_identity, }, - 'GZIP': { + GZIP: { deflate: deflate_gzip, - inflate: inflate_gzip + inflate: inflate_gzip, }, - 'SNAPPY': { + SNAPPY: { deflate: deflate_snappy, - inflate: inflate_snappy + inflate: inflate_snappy, }, - 'BROTLI': { + BROTLI: { deflate: deflate_brotli, - inflate: inflate_brotli - } + inflate: inflate_brotli, + }, }; /** @@ -58,12 +58,14 @@ function deflate_snappy(value: ArrayBuffer | Buffer | Uint8Array) { } async function deflate_brotli(value: Uint8Array) { - const compressedContent = await brotliCompress(value/*, { + const compressedContent = await brotliCompress( + value /*, { mode: 0, quality: 8, lgwin: 22 } - */) + */ + ); return Buffer.from(compressedContent); } @@ -93,7 +95,7 @@ function inflate_snappy(value: ArrayBuffer | Buffer | Uint8Array) { } async function inflate_brotli(value: Uint8Array) { - const uncompressedContent = await brotliDecompress(value) + const uncompressedContent = await brotliDecompress(value); return Buffer.from(uncompressedContent); } diff --git a/lib/custom.d.ts b/lib/custom.d.ts index 9ee74add..79dc2738 100644 --- a/lib/custom.d.ts +++ b/lib/custom.d.ts @@ -1,11 +1,9 @@ - declare module 'int53' { - export const writeInt64LE: (value: number, buf: Buffer, num: number) => void - export const readInt64LE: (buf: Buffer, offset: number) => number + export const writeInt64LE: (value: number, buf: Buffer, num: number) => void; + export const readInt64LE: (buf: Buffer, offset: number) => number; } declare module 'snappyjs' { - export const compress: (value: ArrayBuffer | Buffer | Uint8Array) => ArrayBuffer | Buffer | Uint8Array - export const uncompress: (value: ArrayBuffer | Buffer | Uint8Array) => ArrayBuffer | Buffer | Uint8Array + export const compress: (value: ArrayBuffer | Buffer | Uint8Array) => ArrayBuffer | Buffer | Uint8Array; + export const uncompress: (value: ArrayBuffer | Buffer | Uint8Array) => ArrayBuffer | Buffer | 
Uint8Array; } - diff --git a/lib/declare.ts b/lib/declare.ts index 6a8d0b5d..1f2e64e2 100644 --- a/lib/declare.ts +++ b/lib/declare.ts @@ -1,10 +1,21 @@ // Thanks to https://github.com/kbajalc/parquets -import parquet_thrift from "../gen-nodejs/parquet_types"; -import { Statistics, OffsetIndex, ColumnIndex, PageType, DataPageHeader, DataPageHeaderV2, DictionaryPageHeader, IndexPageHeader, Type, ColumnMetaData } from "../gen-nodejs/parquet_types"; -import SplitBlockBloomFilter from "./bloom/sbbf"; -import { createSBBFParams } from "./bloomFilterIO/bloomFilterWriter"; -import Int64 from 'node-int64' +import parquet_thrift from '../gen-nodejs/parquet_types'; +import { + Statistics, + OffsetIndex, + ColumnIndex, + PageType, + DataPageHeader, + DataPageHeaderV2, + DictionaryPageHeader, + IndexPageHeader, + Type, + ColumnMetaData, +} from '../gen-nodejs/parquet_types'; +import SplitBlockBloomFilter from './bloom/sbbf'; +import { createSBBFParams } from './bloomFilterIO/bloomFilterWriter'; +import Int64 from 'node-int64'; export type ParquetCodec = 'PLAIN' | 'RLE'; export type ParquetCompression = 'UNCOMPRESSED' | 'GZIP' | 'SNAPPY' | 'LZO' | 'BROTLI' | 'LZ4'; @@ -12,150 +23,162 @@ export type RepetitionType = 'REQUIRED' | 'OPTIONAL' | 'REPEATED'; export type ParquetType = PrimitiveType | OriginalType; export type PrimitiveType = -// Base Types - |'BOOLEAN' // 0 - | 'INT32' // 1 - | 'INT64' // 2 - | 'INT96' // 3 - | 'FLOAT' // 4 - | 'DOUBLE' // 5 - | 'BYTE_ARRAY' // 6, - | 'FIXED_LEN_BYTE_ARRAY'; // 7 + // Base Types + | 'BOOLEAN' // 0 + | 'INT32' // 1 + | 'INT64' // 2 + | 'INT96' // 3 + | 'FLOAT' // 4 + | 'DOUBLE' // 5 + | 'BYTE_ARRAY' // 6, + | 'FIXED_LEN_BYTE_ARRAY'; // 7 export type OriginalType = -// Converted Types - | 'UTF8' // 0 - | 'MAP' // 1 - // | 'MAP_KEY_VALUE' // 2 - | 'LIST' // 3 - | 'ENUM' // 4 - | 'DECIMAL' // 5 - | 'DATE' // 6 - | 'TIME_MILLIS' // 7 - | 'TIME_MICROS' // 8 - | 'TIMESTAMP_MILLIS' // 9 - | 'TIMESTAMP_MICROS' // 10 - | 'UINT_8' // 11 - | 'UINT_16' // 12 - | 'UINT_32' // 13 - | 'UINT_64' // 14 - | 'INT_8' // 15 - | 'INT_16' // 16 - | 'INT_32' // 17 - | 'INT_64' // 18 - | 'JSON' // 19 - | 'BSON' // 20 - | 'INTERVAL'; // 21 + // Converted Types + | 'UTF8' // 0 + | 'MAP' // 1 + // | 'MAP_KEY_VALUE' // 2 + | 'LIST' // 3 + | 'ENUM' // 4 + | 'DECIMAL' // 5 + | 'DATE' // 6 + | 'TIME_MILLIS' // 7 + | 'TIME_MICROS' // 8 + | 'TIMESTAMP_MILLIS' // 9 + | 'TIMESTAMP_MICROS' // 10 + | 'UINT_8' // 11 + | 'UINT_16' // 12 + | 'UINT_32' // 13 + | 'UINT_64' // 14 + | 'INT_8' // 15 + | 'INT_16' // 16 + | 'INT_32' // 17 + | 'INT_64' // 18 + | 'JSON' // 19 + | 'BSON' // 20 + | 'INTERVAL'; // 21 export interface SchemaDefinition { - [string: string]: FieldDefinition; + [string: string]: FieldDefinition; } export interface FieldDefinition { - type?: ParquetType; - typeLength?: number; - encoding?: ParquetCodec; - compression?: ParquetCompression; - optional?: boolean; - repeated?: boolean; - fields?: SchemaDefinition; - statistics?: Statistics | false; - parent?: ParentField - num_children?: NumChildrenField - precision?: number - scale?: number + type?: ParquetType; + typeLength?: number; + encoding?: ParquetCodec; + compression?: ParquetCompression; + optional?: boolean; + repeated?: boolean; + fields?: SchemaDefinition; + statistics?: Statistics | false; + parent?: ParentField; + num_children?: NumChildrenField; + precision?: number; + scale?: number; } export interface ParquetField { - name: string; - path: string[]; - statistics?: Statistics | false; - primitiveType?: PrimitiveType; - 
originalType?: OriginalType; - repetitionType: RepetitionType; - typeLength?: number; - encoding?: ParquetCodec; - compression?: ParquetCompression; - precision?: number; - scale?: number; - rLevelMax: number; - dLevelMax: number; - isNested?: boolean; - fieldCount?: number; - fields?: Record; - disableEnvelope?: boolean + name: string; + path: string[]; + statistics?: Statistics | false; + primitiveType?: PrimitiveType; + originalType?: OriginalType; + repetitionType: RepetitionType; + typeLength?: number; + encoding?: ParquetCodec; + compression?: ParquetCompression; + precision?: number; + scale?: number; + rLevelMax: number; + dLevelMax: number; + isNested?: boolean; + fieldCount?: number; + fields?: Record; + disableEnvelope?: boolean; } interface ParentField { - value: SchemaDefinition - enumerable: boolean + value: SchemaDefinition; + enumerable: boolean; } interface NumChildrenField { - value: number - enumerable:boolean + value: number; + enumerable: boolean; } export interface ParquetBuffer { - rowCount?: number; - columnData?: Record; + rowCount?: number; + columnData?: Record; } export interface ParquetRecord { - [key: string]: any; + [key: string]: any; } export interface ColumnChunkData { - rowGroupIndex: number, - column: parquet_thrift.ColumnChunk + rowGroupIndex: number; + column: parquet_thrift.ColumnChunk; } -export interface ColumnChunkExt extends parquet_thrift.ColumnChunk{ - meta_data?: ColumnMetaDataExt - columnIndex?: ColumnIndex | Promise - offsetIndex?: OffsetIndex | Promise +export interface ColumnChunkExt extends parquet_thrift.ColumnChunk { + meta_data?: ColumnMetaDataExt; + columnIndex?: ColumnIndex | Promise; + offsetIndex?: OffsetIndex | Promise; } export interface ColumnMetaDataExt extends parquet_thrift.ColumnMetaData { - offsetIndex?: OffsetIndex - columnIndex?: ColumnIndex + offsetIndex?: OffsetIndex; + columnIndex?: ColumnIndex; } export interface RowGroupExt extends parquet_thrift.RowGroup { - columns: ColumnChunkExt[]; + columns: ColumnChunkExt[]; } export declare class KeyValue { - key: string; - value?: string; + key: string; + value?: string; } -export type Block = Uint32Array +export type Block = Uint32Array; export interface BloomFilterData { - sbbf: SplitBlockBloomFilter, - columnName: string, - RowGroupIndex: number, -}; + sbbf: SplitBlockBloomFilter; + columnName: string; + RowGroupIndex: number; +} export interface Parameter { - url: string; - headers?: string + url: string; + headers?: string; } export interface PageData { - rlevels?: number[]; - dlevels?: number[]; - distinct_values?: Set - values?: number[]; - pageHeaders?: PageHeader[]; - pageHeader?: PageHeader; - count?: number; - dictionary?: Array - column?: parquet_thrift.ColumnChunk - useDictionary?: boolean + rlevels?: number[]; + dlevels?: number[]; + distinct_values?: Set; + values?: number[]; + pageHeaders?: PageHeader[]; + pageHeader?: PageHeader; + count?: number; + dictionary?: Array; + column?: parquet_thrift.ColumnChunk; + useDictionary?: boolean; } export declare class PageHeader { + type: PageType; + uncompressed_page_size: number; + compressed_page_size: number; + crc?: number; + data_page_header?: DataPageHeader; + index_page_header?: IndexPageHeader; + dictionary_page_header?: DictionaryPageHeader; + data_page_header_v2?: DataPageHeaderV2; + offset?: number; + headerSize?: number; + + constructor(args?: { type: PageType; uncompressed_page_size: number; compressed_page_size: number; @@ -164,63 +187,60 @@ export declare class PageHeader { index_page_header?: 
IndexPageHeader; dictionary_page_header?: DictionaryPageHeader; data_page_header_v2?: DataPageHeaderV2; - offset?: number; - headerSize?: number; - - constructor(args?: { type: PageType; uncompressed_page_size: number; compressed_page_size: number; crc?: number; data_page_header?: DataPageHeader; index_page_header?: IndexPageHeader; dictionary_page_header?: DictionaryPageHeader; data_page_header_v2?: DataPageHeaderV2; }); - } + }); +} - export interface ClientParameters { - Bucket: string, - Key: string - } +export interface ClientParameters { + Bucket: string; + Key: string; +} - export interface PromiseS3 { - promise: () => Promise - } +export interface PromiseS3 { + promise: () => Promise; +} - export interface ClientS3 { - accessKeyId: string, - secretAccessKey: string, - headObject: (params: ClientParameters) => PromiseS3 - getObject: (args: any) => PromiseS3 +export interface ClientS3 { + accessKeyId: string; + secretAccessKey: string; + headObject: (params: ClientParameters) => PromiseS3; + getObject: (args: any) => PromiseS3; } export interface FileMetaDataExt extends parquet_thrift.FileMetaData { - json?:JSON; - row_groups: RowGroupExt[]; - } + json?: JSON; + row_groups: RowGroupExt[]; +} export class NewPageHeader extends parquet_thrift.PageHeader { - offset?: number; - headerSize?: number; - constructor() { - super() - } + offset?: number; + headerSize?: number; + constructor() { + super(); } +} export type WriterOptions = { - pageIndex?: boolean; - pageSize?: number; - useDataPageV2?: boolean; - bloomFilters?: createSBBFParams[]; - baseOffset?: Int64; - rowGroupSize?: number; - flags?: string; - encoding?: BufferEncoding; - fd?: number; - mode?: number; - autoClose?: boolean; - emitClose?: boolean; - start?: number; - highWaterMark?: number; -} + pageIndex?: boolean; + pageSize?: number; + useDataPageV2?: boolean; + bloomFilters?: createSBBFParams[]; + baseOffset?: Int64; + rowGroupSize?: number; + flags?: string; + encoding?: BufferEncoding; + fd?: number; + mode?: number; + autoClose?: boolean; + emitClose?: boolean; + start?: number; + highWaterMark?: number; +}; export type Page = { - page: Buffer, - statistics: parquet_thrift.Statistics, - first_row_index: number, - distinct_values: Set, - num_values: number, - count?: number, -} + page: Buffer; + statistics: parquet_thrift.Statistics; + first_row_index: number; + distinct_values: Set; + num_values: number; + count?: number; +}; diff --git a/lib/fields.ts b/lib/fields.ts index f94edf5c..cc54cb1e 100644 --- a/lib/fields.ts +++ b/lib/fields.ts @@ -1,74 +1,82 @@ // Helper functions for creating fields -import { FieldDefinition, ParquetType, SchemaDefinition } from "./declare"; +import { FieldDefinition, ParquetType, SchemaDefinition } from './declare'; export function createStringField(optional = true, fieldOptions: FieldDefinition = {}): FieldDefinition { - return { ...fieldOptions, optional, type: 'UTF8' }; + return { ...fieldOptions, optional, type: 'UTF8' }; } export function createBooleanField(optional = true, fieldOptions: FieldDefinition = {}): FieldDefinition { - return { ...fieldOptions, optional, type: 'BOOLEAN' }; + return { ...fieldOptions, optional, type: 'BOOLEAN' }; } export function createIntField(size: 32 | 64, optional = true, fieldOptions: FieldDefinition = {}): FieldDefinition { - return { ...fieldOptions, optional, type: `INT${size}` }; + return { ...fieldOptions, optional, type: `INT${size}` }; } export function createFloatField(optional = true, fieldOptions: FieldDefinition = {}): FieldDefinition { - 
return { ...fieldOptions, optional, type: 'FLOAT' }; + return { ...fieldOptions, optional, type: 'FLOAT' }; } export function createDoubleField(optional = true, fieldOptions: FieldDefinition = {}): FieldDefinition { - return { ...fieldOptions, optional, type: 'DOUBLE' }; + return { ...fieldOptions, optional, type: 'DOUBLE' }; } -export function createDecimalField(precision: number, optional = true, fieldOptions: FieldDefinition = {}): FieldDefinition { - return { ...fieldOptions, precision, optional, type: 'FLOAT' }; +export function createDecimalField( + precision: number, + optional = true, + fieldOptions: FieldDefinition = {} +): FieldDefinition { + return { ...fieldOptions, precision, optional, type: 'FLOAT' }; } export function createTimestampField(optional = true, fieldOptions: FieldDefinition = {}): FieldDefinition { - return { ...fieldOptions, optional, type: 'TIMESTAMP_MILLIS' }; + return { ...fieldOptions, optional, type: 'TIMESTAMP_MILLIS' }; } export function createStructField(fields: SchemaDefinition, optional = true): FieldDefinition { - return { - optional, - fields, - } + return { + optional, + fields, + }; } export function createStructListField(fields: SchemaDefinition, optional = true): FieldDefinition { - return { - type: 'LIST', - optional, + return { + type: 'LIST', + optional, + fields: { + list: { + repeated: true, fields: { - list: { - repeated: true, - fields: { - element: { - fields, - }, - } - }, + element: { + fields, + }, }, - } + }, + }, + }; } -export function createListField(type: ParquetType, optional = true, elementOptions: FieldDefinition = { optional: true }): FieldDefinition { - return { - type: 'LIST', - optional, +export function createListField( + type: ParquetType, + optional = true, + elementOptions: FieldDefinition = { optional: true } +): FieldDefinition { + return { + type: 'LIST', + optional, + fields: { + list: { + repeated: true, fields: { - list: { - repeated: true, - fields: { - element: { - optional: true, - ...elementOptions, - type, - }, - }, - }, + element: { + optional: true, + ...elementOptions, + type, + }, }, - } + }, + }, + }; } diff --git a/lib/jsonSchema.ts b/lib/jsonSchema.ts index f7b401ec..5466e1c3 100644 --- a/lib/jsonSchema.ts +++ b/lib/jsonSchema.ts @@ -3,133 +3,139 @@ import { JSONSchema4 } from 'json-schema'; import { FieldDefinition, SchemaDefinition } from './declare'; import * as fields from './fields'; -type SupportedJSONSchema4 = Omit +type SupportedJSONSchema4 = Omit< + JSONSchema4, + '$ref' | 'multipleOf' | 'allOf' | 'anyOf' | 'oneOf' | 'not' | 'additionalItems' | 'enum' | 'extends' +>; /** * Simple check to make sure that `SupportedJSONSchema4` is correct. * There are a lot of JSON schema stuff we just don't support for now. 
*/ const isJsonSchemaSupported = (js: JSONSchema4): js is SupportedJSONSchema4 => { - const unsupportedFields = [ - "$ref", - "multipleOf", - "allOf", - "anyOf", - "oneOf", - "not", - "additionalItems", - "enum", - "extends", - ]; - for (const field in unsupportedFields) { - if (!(js[field] === undefined || js[field] === false)) { - return false; - } + const unsupportedFields = [ + '$ref', + 'multipleOf', + 'allOf', + 'anyOf', + 'oneOf', + 'not', + 'additionalItems', + 'enum', + 'extends', + ]; + for (const field in unsupportedFields) { + if (!(js[field] === undefined || js[field] === false)) { + return false; } - return true; -} + } + return true; +}; /** * Error to capture all the unsupported edge cases */ export class UnsupportedJsonSchemaError extends Error { - constructor(msg: string) { - const message = `Unsupported JSON schema: ${msg}`; - super(message); - this.name = 'UnsupportedJsonSchemaError'; - } + constructor(msg: string) { + const message = `Unsupported JSON schema: ${msg}`; + super(message); + this.name = 'UnsupportedJsonSchemaError'; + } } /** * Json Schema has required at the top level instead of field level */ -const isJsonSchemaRequired = (jsonSchema: SupportedJSONSchema4) => (field: string): boolean => { +const isJsonSchemaRequired = + (jsonSchema: SupportedJSONSchema4) => + (field: string): boolean => { switch (jsonSchema.required) { - case true: return true; - case undefined: - case false: - return false; + case true: + return true; + case undefined: + case false: + return false; } return jsonSchema.required.includes(field); -} + }; /** * Converts the Array field type into the correct Field Definition */ const fromJsonSchemaArray = (fieldValue: SupportedJSONSchema4, optionalFieldList: boolean): FieldDefinition => { - if (!fieldValue.items || !fieldValue.items.type) { - throw new UnsupportedJsonSchemaError("Array field with no values found."); - } + if (!fieldValue.items || !fieldValue.items.type) { + throw new UnsupportedJsonSchemaError('Array field with no values found.'); + } - switch (fieldValue.items.type) { - case 'string': - if (fieldValue.items.format && fieldValue.items.format == 'date-time') { - return fields.createListField('TIMESTAMP_MILLIS', optionalFieldList); - } - return fields.createListField('UTF8', optionalFieldList); - case 'integer': - return fields.createListField('INT64', optionalFieldList); - case 'number': - return fields.createListField('DOUBLE', optionalFieldList); - case 'boolean': - return fields.createListField('BOOLEAN', optionalFieldList); - case 'object': - return fields.createStructListField(fromJsonSchema(fieldValue.items), optionalFieldList); - default: - throw new UnsupportedJsonSchemaError(`Array field type ${JSON.stringify(fieldValue.items)} is unsupported.`); - } -} + switch (fieldValue.items.type) { + case 'string': + if (fieldValue.items.format && fieldValue.items.format == 'date-time') { + return fields.createListField('TIMESTAMP_MILLIS', optionalFieldList); + } + return fields.createListField('UTF8', optionalFieldList); + case 'integer': + return fields.createListField('INT64', optionalFieldList); + case 'number': + return fields.createListField('DOUBLE', optionalFieldList); + case 'boolean': + return fields.createListField('BOOLEAN', optionalFieldList); + case 'object': + return fields.createStructListField(fromJsonSchema(fieldValue.items), optionalFieldList); + default: + throw new UnsupportedJsonSchemaError(`Array field type ${JSON.stringify(fieldValue.items)} is unsupported.`); + } +}; /** * Converts a field from a JSON 
Schema into a Parquet Field Definition */ -const fromJsonSchemaField = (jsonSchema: JSONSchema4) => (fieldName: string, fieldValue: JSONSchema4): FieldDefinition => { +const fromJsonSchemaField = + (jsonSchema: JSONSchema4) => + (fieldName: string, fieldValue: JSONSchema4): FieldDefinition => { if (!isJsonSchemaSupported(fieldValue)) { - throw new UnsupportedJsonSchemaError(`Field: ${fieldName} has an unsupported schema`); + throw new UnsupportedJsonSchemaError(`Field: ${fieldName} has an unsupported schema`); } const optional = !isJsonSchemaRequired(jsonSchema)(fieldName); switch (fieldValue.type) { - case 'string': - if (fieldValue.format && fieldValue.format == 'date-time') { - return fields.createTimestampField(optional); - } - return fields.createStringField(optional); - case 'integer': - return fields.createIntField(64, optional); - case 'number': - return fields.createDoubleField(optional); - case 'boolean': - return fields.createBooleanField(optional); - case 'array': - return fromJsonSchemaArray(fieldValue, optional); - case 'object': - return fields.createStructField(fromJsonSchema(fieldValue), optional); - default: - throw new UnsupportedJsonSchemaError( - `Unable to convert "${fieldName}" with JSON Schema type "${fieldValue.type}" to a Parquet Schema.`, - ) + case 'string': + if (fieldValue.format && fieldValue.format == 'date-time') { + return fields.createTimestampField(optional); + } + return fields.createStringField(optional); + case 'integer': + return fields.createIntField(64, optional); + case 'number': + return fields.createDoubleField(optional); + case 'boolean': + return fields.createBooleanField(optional); + case 'array': + return fromJsonSchemaArray(fieldValue, optional); + case 'object': + return fields.createStructField(fromJsonSchema(fieldValue), optional); + default: + throw new UnsupportedJsonSchemaError( + `Unable to convert "${fieldName}" with JSON Schema type "${fieldValue.type}" to a Parquet Schema.` + ); } -} + }; /** * Converts supported Json Schemas into Parquet Schema Definitions */ export const fromJsonSchema = (jsonSchema: JSONSchema4): SchemaDefinition => { - if (!isJsonSchemaSupported(jsonSchema)) { - throw new UnsupportedJsonSchemaError("Unsupported fields found"); - } + if (!isJsonSchemaSupported(jsonSchema)) { + throw new UnsupportedJsonSchemaError('Unsupported fields found'); + } - const schema: SchemaDefinition = {}; + const schema: SchemaDefinition = {}; - const fromField = fromJsonSchemaField(jsonSchema) + const fromField = fromJsonSchemaField(jsonSchema); - for (const [fieldName, fieldValue] of Object.entries( - jsonSchema.properties || {}, - )) { - schema[fieldName] = fromField(fieldName, fieldValue); - } + for (const [fieldName, fieldValue] of Object.entries(jsonSchema.properties || {})) { + schema[fieldName] = fromField(fieldName, fieldValue); + } - return schema; -} + return schema; +}; diff --git a/lib/reader.ts b/lib/reader.ts index 113fb517..7a1c2630 100644 --- a/lib/reader.ts +++ b/lib/reader.ts @@ -6,7 +6,7 @@ import * as parquet_schema from './schema'; import * as parquet_codec from './codec'; import * as parquet_compression from './compression'; import * as parquet_types from './types'; -import BufferReader, {BufferReaderOptions} from './bufferReader'; +import BufferReader, { BufferReaderOptions } from './bufferReader'; import * as bloomFilterReader from './bloomFilterIO/bloomFilterReader'; import fetch from 'cross-fetch'; import { @@ -22,20 +22,14 @@ import { FileMetaDataExt, NewPageHeader, RowGroupExt, - ColumnChunkExt + 
ColumnChunkExt, } from './declare'; -import {Cursor, Options} from './codec/types'; -import { - GetObjectCommand, - HeadObjectCommand, - S3Client, -} from "@aws-sdk/client-s3"; -import type { Readable } from "stream"; -import type { Blob } from "buffer"; +import { Cursor, Options } from './codec/types'; +import { GetObjectCommand, HeadObjectCommand, S3Client } from '@aws-sdk/client-s3'; +import type { Readable } from 'stream'; +import type { Blob } from 'buffer'; -const { - getBloomFiltersFor, -} = bloomFilterReader; +const { getBloomFiltersFor } = bloomFilterReader; /** * Parquet File Magic String @@ -57,7 +51,6 @@ const PARQUET_RDLVL_ENCODING = 'RLE'; * A parquet cursor is used to retrieve rows from a parquet file in order */ class ParquetCursor { - metadata: FileMetaDataExt; envelopeReader: ParquetEnvelopeReader; schema: parquet_schema.ParquetSchema; @@ -72,7 +65,12 @@ class ParquetCursor { * advanced and internal use cases. Consider using getCursor() on the * ParquetReader instead */ - constructor(metadata: FileMetaDataExt, envelopeReader: ParquetEnvelopeReader, schema: parquet_schema.ParquetSchema, columnList: Array>) { + constructor( + metadata: FileMetaDataExt, + envelopeReader: ParquetEnvelopeReader, + schema: parquet_schema.ParquetSchema, + columnList: Array> + ) { this.metadata = metadata; this.envelopeReader = envelopeReader; this.schema = schema; @@ -89,14 +87,14 @@ class ParquetCursor { async next() { if (this.cursorIndex >= this.rowGroup.length) { if (this.rowGroupIndex >= this.metadata.row_groups.length) { - return null; } let rowBuffer = await this.envelopeReader.readRowGroup( this.schema, this.metadata.row_groups[this.rowGroupIndex], - this.columnList); + this.columnList + ); this.rowGroup = parquet_shredder.materializeRecords(this.schema, rowBuffer); this.rowGroupIndex++; @@ -114,7 +112,7 @@ class ParquetCursor { this.rowGroupIndex = 0; this.cursorIndex = 0; } -}; +} /** * A parquet reader allows retrieving the rows from a parquet file in order. @@ -124,10 +122,9 @@ class ParquetCursor { * avoid leaking file descriptors. */ export class ParquetReader { - envelopeReader: ParquetEnvelopeReader | null; metadata: FileMetaDataExt | null; - schema: parquet_schema.ParquetSchema + schema: parquet_schema.ParquetSchema; /** * Open the parquet file pointed to by the specified path and return a new @@ -154,9 +151,9 @@ export class ParquetReader { static async openS3(client: any, params: ClientParameters, options?: BufferReaderOptions) { try { let envelopeReader: ParquetEnvelopeReader = - 'function' === typeof client['headObject'] ? - await ParquetEnvelopeReader.openS3(client as ClientS3, params, options) :// S3 client v2 - await ParquetEnvelopeReader.openS3v3(client as S3Client, params, options) ; // S3 client v3 + 'function' === typeof client['headObject'] + ? await ParquetEnvelopeReader.openS3(client as ClientS3, params, options) // S3 client v2 + : await ParquetEnvelopeReader.openS3v3(client as S3Client, params, options); // S3 client v3 return this.openEnvelopeReader(envelopeReader, options); } catch (e: any) { throw new Error(`Error accessing S3 Bucket ${params.Bucket}. 
Message: ${e.message}`); @@ -205,9 +202,9 @@ export class ParquetReader { // If metadata is a json file then we need to convert INT64 and CTIME if (metadata.json) { - const convert = (o: {[string: string]: any } ) => { - if (o && typeof o === 'object') { - Object.keys(o).forEach(key => o[key] = convert(o[key])); + const convert = (o: { [string: string]: any }) => { + if (o && typeof o === 'object') { + Object.keys(o).forEach((key) => (o[key] = convert(o[key]))); if (o.parquetType === 'CTIME') { return new Date(o.value); } else if (o.parquetType === 'INT64') { @@ -218,14 +215,16 @@ export class ParquetReader { }; // Go through all PageLocation objects and set the proper prototype - metadata.row_groups.forEach(rowGroup => { - rowGroup.columns.forEach(column => { + metadata.row_groups.forEach((rowGroup) => { + rowGroup.columns.forEach((column) => { if (column.offsetIndex) { - Promise.resolve(column.offsetIndex).then(offset => (offset.page_locations.forEach(d => { - if (Array.isArray(d)) { - Object.setPrototypeOf(d,parquet_thrift.PageLocation.prototype); - } - }))); + Promise.resolve(column.offsetIndex).then((offset) => + offset.page_locations.forEach((d) => { + if (Array.isArray(d)) { + Object.setPrototypeOf(d, parquet_thrift.PageLocation.prototype); + } + }) + ); } }); }); @@ -236,21 +235,23 @@ export class ParquetReader { this.metadata = envelopeReader.metadata = metadata; this.envelopeReader = envelopeReader; this.schema = envelopeReader.schema = new parquet_schema.ParquetSchema( - decodeSchema( - this.metadata.schema.slice(1)) as SchemaDefinition); + decodeSchema(this.metadata.schema.slice(1)) as SchemaDefinition + ); /* decode any statistics values */ if (this.metadata.row_groups && !this.metadata.json && !opts.rawStatistics) { - this.metadata.row_groups.forEach(row => row.columns.forEach( col => { - const stats = col.meta_data!.statistics; - if (stats) { - const field = this.schema.findField(col.meta_data!.path_in_schema); - stats.max_value = decodeStatisticsValue(stats.max_value, field); - stats.min_value = decodeStatisticsValue(stats.min_value, field); - stats.min = decodeStatisticsValue(stats.min, field); - stats.max = decodeStatisticsValue(stats.max, field); - } - })); + this.metadata.row_groups.forEach((row) => + row.columns.forEach((col) => { + const stats = col.meta_data!.statistics; + if (stats) { + const field = this.schema.findField(col.meta_data!.path_in_schema); + stats.max_value = decodeStatisticsValue(stats.max_value, field); + stats.min_value = decodeStatisticsValue(stats.min_value, field); + stats.min = decodeStatisticsValue(stats.min, field); + stats.max = decodeStatisticsValue(stats.max, field); + } + }) + ); } } @@ -264,15 +265,13 @@ export class ParquetReader { * } * ``` */ - async* [Symbol.asyncIterator]() { + async *[Symbol.asyncIterator]() { const cursor = this.getCursor(); let record = null; - while (record = await cursor.next()) { + while ((record = await cursor.next())) { yield record; } -} - - + } /** * Return a cursor to the file. You may open more than one cursor and use @@ -288,20 +287,16 @@ export class ParquetReader { columnList = []; } - columnList = columnList.map((x: Array) => x.constructor === Array ? x : [x]); + columnList = columnList.map((x: Array) => (x.constructor === Array ? 
x : [x])); - return new ParquetCursor( - this.metadata!, - this.envelopeReader!, - this.schema, - columnList); + return new ParquetCursor(this.metadata!, this.envelopeReader!, this.schema, columnList); } async getBloomFiltersFor(columnNames: string[]) { const bloomFilterData = await getBloomFiltersFor(columnNames, this.envelopeReader!); return bloomFilterData.reduce((acc: Record, value) => { - if (acc[value.columnName]) acc[value.columnName].push(value) - else acc[value.columnName] = [value] + if (acc[value.columnName]) acc[value.columnName].push(value); + else acc[value.columnName] = [value]; return acc; }, {}); } @@ -334,7 +329,7 @@ export class ParquetReader { } async exportMetadata(indent: string | number | undefined) { - function replacer(_key: unknown, value: parquet_thrift.PageLocation | bigint | {[string:string]: any}) { + function replacer(_key: unknown, value: parquet_thrift.PageLocation | bigint | { [string: string]: any }) { if (value instanceof parquet_thrift.PageLocation) { return [value.offset, value.compressed_page_size, value.first_row_index]; } @@ -344,7 +339,7 @@ export class ParquetReader { if (value[k] instanceof Date) { value[k].toJSON = () => ({ parquetType: 'CTIME', - value: value[k].valueOf() + value: value[k].valueOf(), }); } } @@ -360,29 +355,29 @@ export class ParquetReader { } else { return { parquetType: 'INT64', - value: [...value.buffer] + value: [...value.buffer], }; } } else { return value; } } - const metadata = Object.assign({}, this.metadata, {json: true}); + const metadata = Object.assign({}, this.metadata, { json: true }); for (let i = 0; i < metadata.row_groups.length; i++) { const rowGroup = metadata.row_groups[i]; for (let j = 0; j < rowGroup.columns.length; j++) { const column = rowGroup.columns[j]; - if (column.offsetIndex instanceof Promise) { - column.offsetIndex = await column.offsetIndex; - } - if (column.columnIndex instanceof Promise) { - column.columnIndex = await column.columnIndex; - } + if (column.offsetIndex instanceof Promise) { + column.offsetIndex = await column.offsetIndex; + } + if (column.columnIndex instanceof Promise) { + column.columnIndex = await column.columnIndex; + } } } - return JSON.stringify(metadata,replacer,indent); + return JSON.stringify(metadata, replacer, indent); } /** @@ -398,7 +393,6 @@ export class ParquetReader { decodePages(buffer: Buffer, opts: Options) { return decodePages(buffer, opts); } - } /** @@ -416,7 +410,7 @@ export class ParquetEnvelopeReader { fileSize: Function | number; default_dictionary_size: number; metadata?: FileMetaDataExt; - schema?: parquet_schema.ParquetSchema + schema?: parquet_schema.ParquetSchema; static async openFile(filePath: string | Buffer | URL, options?: BufferReaderOptions) { let fileStat = await parquet_util.fstat(filePath); @@ -449,15 +443,19 @@ export class ParquetEnvelopeReader { } static async openS3(client: ClientS3, params: ClientParameters, options?: BufferReaderOptions) { - let fileStat = async () => client.headObject(params).promise().then((d: { ContentLength: number }) => d.ContentLength); + let fileStat = async () => + client + .headObject(params) + .promise() + .then((d: { ContentLength: number }) => d.ContentLength); let readFn = async (offset: number, length: number, file?: string) => { if (file) { return Promise.reject('external references are not supported'); } - let Range = `bytes=${offset}-${offset+length-1}`; - let res = await client.getObject(Object.assign({Range}, params)).promise(); + let Range = `bytes=${offset}-${offset + length - 1}`; + let res = 
await client.getObject(Object.assign({ Range }, params)).promise(); return Promise.resolve(res.Body); }; @@ -471,18 +469,17 @@ export class ParquetEnvelopeReader { try { let headObjectCommand = await client.send(new HeadObjectCommand(params)); return Promise.resolve(headObjectCommand.ContentLength); - } - catch (e: any){ + } catch (e: any) { // having params match command names makes e.message clear to user - return Promise.reject("rejected headObjectCommand: " + e.message); + return Promise.reject('rejected headObjectCommand: ' + e.message); } - } + }; - const readFn = async (offset: number, length: number, file: string|undefined): Promise => { + const readFn = async (offset: number, length: number, file: string | undefined): Promise => { if (file) { - return Promise.reject("external references are not supported"); + return Promise.reject('external references are not supported'); } - const Range = `bytes=${offset}-${offset+length-1}`; + const Range = `bytes=${offset}-${offset + length - 1}`; const input = { ...{ Range }, ...params }; const response = await client.send(new GetObjectCommand(input)); @@ -510,36 +507,34 @@ export class ParquetEnvelopeReader { const readable = body as Readable; return await new Promise((resolve, reject) => { const chunks: Uint8Array[] = []; - readable.on("data", (chunk) => chunks.push(chunk)); - readable.on("error", reject); - readable.on("end", () => resolve(Buffer.concat(chunks))); + readable.on('data', (chunk) => chunks.push(chunk)); + readable.on('error', reject); + readable.on('end', () => resolve(Buffer.concat(chunks))); }); } static async openUrl(url: Parameter | URL | string, options?: BufferReaderOptions) { let params: Parameter; if (typeof url === 'string') params = { url }; - else if(url instanceof URL) params = { url: url.toString() } + else if (url instanceof URL) params = { url: url.toString() }; else params = url; - if (!params.url) - throw new Error('URL missing'); + if (!params.url) throw new Error('URL missing'); const baseArr = params.url.split('/'); - const base = baseArr.slice(0, baseArr.length-1).join('/')+'/'; + const base = baseArr.slice(0, baseArr.length - 1).join('/') + '/'; let defaultHeaders = params.headers || {}; let filesize = async () => { - const { headers } = await fetch(params.url); return headers.get('Content-Length'); }; let readFn = async (offset: number, length: number, file?: string) => { - let url = file ? base+file : params.url; - let range = `bytes=${offset}-${offset+length-1}`; - let headers = Object.assign({}, defaultHeaders, {range}); + let url = file ? 
base + file : params.url; + let range = `bytes=${offset}-${offset + length - 1}`; + let headers = Object.assign({}, defaultHeaders, { range }); const response = await fetch(url, { headers }); const arrayBuffer = await response.arrayBuffer(); const buffer = Buffer.from(arrayBuffer); @@ -552,7 +547,13 @@ export class ParquetEnvelopeReader { return new ParquetEnvelopeReader(readFn, closeFn, filesize, options); } - constructor(readFn: (offset: number, length: number, file?: string) => Promise , closeFn: () => unknown, fileSize: Function | number, options?: BufferReaderOptions, metadata?: FileMetaDataExt) { + constructor( + readFn: (offset: number, length: number, file?: string) => Promise, + closeFn: () => unknown, + fileSize: Function | number, + options?: BufferReaderOptions, + metadata?: FileMetaDataExt + ) { options = options || {}; this.readFn = readFn; this.id = ++ParquetEnvelopeReaderIdCounter; @@ -572,15 +573,17 @@ export class ParquetEnvelopeReader { readHeader() { return this.read(0, PARQUET_MAGIC.length).then((buf: Buffer) => { - if (buf.toString() != PARQUET_MAGIC) { - throw 'not valid parquet file' + throw 'not valid parquet file'; } }); } // Helper function to get the column object for a particular path and row_group - getColumn(path: string | parquet_thrift.ColumnChunk, row_group: RowGroupExt | number | string | null) : ColumnChunkExt { + getColumn( + path: string | parquet_thrift.ColumnChunk, + row_group: RowGroupExt | number | string | null + ): ColumnChunkExt { let column; let parsedRowGroup: parquet_thrift.RowGroup | undefined; if (!isNaN(Number(row_group))) { @@ -591,9 +594,9 @@ export class ParquetEnvelopeReader { if (typeof path === 'string') { if (!parsedRowGroup) { - throw `Missing RowGroup ${row_group}`; + throw `Missing RowGroup ${row_group}`; } - column = parsedRowGroup.columns.find(d => d.meta_data!.path_in_schema.join(',') === path); + column = parsedRowGroup.columns.find((d) => d.meta_data!.path_in_schema.join(',') === path); if (!column) { throw `Column ${path} Not Found`; @@ -610,14 +613,18 @@ export class ParquetEnvelopeReader { } return row_groups.flatMap((rowGroup, index) => - paths.map(columnName => ({ - rowGroupIndex: index, - column: this.getColumn(columnName, rowGroup) - })) - ) + paths.map((columnName) => ({ + rowGroupIndex: index, + column: this.getColumn(columnName, rowGroup), + })) + ); } - readOffsetIndex(path: string | ColumnChunkExt, row_group: RowGroupExt | number | null, opts: Options): Promise { + readOffsetIndex( + path: string | ColumnChunkExt, + row_group: RowGroupExt | number | null, + opts: Options + ): Promise { let column = this.getColumn(path, row_group); if (column.offsetIndex) { return Promise.resolve(column.offsetIndex); @@ -628,7 +635,7 @@ export class ParquetEnvelopeReader { const data = this.read(+column.offset_index_offset, column.offset_index_length).then((data: Buffer) => { let offset_index = new parquet_thrift.OffsetIndex(); parquet_util.decodeThrift(offset_index, data); - Object.defineProperty(offset_index,'column', {value: column, enumerable: false}); + Object.defineProperty(offset_index, 'column', { value: column, enumerable: false }); return offset_index; }); if (opts?.cache) { @@ -637,7 +644,11 @@ export class ParquetEnvelopeReader { return data; } - readColumnIndex(path: string | ColumnChunkExt, row_group: RowGroupExt | number, opts: Options): Promise { + readColumnIndex( + path: string | ColumnChunkExt, + row_group: RowGroupExt | number, + opts: Options + ): Promise { let column = this.getColumn(path, row_group); if 
(column.columnIndex) { return Promise.resolve(column.columnIndex); @@ -645,7 +656,7 @@ export class ParquetEnvelopeReader { return Promise.reject(new Error('Column Index Missing')); } - const data = this.read(+column.column_index_offset, (column.column_index_length as number)).then((buf: Buffer) => { + const data = this.read(+column.column_index_offset, column.column_index_length as number).then((buf: Buffer) => { let column_index = new parquet_thrift.ColumnIndex(); parquet_util.decodeThrift(column_index, buf); Object.defineProperty(column_index, 'column', { value: column }); @@ -653,10 +664,10 @@ export class ParquetEnvelopeReader { // decode the statistics values const field = this.schema!.findField(column.meta_data!.path_in_schema); if (column_index.max_values) { - column_index.max_values = column_index.max_values.map(max_value => decodeStatisticsValue(max_value, field)); + column_index.max_values = column_index.max_values.map((max_value) => decodeStatisticsValue(max_value, field)); } if (column_index.min_values) { - column_index.min_values = column_index.min_values.map(min_value => decodeStatisticsValue(min_value, field)); + column_index.min_values = column_index.min_values.map((min_value) => decodeStatisticsValue(min_value, field)); } return column_index; }); @@ -667,25 +678,32 @@ export class ParquetEnvelopeReader { return data; } - async readPage(column: ColumnChunkExt, page: parquet_thrift.PageLocation | number, records: Array>, opts: Options) { - column = Object.assign({},column); - column.meta_data = Object.assign({},column.meta_data); + async readPage( + column: ColumnChunkExt, + page: parquet_thrift.PageLocation | number, + records: Array>, + opts: Options + ) { + column = Object.assign({}, column); + column.meta_data = Object.assign({}, column.meta_data); if (page instanceof parquet_thrift.PageLocation && page.offset !== undefined) { if (isNaN(Number(page.offset)) || isNaN(page.compressed_page_size)) { throw Error('page offset and/or size missing'); } column.meta_data.data_page_offset = parquet_util.cloneInteger(page.offset); - column.meta_data.total_compressed_size = new Int64(page.compressed_page_size); + column.meta_data.total_compressed_size = new Int64(page.compressed_page_size); } else { const offsetIndex = await this.readOffsetIndex(column, null, opts); column.meta_data.data_page_offset = parquet_util.cloneInteger(offsetIndex.page_locations[page as number].offset); - column.meta_data.total_compressed_size = new Int64(offsetIndex.page_locations[page as number].compressed_page_size); + column.meta_data.total_compressed_size = new Int64( + offsetIndex.page_locations[page as number].compressed_page_size + ); } const chunk = await this.readColumnChunk(this.schema!, column); - Object.defineProperty(chunk,'column', {value: column}); + Object.defineProperty(chunk, 'column', { value: column }); let data = { - columnData: {[chunk.column!.meta_data!.path_in_schema.join(',')]: chunk} + columnData: { [chunk.column!.meta_data!.path_in_schema.join(',')]: chunk }, }; return parquet_shredder.materializeRecords(this.schema!, data, records); @@ -696,7 +714,7 @@ export class ParquetEnvelopeReader { rowCount: +rowGroup.num_rows, columnData: {}, pageRowCount: 0, - pages: {} + pages: {}, }; for (let colChunk of rowGroup.columns) { @@ -714,15 +732,11 @@ export class ParquetEnvelopeReader { } async readColumnChunk(schema: parquet_schema.ParquetSchema, colChunk: ColumnChunkExt, opts?: Options) { - let metadata = colChunk.meta_data! 
+ let metadata = colChunk.meta_data!; let field = schema.findField(metadata.path_in_schema); - let type = parquet_util.getThriftEnum( - parquet_thrift.Type, - metadata.type); + let type = parquet_util.getThriftEnum(parquet_thrift.Type, metadata.type); - let compression = parquet_util.getThriftEnum( - parquet_thrift.CompressionCodec, - metadata.codec); + let compression = parquet_util.getThriftEnum(parquet_thrift.CompressionCodec, metadata.codec); let pagesOffset = +metadata.data_page_offset; let pagesSize = +metadata.total_compressed_size; @@ -731,13 +745,13 @@ export class ParquetEnvelopeReader { pagesSize = Math.min((this.fileSize as number) - pagesOffset, +metadata.total_compressed_size); } - opts = Object.assign({},opts, { + opts = Object.assign({}, opts, { type: type, rLevelMax: field.rLevelMax, dLevelMax: field.dLevelMax, compression: compression, column: field, - num_values: metadata.num_values + num_values: metadata.num_values, }); // If this exists and is greater than zero then we need to have an offset @@ -746,14 +760,15 @@ export class ParquetEnvelopeReader { const size = Math.min(+this.fileSize - offset, this.default_dictionary_size); await this.read(offset, size, colChunk.file_path).then(async (buffer: Buffer) => { - await decodePage({offset: 0, buffer, size: buffer.length}, opts!).then(dict => { - opts!.dictionary = opts!.dictionary || dict.dictionary as number[]; - }) - }) - + await decodePage({ offset: 0, buffer, size: buffer.length }, opts!).then((dict) => { + opts!.dictionary = opts!.dictionary || (dict.dictionary as number[]); + }); + }); } - return this.read(pagesOffset, pagesSize, colChunk.file_path).then((pagesBuf: Buffer) => decodePages(pagesBuf, opts!)); + return this.read(pagesOffset, pagesSize, colChunk.file_path).then((pagesBuf: Buffer) => + decodePages(pagesBuf, opts!) 
+ ); } async readFooter() { @@ -781,13 +796,18 @@ export class ParquetEnvelopeReader { parquet_util.decodeThrift(metadata, metadataBuf); return metadata; } - } /** * Decode a consecutive array of data using one of the parquet encodings */ -function decodeValues(type: string, encoding: ParquetCodec, cursor: Cursor, count: number, opts: Options | {bitWidth: number}) { +function decodeValues( + type: string, + encoding: ParquetCodec, + cursor: Cursor, + count: number, + opts: Options | { bitWidth: number } +) { if (!(encoding in parquet_codec)) { throw 'invalid encoding: ' + encoding; } @@ -795,13 +815,18 @@ function decodeValues(type: string, encoding: ParquetCodec, cursor: Cursor, coun return parquet_codec[encoding].decodeValues(type, cursor, count, opts as Options); } - function decodeStatisticsValue(value: any, column: ParquetField | Options) { if (value === null || !value.length) { return undefined; } if (!column.primitiveType!.includes('BYTE_ARRAY')) { - value = decodeValues(column.primitiveType!,'PLAIN',{buffer: Buffer.from(value), offset: 0}, 1, column as Options); + value = decodeValues( + column.primitiveType!, + 'PLAIN', + { buffer: Buffer.from(value), offset: 0 }, + 1, + column as Options + ); if (value.length === 1) value = value[0]; } @@ -837,27 +862,31 @@ async function decodePage(cursor: Cursor, opts: Options): Promise { const headerSize = parquet_util.decodeThrift(pageHeader, cursor.buffer.subarray(cursor.offset)); cursor.offset += headerSize; - const pageType = parquet_util.getThriftEnum( - parquet_thrift.PageType, - pageHeader.type); + const pageType = parquet_util.getThriftEnum(parquet_thrift.PageType, pageHeader.type); switch (pageType) { case 'DATA_PAGE': if (!opts.rawStatistics) { - pageHeader.data_page_header!.statistics = decodeStatistics(pageHeader.data_page_header!.statistics!, opts.column!); + pageHeader.data_page_header!.statistics = decodeStatistics( + pageHeader.data_page_header!.statistics!, + opts.column! + ); } page = await decodeDataPage(cursor, pageHeader, opts); break; case 'DATA_PAGE_V2': if (!opts.rawStatistics) { - pageHeader.data_page_header_v2!.statistics = decodeStatistics(pageHeader.data_page_header_v2!.statistics!, opts.column!); + pageHeader.data_page_header_v2!.statistics = decodeStatistics( + pageHeader.data_page_header_v2!.statistics!, + opts.column! + ); } page = await decodeDataPageV2(cursor, pageHeader, opts); break; case 'DICTIONARY_PAGE': - const dict = await decodeDictionaryPage(cursor, pageHeader, opts) + const dict = await decodeDictionaryPage(cursor, pageHeader, opts); page = { - dictionary: dict + dictionary: dict, }; break; default: @@ -871,13 +900,12 @@ async function decodePage(cursor: Cursor, opts: Options): Promise { return page; } - async function decodePages(buffer: Buffer, opts: Options) { opts = opts || {}; let cursor = { buffer: buffer, offset: 0, - size: buffer.length + size: buffer.length, }; let data: PageData = { @@ -885,7 +913,7 @@ async function decodePages(buffer: Buffer, opts: Options) { dlevels: [], values: [], pageHeaders: [], - count: 0 + count: 0, }; while (cursor.offset < cursor.size && (!opts.num_values || data.dlevels!.length < opts.num_values)) { @@ -901,7 +929,7 @@ async function decodePages(buffer: Buffer, opts: Options) { // not (PLAIN for example). if (opts.dictionary && pageData.useDictionary) { - pageData.values = pageData.values!.map(d => opts.dictionary![d]); + pageData.values = pageData.values!.map((d) => opts.dictionary![d]); } let length = pageData.rlevels != undefined ? 
pageData.rlevels.length : 0; @@ -926,27 +954,32 @@ async function decodeDictionaryPage(cursor: Cursor, header: parquet_thrift.PageH let dictCursor = { offset: 0, - buffer: cursor.buffer.subarray(cursor.offset,cursorEnd), - size: cursorEnd - cursor.offset + buffer: cursor.buffer.subarray(cursor.offset, cursorEnd), + size: cursorEnd - cursor.offset, }; cursor.offset = cursorEnd; if (opts.compression && opts.compression !== 'UNCOMPRESSED') { let valuesBuf = await parquet_compression.inflate( - opts.compression, - dictCursor.buffer.subarray(dictCursor.offset,cursorEnd)); + opts.compression, + dictCursor.buffer.subarray(dictCursor.offset, cursorEnd) + ); dictCursor = { buffer: valuesBuf, offset: 0, - size: valuesBuf.length + size: valuesBuf.length, }; } - return decodeValues(opts.column!.primitiveType!, opts.column!.encoding!, dictCursor, (header.dictionary_page_header!).num_values, opts) - .map((d:Array) => d.toString()); - + return decodeValues( + opts.column!.primitiveType!, + opts.column!.encoding!, + dictCursor, + header.dictionary_page_header!.num_values, + opts + ).map((d: Array) => d.toString()); } async function decodeDataPage(cursor: Cursor, header: parquet_thrift.PageHeader, opts: Options) { @@ -955,55 +988,42 @@ async function decodeDataPage(cursor: Cursor, header: parquet_thrift.PageHeader, const dataPageHeader = header.data_page_header!; let valueCount = dataPageHeader.num_values; - let valueEncoding = parquet_util.getThriftEnum( - parquet_thrift.Encoding, - dataPageHeader.encoding); + let valueEncoding = parquet_util.getThriftEnum(parquet_thrift.Encoding, dataPageHeader.encoding); let valuesBufCursor = cursor; if (opts.compression && opts.compression !== 'UNCOMPRESSED') { let valuesBuf = await parquet_compression.inflate( - opts.compression, - cursor.buffer.subarray(cursor.offset, cursorEnd)); + opts.compression, + cursor.buffer.subarray(cursor.offset, cursorEnd) + ); valuesBufCursor = { buffer: valuesBuf, offset: 0, - size: valuesBuf.length + size: valuesBuf.length, }; } - - /* read repetition levels */ - let rLevelEncoding = parquet_util.getThriftEnum( - parquet_thrift.Encoding, - dataPageHeader.repetition_level_encoding); + let rLevelEncoding = parquet_util.getThriftEnum(parquet_thrift.Encoding, dataPageHeader.repetition_level_encoding); let rLevels = new Array(valueCount); if (opts.rLevelMax! > 0) { - rLevels = decodeValues( - PARQUET_RDLVL_TYPE, - rLevelEncoding as ParquetCodec, - valuesBufCursor, - valueCount, - { bitWidth: parquet_util.getBitWidth(opts.rLevelMax!) }); + rLevels = decodeValues(PARQUET_RDLVL_TYPE, rLevelEncoding as ParquetCodec, valuesBufCursor, valueCount, { + bitWidth: parquet_util.getBitWidth(opts.rLevelMax!), + }); } else { rLevels.fill(0); } /* read definition levels */ - let dLevelEncoding = parquet_util.getThriftEnum( - parquet_thrift.Encoding, - dataPageHeader.definition_level_encoding); + let dLevelEncoding = parquet_util.getThriftEnum(parquet_thrift.Encoding, dataPageHeader.definition_level_encoding); let dLevels = new Array(valueCount); if (opts.dLevelMax! > 0) { - dLevels = decodeValues( - PARQUET_RDLVL_TYPE, - dLevelEncoding as ParquetCodec, - valuesBufCursor, - valueCount, - { bitWidth: parquet_util.getBitWidth(opts.dLevelMax!) 
}); + dLevels = decodeValues(PARQUET_RDLVL_TYPE, dLevelEncoding as ParquetCodec, valuesBufCursor, valueCount, { + bitWidth: parquet_util.getBitWidth(opts.dLevelMax!), + }); } else { dLevels.fill(0); } @@ -1016,20 +1036,15 @@ async function decodeDataPage(cursor: Cursor, header: parquet_thrift.PageHeader, } } - let values = decodeValues( - opts.type!, - valueEncoding as ParquetCodec, - valuesBufCursor, - valueCountNonNull, - { - typeLength: opts.column!.typeLength!, - bitWidth: opts.column!.typeLength!, - disableEnvelope: opts.column!.disableEnvelope, - originalType: opts.column!.originalType, - precision: opts.column!.precision, - scale: opts.column!.scale, - name: opts.column!.name - }); + let values = decodeValues(opts.type!, valueEncoding as ParquetCodec, valuesBufCursor, valueCountNonNull, { + typeLength: opts.column!.typeLength!, + bitWidth: opts.column!.typeLength!, + disableEnvelope: opts.column!.disableEnvelope, + originalType: opts.column!.originalType, + precision: opts.column!.precision, + scale: opts.column!.scale, + name: opts.column!.name, + }); cursor.offset = cursorEnd; @@ -1038,7 +1053,7 @@ async function decodeDataPage(cursor: Cursor, header: parquet_thrift.PageHeader, rlevels: rLevels, values: values, count: valueCount, - useDictionary: valueEncoding === 'PLAIN_DICTIONARY' || valueEncoding === 'RLE_DICTIONARY' + useDictionary: valueEncoding === 'PLAIN_DICTIONARY' || valueEncoding === 'RLE_DICTIONARY', }; } @@ -1048,22 +1063,15 @@ async function decodeDataPageV2(cursor: Cursor, header: parquet_thrift.PageHeade const valueCount = dataPageHeaderV2.num_values; const valueCountNonNull = valueCount - dataPageHeaderV2.num_nulls; - const valueEncoding = parquet_util.getThriftEnum( - parquet_thrift.Encoding, - dataPageHeaderV2.encoding); + const valueEncoding = parquet_util.getThriftEnum(parquet_thrift.Encoding, dataPageHeaderV2.encoding); /* read repetition levels */ let rLevels = new Array(valueCount); if (opts.rLevelMax! > 0) { - rLevels = decodeValues( - PARQUET_RDLVL_TYPE, - PARQUET_RDLVL_ENCODING, - cursor, - valueCount, - { - bitWidth: parquet_util.getBitWidth(opts.rLevelMax!), - disableEnvelope: true - }); + rLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount, { + bitWidth: parquet_util.getBitWidth(opts.rLevelMax!), + disableEnvelope: true, + }); } else { rLevels.fill(0); } @@ -1071,15 +1079,10 @@ async function decodeDataPageV2(cursor: Cursor, header: parquet_thrift.PageHeade /* read definition levels */ let dLevels = new Array(valueCount); if (opts.dLevelMax! 
> 0) { - dLevels = decodeValues( - PARQUET_RDLVL_TYPE, - PARQUET_RDLVL_ENCODING, - cursor, - valueCount, - { - bitWidth: parquet_util.getBitWidth(opts.dLevelMax!), - disableEnvelope: true - }); + dLevels = decodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, cursor, valueCount, { + bitWidth: parquet_util.getBitWidth(opts.dLevelMax!), + disableEnvelope: true, + }); } else { dLevels.fill(0); } @@ -1089,44 +1092,37 @@ async function decodeDataPageV2(cursor: Cursor, header: parquet_thrift.PageHeade if (dataPageHeaderV2.is_compressed) { let valuesBuf = await parquet_compression.inflate( - opts.compression!, - cursor.buffer.subarray(cursor.offset, cursorEnd)); + opts.compression!, + cursor.buffer.subarray(cursor.offset, cursorEnd) + ); valuesBufCursor = { buffer: valuesBuf, offset: 0, - size: valuesBuf.length + size: valuesBuf.length, }; cursor.offset = cursorEnd; } - let values = decodeValues( - opts.type!, - valueEncoding as ParquetCodec, - valuesBufCursor, - valueCountNonNull, - { - bitWidth: opts.column!.typeLength!, - ...opts.column! - }); + let values = decodeValues(opts.type!, valueEncoding as ParquetCodec, valuesBufCursor, valueCountNonNull, { + bitWidth: opts.column!.typeLength!, + ...opts.column!, + }); return { dlevels: dLevels, rlevels: rLevels, values: values, count: valueCount, - useDictionary: valueEncoding === 'PLAIN_DICTIONARY' || valueEncoding === 'RLE_DICTIONARY' + useDictionary: valueEncoding === 'PLAIN_DICTIONARY' || valueEncoding === 'RLE_DICTIONARY', }; } function decodeSchema(schemaElements: Array) { - let schema: SchemaDefinition | FieldDefinition = {}; - schemaElements.forEach(schemaElement => { - - let repetitionType = parquet_util.getThriftEnum( - parquet_thrift.FieldRepetitionType, - schemaElement.repetition_type); + let schema: SchemaDefinition | FieldDefinition = {}; + schemaElements.forEach((schemaElement) => { + let repetitionType = parquet_util.getThriftEnum(parquet_thrift.FieldRepetitionType, schemaElement.repetition_type); let optional = false; let repeated = false; @@ -1139,35 +1135,34 @@ function decodeSchema(schemaElements: Array) { case 'REPEATED': repeated = true; break; - }; + } if (schemaElement.num_children != undefined && schemaElement.num_children > 0) { (schema as SchemaDefinition)[schemaElement.name] = { optional: optional, repeated: repeated, - fields: Object.create({},{ - /* define parent and num_children as non-enumerable */ - parent: { - value: schema, - enumerable: false - }, - num_children: { - value: schemaElement.num_children, - enumerable: false + fields: Object.create( + {}, + { + /* define parent and num_children as non-enumerable */ + parent: { + value: schema, + enumerable: false, + }, + num_children: { + value: schemaElement.num_children, + enumerable: false, + }, } - }) + ), }; /* move the schema pointer to the children */ schema = (schema as SchemaDefinition)[schemaElement.name].fields as SchemaDefinition; } else { - let logicalType = parquet_util.getThriftEnum( - parquet_thrift.Type, - schemaElement.type); + let logicalType = parquet_util.getThriftEnum(parquet_thrift.Type, schemaElement.type); if (schemaElement.converted_type != null) { - logicalType = parquet_util.getThriftEnum( - parquet_thrift.ConvertedType, - schemaElement.converted_type); + logicalType = parquet_util.getThriftEnum(parquet_thrift.ConvertedType, schemaElement.converted_type); } (schema as SchemaDefinition)[schemaElement.name] = { @@ -1176,7 +1171,7 @@ function decodeSchema(schemaElements: Array) { optional: optional, repeated: repeated, scale: 
schemaElement.scale, - precision: schemaElement.precision + precision: schemaElement.precision, }; } diff --git a/lib/schema.ts b/lib/schema.ts index adabe390..5671bea9 100644 --- a/lib/schema.ts +++ b/lib/schema.ts @@ -1,20 +1,19 @@ import * as parquet_codec from './codec'; -import * as parquet_compression from './compression' -import * as parquet_types from './types' -import { SchemaDefinition, ParquetField, RepetitionType, FieldDefinition } from './declare' -import { JSONSchema4 } from 'json-schema' +import * as parquet_compression from './compression'; +import * as parquet_types from './types'; +import { SchemaDefinition, ParquetField, RepetitionType, FieldDefinition } from './declare'; +import { JSONSchema4 } from 'json-schema'; import { fromJsonSchema } from './jsonSchema'; const PARQUET_COLUMN_KEY_SEPARATOR = '.'; - /** * A parquet file schema */ export class ParquetSchema { - schema: SchemaDefinition - fields: Record - fieldList: Array + schema: SchemaDefinition; + fields: Record; + fieldList: Array; /** * Create a new schema from JSON Schema (json-schema.org) @@ -38,14 +37,14 @@ export class ParquetSchema { */ findField(path: string | Array) { if (typeof path === 'string') { - path = path.split(","); + path = path.split(','); } else { path = path.slice(0); // clone array } let n = this.fields; for (; path.length > 1; path.shift()) { - let fields = n[path[0]]?.fields + let fields = n[path[0]]?.fields; if (isDefined(fields)) { n = fields; } @@ -59,7 +58,7 @@ export class ParquetSchema { */ findFieldBranch(path: string | Array) { if (typeof path === 'string') { - path = path.split(","); + path = path.split(','); } let branch = []; @@ -67,7 +66,7 @@ export class ParquetSchema { for (; path.length > 0; path.shift()) { branch.push(n[path[0]]); - let fields = n[path[0]].fields + let fields = n[path[0]].fields; if (path.length > 1 && isDefined(fields)) { n = fields; } @@ -75,10 +74,14 @@ export class ParquetSchema { return branch; } +} -}; - -function buildFields(schema: SchemaDefinition, rLevelParentMax?: number, dLevelParentMax?: number, path?: Array) { +function buildFields( + schema: SchemaDefinition, + rLevelParentMax?: number, + dLevelParentMax?: number, + path?: Array +) { if (!rLevelParentMax) { rLevelParentMax = 0; } @@ -129,11 +132,7 @@ function buildFields(schema: SchemaDefinition, rLevelParentMax?: number, dLevelP isNested: true, statistics: opts.statistics, fieldCount: Object.keys(opts.fields).length, - fields: buildFields( - opts.fields, - rLevelMax, - dLevelMax, - path.concat(name)) + fields: buildFields(opts.fields, rLevelMax, dLevelMax, path.concat(name)), }; if (opts.type == 'LIST' || opts.type == 'MAP') fieldList[name].originalType = opts.type; @@ -141,14 +140,14 @@ function buildFields(schema: SchemaDefinition, rLevelParentMax?: number, dLevelP continue; } - let nameWithPath = (`${name}` || 'missing name') + let nameWithPath = `${name}` || 'missing name'; if (path && path.length > 0) { - nameWithPath = `${path}.${nameWithPath}` + nameWithPath = `${path}.${nameWithPath}`; } const typeDef = opts.type ? 
parquet_types.getParquetTypeDataObject(opts.type, opts) : undefined; if (!typeDef) { - fieldErrors.push(`Invalid parquet type: ${(opts.type || "missing type")}, for Column: ${nameWithPath}`); + fieldErrors.push(`Invalid parquet type: ${opts.type || 'missing type'}, for Column: ${nameWithPath}`); continue; } @@ -171,8 +170,10 @@ function buildFields(schema: SchemaDefinition, rLevelParentMax?: number, dLevelP if (typeDef.originalType === 'DECIMAL') { // Default scale to 0 per https://github.com/apache/parquet-format/blob/master/LogicalTypes.md#decimal - if (typeof opts.scale === "undefined") opts.scale = 0; - fieldErrors = fieldErrors.concat(errorsForDecimalOpts(typeDef.originalType, typeDef.primitiveType, opts, nameWithPath)); + if (typeof opts.scale === 'undefined') opts.scale = 0; + fieldErrors = fieldErrors.concat( + errorsForDecimalOpts(typeDef.originalType, typeDef.primitiveType, opts, nameWithPath) + ); } /* add to schema */ @@ -189,7 +190,7 @@ function buildFields(schema: SchemaDefinition, rLevelParentMax?: number, dLevelP scale: opts.scale, typeLength: opts.typeLength || typeDef.typeLength, rLevelMax: rLevelMax, - dLevelMax: dLevelMax + dLevelMax: dLevelMax, }; } @@ -206,7 +207,7 @@ function listFields(fields: Record) { for (let k in fields) { list.push(fields[k]); - const nestedFields = fields[k].fields + const nestedFields = fields[k].fields; if (fields[k].isNested && isDefined(nestedFields)) { list = list.concat(listFields(nestedFields)); } @@ -219,37 +220,34 @@ function isDefined(val: T | undefined): val is T { return val !== undefined; } -function errorsForDecimalOpts(type: string, primitiveType: string | undefined, opts: FieldDefinition, columnName: string): string[] { - const fieldErrors = [] - if(opts.precision === undefined || opts.precision < 1) { +function errorsForDecimalOpts( + type: string, + primitiveType: string | undefined, + opts: FieldDefinition, + columnName: string +): string[] { + const fieldErrors = []; + if (opts.precision === undefined || opts.precision < 1) { fieldErrors.push( `invalid schema for type: ${type}, for Column: ${columnName}, precision is required and must be be greater than 0` ); - } - else if (!Number.isInteger(opts.precision)) { - fieldErrors.push( - `invalid schema for type: ${type}, for Column: ${columnName}, precision must be an integer` - ); - } - else if (primitiveType === "INT64" && opts.precision > 18) { + } else if (!Number.isInteger(opts.precision)) { + fieldErrors.push(`invalid schema for type: ${type}, for Column: ${columnName}, precision must be an integer`); + } else if (primitiveType === 'INT64' && opts.precision > 18) { fieldErrors.push( `invalid schema for type: ${type} and primitive type: ${primitiveType} for Column: ${columnName}, can not handle precision over 18` ); } - if (typeof opts.scale === "undefined" || opts.scale < 0) { + if (typeof opts.scale === 'undefined' || opts.scale < 0) { fieldErrors.push( `invalid schema for type: ${type}, for Column: ${columnName}, scale is required to be 0 or greater` ); - } - else if (!Number.isInteger(opts.scale)) { - fieldErrors.push( - `invalid schema for type: ${type}, for Column: ${columnName}, scale must be an integer` - ); - } - else if (opts.precision !== undefined && opts.scale > opts.precision) { + } else if (!Number.isInteger(opts.scale)) { + fieldErrors.push(`invalid schema for type: ${type}, for Column: ${columnName}, scale must be an integer`); + } else if (opts.precision !== undefined && opts.scale > opts.precision) { fieldErrors.push( `invalid schema or precision for 
type: ${type}, for Column: ${columnName}, precision must be greater than or equal to scale` ); } - return fieldErrors + return fieldErrors; } diff --git a/lib/shred.ts b/lib/shred.ts index 723a32a3..93ef78aa 100644 --- a/lib/shred.ts +++ b/lib/shred.ts @@ -1,5 +1,5 @@ -import * as parquet_types from './types' -import { ParquetSchema } from './schema' +import * as parquet_types from './types'; +import { ParquetSchema } from './schema'; import { Page, PageData, ParquetField } from './declare'; /** @@ -27,13 +27,13 @@ import { Page, PageData, ParquetField } from './declare'; */ export interface RecordBuffer { - columnData?: Record - rowCount?: number, - pageRowCount?: number, - pages?: Record + columnData?: Record; + rowCount?: number; + pageRowCount?: number; + pages?: Record; } -export const shredRecord = function(schema: ParquetSchema, record: Record, buffer: RecordBuffer) { +export const shredRecord = function (schema: ParquetSchema, record: Record, buffer: RecordBuffer) { /* shred the record, this may raise an exception */ var recordShredded: Record = {}; for (let field of schema.fieldList) { @@ -42,7 +42,7 @@ export const shredRecord = function(schema: ParquetSchema, record: Record buffer.columnData![path].distinct_values!.add(value)); + [...recordShredded[path].distinct_values!].forEach((value) => buffer.columnData![path].distinct_values!.add(value)); buffer.columnData![path].count! += recordShredded[path].count!; } }; -function shredRecordInternal(fields: Record, record: Record | null, data: Record, rlvl: number, dlvl: number) { +function shredRecordInternal( + fields: Record, + record: Record | null, + data: Record, + rlvl: number, + dlvl: number +) { for (let fieldName in fields) { const field = fields[fieldName]; const fieldType = field.originalType || field.primitiveType; - const path = field.path.join(',') + const path = field.path.join(','); // fetch values let values: Array = []; - if (record && (fieldName in record) && record[fieldName] !== undefined && record[fieldName] !== null) { + if (record && fieldName in record && record[fieldName] !== undefined && record[fieldName] !== null) { if (Array.isArray(record[fieldName])) { values = record[fieldName] as Array; - } else if(ArrayBuffer.isView(record[fieldName])) { // checks if any typed array + } else if (ArrayBuffer.isView(record[fieldName])) { + // checks if any typed array if (record[fieldName] instanceof Uint8Array) { // wrap in a buffer, since not supported by parquet_thrift values.push(Buffer.from(record[fieldName] as ArrayBuffer)); @@ -124,12 +131,7 @@ function shredRecordInternal(fields: Record, record: Recor // push null if (values.length == 0) { if (field.isNested && isDefined(field.fields)) { - shredRecordInternal( - field.fields, - null, - data, - rlvl, - dlvl); + shredRecordInternal(field.fields, null, data, rlvl, dlvl); } else { data[path].rlevels!.push(rlvl); data[path].dlevels!.push(dlvl); @@ -143,12 +145,7 @@ function shredRecordInternal(fields: Record, record: Recor const rlvl_i = i === 0 ? 
rlvl : field.rLevelMax; if (field.isNested && isDefined(field.fields)) { - shredRecordInternal( - field.fields, - values[i] as Record, - data, - rlvl_i, - field.dLevelMax); + shredRecordInternal(field.fields, values[i] as Record, data, rlvl_i, field.dLevelMax); } else { data[path].distinct_values!.add(values[i]); data[path].values!.push(parquet_types.toPrimitive(fieldType as string, values[i], field)); @@ -181,7 +178,11 @@ function shredRecordInternal(fields: Record, record: Recor * */ -export const materializeRecords = function(schema: ParquetSchema, buffer: RecordBuffer, records?: Array>) { +export const materializeRecords = function ( + schema: ParquetSchema, + buffer: RecordBuffer, + records?: Array> +) { if (!records) { records = []; } @@ -203,27 +204,31 @@ export const materializeRecords = function(schema: ParquetSchema, buffer: Record let value = null; if (dLevel === field.dLevelMax) { - value = parquet_types.fromPrimitive( - field.originalType || field.primitiveType, - values.next().value, - field); + value = parquet_types.fromPrimitive(field.originalType || field.primitiveType, values.next().value, field); } records[rLevels[0] - 1] = records[rLevels[0] - 1] || {}; materializeRecordField( - records[rLevels[0] - 1] as Record, - fieldBranch, - rLevels.slice(1), - dLevel, - value); + records[rLevels[0] - 1] as Record, + fieldBranch, + rLevels.slice(1), + dLevel, + value + ); } } return records; -} +}; -function materializeRecordField(record: Record, branch: Array, rLevels: Array, dLevel: number, value: Record) { +function materializeRecordField( + record: Record, + branch: Array, + rLevels: Array, + dLevel: number, + value: Record +) { const node = branch[0]; if (dLevel < node.dLevelMax) { @@ -233,39 +238,29 @@ function materializeRecordField(record: Record, branch: Array
<ParquetField>, rLevels: Array<number>, dLevel: number, value: Record<string, unknown>)
   if (branch.length >
1) { - if (node.repetitionType === "REPEATED") { + if (node.repetitionType === 'REPEATED') { if (!(node.name in record)) { record[node.name] = []; } - const recordValue = record[node.name] as Array> + const recordValue = record[node.name] as Array>; while (recordValue.length < rLevels[0] + 1) { recordValue.push({}); } - materializeRecordField( - recordValue[rLevels[0]], - branch.slice(1), - rLevels.slice(1), - dLevel, - value); + materializeRecordField(recordValue[rLevels[0]], branch.slice(1), rLevels.slice(1), dLevel, value); } else { record[node.name] = record[node.name] || {}; - const recordValue = record[node.name] as Record - materializeRecordField( - recordValue, - branch.slice(1), - rLevels, - dLevel, - value); + const recordValue = record[node.name] as Record; + materializeRecordField(recordValue, branch.slice(1), rLevels, dLevel, value); } } else { - if (node.repetitionType === "REPEATED") { + if (node.repetitionType === 'REPEATED') { if (!(node.name in record)) { record[node.name] = []; } - const recordValue = record[node.name] as Array | null> + const recordValue = record[node.name] as Array | null>; while (recordValue.length < rLevels[0] + 1) { recordValue.push(null); @@ -281,4 +276,3 @@ function materializeRecordField(record: Record, branch: Array
<ParquetField>, rLevels: Array<number>, dLevel: number, value: Record<string, unknown>)
 function isDefined<T>
(val: T | undefined): val is T { return val !== undefined; } - diff --git a/lib/types.ts b/lib/types.ts index 868fa9a7..fcab2f08 100644 --- a/lib/types.ts +++ b/lib/types.ts @@ -1,47 +1,50 @@ 'use strict'; // Thanks to https://github.com/kbajalc/parquets for some of the code. -import { PrimitiveType, OriginalType, ParquetType, FieldDefinition, ParquetField } from "./declare"; -import { Options } from "./codec/types"; -import type { Document as BsonDocument } from "bson"; +import { PrimitiveType, OriginalType, ParquetType, FieldDefinition, ParquetField } from './declare'; +import { Options } from './codec/types'; +import type { Document as BsonDocument } from 'bson'; // BSON uses top level awaits, so use require for now const bsonSerialize = require('bson').serialize; const bsonDeserialize = require('bson').deserialize; type ParquetTypeDataObject = { - primitiveType?: PrimitiveType, - toPrimitive: Function, - fromPrimitive?: Function, - originalType?: OriginalType, - typeLength?: number + primitiveType?: PrimitiveType; + toPrimitive: Function; + fromPrimitive?: Function; + originalType?: OriginalType; + typeLength?: number; }; interface INTERVAL { - months: number, - days: number, - milliseconds: number + months: number; + days: number; + milliseconds: number; } -export function getParquetTypeDataObject(type: ParquetType, field?: ParquetField | Options | FieldDefinition): ParquetTypeDataObject { +export function getParquetTypeDataObject( + type: ParquetType, + field?: ParquetField | Options | FieldDefinition +): ParquetTypeDataObject { if (type === 'DECIMAL') { if (field?.typeLength !== undefined && field?.typeLength !== null) { return { primitiveType: 'FIXED_LEN_BYTE_ARRAY', originalType: 'DECIMAL', typeLength: field.typeLength, - toPrimitive: toPrimitive_FIXED_LEN_BYTE_ARRAY_DECIMAL + toPrimitive: toPrimitive_FIXED_LEN_BYTE_ARRAY_DECIMAL, }; } else if (field?.precision !== undefined && field?.precision !== null && field.precision > 18) { return { primitiveType: 'BYTE_ARRAY', originalType: 'DECIMAL', typeLength: field.typeLength, - toPrimitive: toPrimitive_BYTE_ARRAY_DECIMAL + toPrimitive: toPrimitive_BYTE_ARRAY_DECIMAL, }; } else { return { primitiveType: 'INT64', originalType: 'DECIMAL', - toPrimitive: toPrimitive_INT64 + toPrimitive: toPrimitive_INT64, }; } } else { @@ -78,150 +81,150 @@ const PARQUET_LOGICAL_TYPES = new Set([ 'BSON', 'INTERVAL', 'MAP', - 'LIST' -] satisfies ParquetType[]) + 'LIST', +] satisfies ParquetType[]); const PARQUET_LOGICAL_TYPE_DATA: { [logicalType: string]: ParquetTypeDataObject } = { - 'BOOLEAN': { + BOOLEAN: { primitiveType: 'BOOLEAN', toPrimitive: toPrimitive_BOOLEAN, - fromPrimitive: fromPrimitive_BOOLEAN + fromPrimitive: fromPrimitive_BOOLEAN, }, - 'INT32': { + INT32: { primitiveType: 'INT32', - toPrimitive: toPrimitive_INT32 + toPrimitive: toPrimitive_INT32, }, - 'INT64': { + INT64: { primitiveType: 'INT64', - toPrimitive: toPrimitive_INT64 + toPrimitive: toPrimitive_INT64, }, - 'INT96': { + INT96: { primitiveType: 'INT96', - toPrimitive: toPrimitive_INT96 + toPrimitive: toPrimitive_INT96, }, - 'FLOAT': { + FLOAT: { primitiveType: 'FLOAT', - toPrimitive: toPrimitive_FLOAT + toPrimitive: toPrimitive_FLOAT, }, - 'DOUBLE': { + DOUBLE: { primitiveType: 'DOUBLE', - toPrimitive: toPrimitive_DOUBLE + toPrimitive: toPrimitive_DOUBLE, }, - 'BYTE_ARRAY': { + BYTE_ARRAY: { primitiveType: 'BYTE_ARRAY', - toPrimitive: toPrimitive_BYTE_ARRAY + toPrimitive: toPrimitive_BYTE_ARRAY, }, - 'FIXED_LEN_BYTE_ARRAY': { + FIXED_LEN_BYTE_ARRAY: { primitiveType: 
'FIXED_LEN_BYTE_ARRAY', - toPrimitive: toPrimitive_BYTE_ARRAY + toPrimitive: toPrimitive_BYTE_ARRAY, }, - 'UTF8': { + UTF8: { primitiveType: 'BYTE_ARRAY', originalType: 'UTF8', toPrimitive: toPrimitive_UTF8, - fromPrimitive: fromPrimitive_UTF8 + fromPrimitive: fromPrimitive_UTF8, }, - 'ENUM': { + ENUM: { primitiveType: 'BYTE_ARRAY', originalType: 'UTF8', toPrimitive: toPrimitive_UTF8, - fromPrimitive: fromPrimitive_UTF8 + fromPrimitive: fromPrimitive_UTF8, }, - 'TIME_MILLIS': { + TIME_MILLIS: { primitiveType: 'INT32', originalType: 'TIME_MILLIS', - toPrimitive: toPrimitive_TIME_MILLIS + toPrimitive: toPrimitive_TIME_MILLIS, }, - 'TIME_MICROS': { + TIME_MICROS: { primitiveType: 'INT64', originalType: 'TIME_MICROS', - toPrimitive: toPrimitive_TIME_MICROS + toPrimitive: toPrimitive_TIME_MICROS, }, - 'DATE': { + DATE: { primitiveType: 'INT32', originalType: 'DATE', toPrimitive: toPrimitive_DATE, - fromPrimitive: fromPrimitive_DATE + fromPrimitive: fromPrimitive_DATE, }, - 'TIMESTAMP_MILLIS': { + TIMESTAMP_MILLIS: { primitiveType: 'INT64', originalType: 'TIMESTAMP_MILLIS', toPrimitive: toPrimitive_TIMESTAMP_MILLIS, - fromPrimitive: fromPrimitive_TIMESTAMP_MILLIS + fromPrimitive: fromPrimitive_TIMESTAMP_MILLIS, }, - 'TIMESTAMP_MICROS': { + TIMESTAMP_MICROS: { primitiveType: 'INT64', originalType: 'TIMESTAMP_MICROS', toPrimitive: toPrimitive_TIMESTAMP_MICROS, - fromPrimitive: fromPrimitive_TIMESTAMP_MICROS + fromPrimitive: fromPrimitive_TIMESTAMP_MICROS, }, - 'UINT_8': { + UINT_8: { primitiveType: 'INT32', originalType: 'UINT_8', - toPrimitive: toPrimitive_UINT8 + toPrimitive: toPrimitive_UINT8, }, - 'UINT_16': { + UINT_16: { primitiveType: 'INT32', originalType: 'UINT_16', - toPrimitive: toPrimitive_UINT16 + toPrimitive: toPrimitive_UINT16, }, - 'UINT_32': { + UINT_32: { primitiveType: 'INT32', originalType: 'UINT_32', - toPrimitive: toPrimitive_UINT32 + toPrimitive: toPrimitive_UINT32, }, - 'UINT_64': { + UINT_64: { primitiveType: 'INT64', originalType: 'UINT_64', - toPrimitive: toPrimitive_UINT64 + toPrimitive: toPrimitive_UINT64, }, - 'INT_8': { + INT_8: { primitiveType: 'INT32', originalType: 'INT_8', - toPrimitive: toPrimitive_INT8 + toPrimitive: toPrimitive_INT8, }, - 'INT_16': { + INT_16: { primitiveType: 'INT32', originalType: 'INT_16', - toPrimitive: toPrimitive_INT16 + toPrimitive: toPrimitive_INT16, }, - 'INT_32': { + INT_32: { primitiveType: 'INT32', originalType: 'INT_32', - toPrimitive: toPrimitive_INT32 + toPrimitive: toPrimitive_INT32, }, - 'INT_64': { + INT_64: { primitiveType: 'INT64', originalType: 'INT_64', - toPrimitive: toPrimitive_INT64 + toPrimitive: toPrimitive_INT64, }, - 'JSON': { + JSON: { primitiveType: 'BYTE_ARRAY', originalType: 'JSON', toPrimitive: toPrimitive_JSON, - fromPrimitive: fromPrimitive_JSON + fromPrimitive: fromPrimitive_JSON, }, - 'BSON': { + BSON: { primitiveType: 'BYTE_ARRAY', originalType: 'BSON', toPrimitive: toPrimitive_BSON, - fromPrimitive: fromPrimitive_BSON + fromPrimitive: fromPrimitive_BSON, }, - 'INTERVAL': { + INTERVAL: { primitiveType: 'FIXED_LEN_BYTE_ARRAY', originalType: 'INTERVAL', typeLength: 12, toPrimitive: toPrimitive_INTERVAL, - fromPrimitive: fromPrimitive_INTERVAL + fromPrimitive: fromPrimitive_INTERVAL, }, MAP: { - originalType: 'MAP', - toPrimitive: toPrimitive_MAP, + originalType: 'MAP', + toPrimitive: toPrimitive_MAP, }, LIST: { - originalType: 'LIST', - toPrimitive: toPrimitive_LIST, - } + originalType: 'LIST', + toPrimitive: toPrimitive_LIST, + }, }; /** @@ -239,7 +242,7 @@ function isParquetType(type: string | undefined): 
type is ParquetType { */ export function toPrimitive(type: string | undefined, value: unknown, field?: ParquetField | Options) { if (!isParquetType(type)) { - throw 'invalid type: ' + type || "undefined"; + throw 'invalid type: ' + type || 'undefined'; } return getParquetTypeDataObject(type, field).toPrimitive(value); } @@ -250,12 +253,12 @@ export function toPrimitive(type: string | undefined, value: unknown, field?: Pa */ export function fromPrimitive(type: string | undefined, value: unknown, field?: ParquetField | Options) { if (!isParquetType(type)) { - throw 'invalid type: ' + type || "undefined"; + throw 'invalid type: ' + type || 'undefined'; } - const typeFromPrimitive = getParquetTypeDataObject(type, field).fromPrimitive + const typeFromPrimitive = getParquetTypeDataObject(type, field).fromPrimitive; if (typeFromPrimitive !== undefined) { - return typeFromPrimitive(value) + return typeFromPrimitive(value); } else { return value; } @@ -297,7 +300,7 @@ function toPrimitive_INT8(value: number | bigint | string) { return v; } catch { - throw 'invalid value for INT8: ' + value; + throw 'invalid value for INT8: ' + value; } } @@ -309,7 +312,7 @@ function toPrimitive_UINT8(value: number | bigint | string) { return v; } catch { - throw 'invalid value for UINT8: ' + value; + throw 'invalid value for UINT8: ' + value; } } @@ -321,7 +324,7 @@ function toPrimitive_INT16(value: number | bigint | string) { return v; } catch { - throw 'invalid value for INT16: ' + value; + throw 'invalid value for INT16: ' + value; } } @@ -333,7 +336,7 @@ function toPrimitive_UINT16(value: number | bigint | string) { return v; } catch { - throw 'invalid value for UINT16: ' + value; + throw 'invalid value for UINT16: ' + value; } } @@ -345,11 +348,10 @@ function toPrimitive_INT32(value: number | bigint | string) { return v; } catch { - throw 'invalid value for INT32: ' + value; + throw 'invalid value for INT32: ' + value; } } - function toPrimitive_UINT32(value: number | bigint | string) { try { let v = value; @@ -358,7 +360,7 @@ function toPrimitive_UINT32(value: number | bigint | string) { return v; } catch { - throw 'invalid value for UINT32: ' + value; + throw 'invalid value for UINT32: ' + value; } } @@ -373,7 +375,7 @@ function toPrimitive_INT64(value: number | bigint | string) { return v; } catch { - throw 'invalid value for INT64: ' + value; + throw 'invalid value for INT64: ' + value; } } @@ -387,7 +389,7 @@ function toPrimitive_UINT64(value: number | bigint | string) { return v; } catch { - throw 'invalid value for UINT64: ' + value; + throw 'invalid value for UINT64: ' + value; } } @@ -402,7 +404,7 @@ function toPrimitive_INT96(value: number | bigint | string) { return v; } catch { - throw 'invalid value for INT96: ' + value; + throw 'invalid value for INT96: ' + value; } } @@ -431,7 +433,7 @@ function toPrimitive_UTF8(value: string) { } function fromPrimitive_UTF8(value: string) { - return (value !== undefined && value !== null) ? value.toString() : value; + return value !== undefined && value !== null ? 
value.toString() : value; } function toPrimitive_JSON(value: object) { @@ -453,10 +455,10 @@ function fromPrimitive_BSON(value: Buffer) { function toNumberInternal(typeName: string, value: string | number): number { let numberValue = 0; switch (typeof value) { - case "string": + case 'string': numberValue = parseInt(value, 10); break; - case "number": + case 'number': numberValue = value; break; default: @@ -466,16 +468,16 @@ function toNumberInternal(typeName: string, value: string | number): number { if (numberValue < 0 || numberValue >= Number.MAX_SAFE_INTEGER) { throw `${typeName} value is out of bounds: ${numberValue}`; } - return numberValue + return numberValue; } function toPrimitive_TIME_MILLIS(value: string | number) { - return toNumberInternal("TIME_MILLIS", value); + return toNumberInternal('TIME_MILLIS', value); } function toPrimitive_TIME_MICROS(value: string | number | bigint) { const v = BigInt(value); - if (v < 0n ) { + if (v < 0n) { throw 'TIME_MICROS value is out of bounds: ' + value; } return v; @@ -488,20 +490,19 @@ function toPrimitive_DATE(value: string | Date | number) { if (value instanceof Date) { return value.getTime() / kMillisPerDay; } - return toNumberInternal("DATE", value ) + return toNumberInternal('DATE', value); } -function fromPrimitive_DATE(value: number ) { +function fromPrimitive_DATE(value: number) { return new Date(+value * kMillisPerDay); } - function toPrimitive_TIMESTAMP_MILLIS(value: string | Date | number) { /* convert from date */ if (value instanceof Date) { return value.getTime(); } - return toNumberInternal("TIMESTAMP_MILLIS", value); + return toNumberInternal('TIMESTAMP_MILLIS', value); } function fromPrimitive_TIMESTAMP_MILLIS(value: number | string | bigint) { @@ -529,13 +530,13 @@ function toPrimitive_TIMESTAMP_MICROS(value: Date | string | number | bigint) { } function fromPrimitive_TIMESTAMP_MICROS(value: number | bigint) { - if (typeof value === 'bigint') return new Date(Number(value / 1000n)); - return new Date(value / 1000); - } + if (typeof value === 'bigint') return new Date(Number(value / 1000n)); + return new Date(value / 1000); +} function toPrimitive_INTERVAL(value: INTERVAL) { if (!value.months || !value.days || !value.milliseconds) { - throw "value for INTERVAL must be object { months: ..., days: ..., milliseconds: ... }"; + throw 'value for INTERVAL must be object { months: ..., days: ..., milliseconds: ... 
}'; } let buf = Buffer.alloc(12); @@ -556,6 +557,6 @@ function fromPrimitive_INTERVAL(value: string) { function checkValidValue(lowerRange: number | bigint, upperRange: number | bigint, v: number | bigint) { if (v < lowerRange || v > upperRange) { - throw "invalid value" + throw 'invalid value'; } } diff --git a/lib/util.ts b/lib/util.ts index 08230bfb..0cf45453 100644 --- a/lib/util.ts +++ b/lib/util.ts @@ -1,13 +1,13 @@ -import { TTransportCallback } from "thrift"; -import thrift from "thrift" -import fs, { WriteStream } from 'fs' -import * as parquet_thrift from '../gen-nodejs/parquet_types' -import { FileMetaDataExt, WriterOptions } from './declare' -import { Int64 } from "thrift"; -import { type } from "os"; +import { TTransportCallback } from 'thrift'; +import thrift from 'thrift'; +import fs, { WriteStream } from 'fs'; +import * as parquet_thrift from '../gen-nodejs/parquet_types'; +import { FileMetaDataExt, WriterOptions } from './declare'; +import { Int64 } from 'thrift'; +import { type } from 'os'; // Use this so users only need to implement the minimal amount of the WriteStream interface -export type WriteStreamMinimal = Pick; +export type WriteStreamMinimal = Pick; /** * We need to patch Thrift's TFramedTransport class bc the TS type definitions @@ -15,57 +15,74 @@ export type WriteStreamMinimal = Pick; * one. */ class fixedTFramedTransport extends thrift.TFramedTransport { - inBuf: Buffer - readPos: number + inBuf: Buffer; + readPos: number; constructor(inBuf: Buffer) { - super(inBuf) - this.inBuf = inBuf - this.readPos = 0 + super(inBuf); + this.inBuf = inBuf; + this.readPos = 0; } } -type Enums = typeof parquet_thrift.Encoding | typeof parquet_thrift.FieldRepetitionType | typeof parquet_thrift.Type | typeof parquet_thrift.CompressionCodec | typeof parquet_thrift.PageType | typeof parquet_thrift.ConvertedType; - -type ThriftObject = FileMetaDataExt | parquet_thrift.PageHeader | parquet_thrift.ColumnMetaData | parquet_thrift.BloomFilterHeader | parquet_thrift.OffsetIndex | parquet_thrift.ColumnIndex | FileMetaDataExt; +type Enums = + | typeof parquet_thrift.Encoding + | typeof parquet_thrift.FieldRepetitionType + | typeof parquet_thrift.Type + | typeof parquet_thrift.CompressionCodec + | typeof parquet_thrift.PageType + | typeof parquet_thrift.ConvertedType; + +type ThriftObject = + | FileMetaDataExt + | parquet_thrift.PageHeader + | parquet_thrift.ColumnMetaData + | parquet_thrift.BloomFilterHeader + | parquet_thrift.OffsetIndex + | parquet_thrift.ColumnIndex + | FileMetaDataExt; /** Patch PageLocation to be three element array that has getters/setters - * for each of the properties (offset, compressed_page_size, first_row_index) - * This saves space considerably as we do not need to store the full variable - * names for every PageLocation - */ + * for each of the properties (offset, compressed_page_size, first_row_index) + * This saves space considerably as we do not need to store the full variable + * names for every PageLocation + */ const getterSetter = (index: number) => ({ - get: function(this: Array): number { return this[index]; }, - set: function(this: Array, value: number): number { return this[index] = value;} + get: function (this: Array): number { + return this[index]; + }, + set: function (this: Array, value: number): number { + return (this[index] = value); + }, }); -Object.defineProperty(parquet_thrift.PageLocation.prototype,'offset', getterSetter(0)); -Object.defineProperty(parquet_thrift.PageLocation.prototype,'compressed_page_size', getterSetter(1)); 
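// Editor's note — a sketch of what the getterSetter patch above provides,
// assuming (per the doc comment) that each PageLocation is backed by a plain
// three-element array instead of a full object with named keys:
//
//   const loc: number[] = [0, 0, 0];
//   Object.setPrototypeOf(loc, parquet_thrift.PageLocation.prototype);
//   (loc as any).offset = 4;                // stored as loc[0]
//   (loc as any).compressed_page_size = 64; // stored as loc[1]
//   (loc as any).first_row_index = 10;      // stored as loc[2]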
-Object.defineProperty(parquet_thrift.PageLocation.prototype,'first_row_index', getterSetter(2)); +Object.defineProperty(parquet_thrift.PageLocation.prototype, 'offset', getterSetter(0)); +Object.defineProperty(parquet_thrift.PageLocation.prototype, 'compressed_page_size', getterSetter(1)); +Object.defineProperty(parquet_thrift.PageLocation.prototype, 'first_row_index', getterSetter(2)); /** * Helper function that serializes a thrift object into a buffer */ -export const serializeThrift = function(obj: ThriftObject) { - let output:Array = [] +export const serializeThrift = function (obj: ThriftObject) { + let output: Array = []; - const callBack:TTransportCallback = function (buf: Buffer | undefined) { - output.push(buf as Buffer) - } + const callBack: TTransportCallback = function (buf: Buffer | undefined) { + output.push(buf as Buffer); + }; - let transport = new thrift.TBufferedTransport(undefined, callBack) + let transport = new thrift.TBufferedTransport(undefined, callBack); - let protocol = new thrift.TCompactProtocol(transport) + let protocol = new thrift.TCompactProtocol(transport); //@ts-ignore, https://issues.apache.org/jira/browse/THRIFT-3872 - obj.write(protocol) - transport.flush() + obj.write(protocol); + transport.flush(); - return Buffer.concat(output) -} + return Buffer.concat(output); +}; -export const decodeThrift = function(obj: ThriftObject, buf: Buffer, offset?: number) { +export const decodeThrift = function (obj: ThriftObject, buf: Buffer, offset?: number) { if (!offset) { - offset = 0 + offset = 0; } var transport = new fixedTFramedTransport(buf); @@ -74,23 +91,23 @@ export const decodeThrift = function(obj: ThriftObject, buf: Buffer, offset?: nu //@ts-ignore, https://issues.apache.org/jira/browse/THRIFT-3872 obj.read(protocol); return transport.readPos - offset; -} +}; /** * Get the number of bits required to store a given value */ -export const getBitWidth = function(val: number) { +export const getBitWidth = function (val: number) { if (val === 0) { return 0; } else { return Math.ceil(Math.log2(val + 1)); } -} +}; /** * FIXME not ideal that this is linear */ -export const getThriftEnum = function(klass: Enums, value: unknown) { +export const getThriftEnum = function (klass: Enums, value: unknown) { for (let k in klass) { if (klass[k] === value) { return k; @@ -98,9 +115,9 @@ export const getThriftEnum = function(klass: Enums, value: unknown) { } throw 'Invalid ENUM value'; -} +}; -export const fopen = function(filePath: string | Buffer | URL): Promise { +export const fopen = function (filePath: string | Buffer | URL): Promise { return new Promise((resolve, reject) => { fs.open(filePath, 'r', (err, fd) => { if (err) { @@ -108,11 +125,11 @@ export const fopen = function(filePath: string | Buffer | URL): Promise } else { resolve(fd); } - }) + }); }); -} +}; -export const fstat = function(filePath: string | Buffer | URL): Promise { +export const fstat = function (filePath: string | Buffer | URL): Promise { return new Promise((resolve, reject) => { fs.stat(filePath, (err, stat) => { if (err) { @@ -120,11 +137,11 @@ export const fstat = function(filePath: string | Buffer | URL): Promise { +export const fread = function (fd: number, position: number | null, length: number): Promise { let buffer = Buffer.alloc(length); return new Promise((resolve, reject) => { @@ -136,9 +153,9 @@ export const fread = function(fd: number, position: number | null, length: numbe } }); }); -} +}; -export const fclose = function(fd: number) { +export const fclose = function (fd: number) { 
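  // Promisified wrapper around fs.close, mirroring fopen/fstat/fread above:
  // resolves with no value on success and rejects with the underlying fs error.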
return new Promise((resolve, reject) => { fs.close(fd, (err) => { if (err) { @@ -148,9 +165,9 @@ export const fclose = function(fd: number) { } }); }); -} +}; -export const oswrite = function(os: WriteStreamMinimal, buf: Buffer) { +export const oswrite = function (os: WriteStreamMinimal, buf: Buffer) { return new Promise((resolve, reject) => { os.write(buf, (err: Error | undefined | null) => { if (err) { @@ -160,9 +177,9 @@ export const oswrite = function(os: WriteStreamMinimal, buf: Buffer) { } }); }); -} +}; -export const osend = function(os: WriteStreamMinimal) { +export const osend = function (os: WriteStreamMinimal) { return new Promise((resolve, reject) => { os.end((err: Error) => { if (err) { @@ -172,23 +189,23 @@ export const osend = function(os: WriteStreamMinimal) { } }); }); -} +}; -export const osopen = function(path: string | Buffer | URL, opts?: WriterOptions): Promise { +export const osopen = function (path: string | Buffer | URL, opts?: WriterOptions): Promise { return new Promise((resolve, reject) => { let outputStream = fs.createWriteStream(path, opts); - outputStream.on('open', function(fd) { + outputStream.on('open', function (fd) { resolve(outputStream); }); - outputStream.on('error', function(err) { + outputStream.on('error', function (err) { reject(err); }); }); -} +}; -export const fieldIndexOf = function(arr: Array>, elem: Array) { +export const fieldIndexOf = function (arr: Array>, elem: Array) { for (let j = 0; j < arr.length; ++j) { if (arr[j].length !== elem.length) { continue; @@ -208,8 +225,8 @@ export const fieldIndexOf = function(arr: Array>, elem: Array { - return new Int64(int.valueOf()); + return new Int64(int.valueOf()); }; diff --git a/lib/writer.ts b/lib/writer.ts index 64b29812..ac005162 100644 --- a/lib/writer.ts +++ b/lib/writer.ts @@ -1,11 +1,11 @@ -import stream from 'stream' -import parquet_thrift, {ConvertedType} from '../gen-nodejs/parquet_types' -import * as parquet_shredder from './shred' -import * as parquet_util from './util' -import * as parquet_codec from './codec' -import * as parquet_compression from './compression' -import * as parquet_types from './types' -import * as bloomFilterWriter from "./bloomFilterIO/bloomFilterWriter" +import stream from 'stream'; +import parquet_thrift, { ConvertedType } from '../gen-nodejs/parquet_types'; +import * as parquet_shredder from './shred'; +import * as parquet_util from './util'; +import * as parquet_codec from './codec'; +import * as parquet_compression from './compression'; +import * as parquet_types from './types'; +import * as bloomFilterWriter from './bloomFilterIO/bloomFilterWriter'; import { WriterOptions, ParquetCodec, @@ -13,12 +13,12 @@ import { ColumnMetaDataExt, RowGroupExt, Page, - FieldDefinition -} from './declare' -import {Options} from './codec/types' -import {ParquetSchema} from './schema' -import Int64 from 'node-int64' -import SplitBlockBloomFilter from './bloom/sbbf' + FieldDefinition, +} from './declare'; +import { Options } from './codec/types'; +import { ParquetSchema } from './schema'; +import Int64 from 'node-int64'; +import SplitBlockBloomFilter from './bloom/sbbf'; /** * Parquet File Magic String @@ -48,7 +48,6 @@ const PARQUET_RDLVL_ENCODING = 'RLE'; * are written. 
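 *
 * A minimal usage sketch (editor's illustration, not part of this patch; it
 * assumes a one-column UTF8 schema and mirrors the test code later in this diff):
 *
 *   const schema = new ParquetSchema({ name: { type: 'UTF8' } });
 *   const writer = await ParquetWriter.openFile(schema, 'fruits.parquet');
 *   await writer.appendRow({ name: 'apples' });
 *   await writer.close(); // flushes buffered rows and writes the footer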
*/ export class ParquetWriter { - schema: ParquetSchema; envelopeWriter: ParquetEnvelopeWriter | null; rowBuffer: parquet_shredder.RecordBuffer; @@ -74,10 +73,7 @@ export class ParquetWriter { opts = {}; } - let envelopeWriter = await ParquetEnvelopeWriter.openStream( - schema, - outputStream, - opts); + let envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts); return new ParquetWriter(schema, envelopeWriter, opts); } @@ -114,7 +110,7 @@ export class ParquetWriter { const options = { useDataPageV2: this.envelopeWriter.useDataPageV2, - bloomFilters: this.envelopeWriter.bloomFilters + bloomFilters: this.envelopeWriter.bloomFilters, }; if (this.rowBuffer.pageRowCount! >= this.envelopeWriter.pageSize) { await encodePages(this.schema, this.rowBuffer, options); @@ -142,13 +138,15 @@ export class ParquetWriter { if (this.envelopeWriter) { if (this.rowBuffer.rowCount! > 0 || this.rowBuffer.rowCount! >= this.rowGroupSize) { - await encodePages(this.schema, this.rowBuffer, {useDataPageV2: this.envelopeWriter.useDataPageV2, bloomFilters: this.envelopeWriter.bloomFilters}); + await encodePages(this.schema, this.rowBuffer, { + useDataPageV2: this.envelopeWriter.useDataPageV2, + bloomFilters: this.envelopeWriter.bloomFilters, + }); await this.envelopeWriter.writeRowGroup(this.rowBuffer); this.rowBuffer = {}; } - await this.envelopeWriter.writeBloomFilters(); await this.envelopeWriter.writeIndex(); await this.envelopeWriter.writeFooter(this.userMetadata); @@ -185,7 +183,6 @@ export class ParquetWriter { setPageSize(cnt: number) { this.envelopeWriter!.setPageSize(cnt); } - } /** @@ -195,16 +192,16 @@ export class ParquetWriter { * called in the correct order to produce a valid file. */ export class ParquetEnvelopeWriter { - schema: ParquetSchema + schema: ParquetSchema; write: Function; close: Function; - offset: Int64 - rowCount: Int64 - rowGroups: RowGroupExt[] + offset: Int64; + rowCount: Int64; + rowGroups: RowGroupExt[]; pageSize: number; useDataPageV2: boolean; pageIndex: boolean; - bloomFilters: Record // TODO: OR filterCollection + bloomFilters: Record; // TODO: OR filterCollection /** * Create a new parquet envelope writer that writes to the specified stream @@ -223,17 +220,17 @@ export class ParquetEnvelopeWriter { this.rowCount = new Int64(0); this.rowGroups = []; this.pageSize = opts.pageSize || PARQUET_DEFAULT_PAGE_SIZE; - this.useDataPageV2 = ("useDataPageV2" in opts) ? opts.useDataPageV2! : true; + this.useDataPageV2 = 'useDataPageV2' in opts ? opts.useDataPageV2! 
: true; this.pageIndex = opts.pageIndex!; this.bloomFilters = {}; - (opts.bloomFilters || []).forEach(bloomOption => { - this.bloomFilters[bloomOption.column] = bloomFilterWriter.createSBBF(bloomOption) + (opts.bloomFilters || []).forEach((bloomOption) => { + this.bloomFilters[bloomOption.column] = bloomFilterWriter.createSBBF(bloomOption); }); } writeSection(buf: Buffer) { - this.offset.setValue(this.offset.valueOf() + buf.length) + this.offset.setValue(this.offset.valueOf() + buf.length); return this.write(buf); } @@ -249,15 +246,12 @@ export class ParquetEnvelopeWriter { * shredRecord method */ async writeRowGroup(records: parquet_shredder.RecordBuffer) { - let rgroup = await encodeRowGroup( - this.schema, - records, - { - baseOffset: this.offset, - pageSize: this.pageSize, - useDataPageV2: this.useDataPageV2, - pageIndex: this.pageIndex - }); + let rgroup = await encodeRowGroup(this.schema, records, { + baseOffset: this.offset, + pageSize: this.pageSize, + useDataPageV2: this.useDataPageV2, + pageIndex: this.pageIndex, + }); this.rowCount.setValue(this.rowCount.valueOf() + records.rowCount!); this.rowGroups.push(rgroup.metadata); @@ -265,18 +259,17 @@ export class ParquetEnvelopeWriter { } writeBloomFilters() { - this.rowGroups.forEach(group => { - group.columns.forEach(column => { + this.rowGroups.forEach((group) => { + group.columns.forEach((column) => { if (!column.meta_data?.path_in_schema.length) { - return + return; } const filterName = column.meta_data?.path_in_schema.join(','); if (!(filterName in this.bloomFilters)) { return; } - const serializedBloomFilterData = - bloomFilterWriter.getSerializedBloomFilterData(this.bloomFilters[filterName]); + const serializedBloomFilterData = bloomFilterWriter.getSerializedBloomFilterData(this.bloomFilters[filterName]); bloomFilterWriter.setFilterOffset(column, this.offset); @@ -290,7 +283,7 @@ export class ParquetEnvelopeWriter { */ writeIndex() { this.schema.fieldList.forEach((c, i) => { - this.rowGroups.forEach(group => { + this.rowGroups.forEach((group) => { let column = group.columns[i]; if (!column) return; @@ -325,9 +318,8 @@ export class ParquetEnvelopeWriter { throw 'cannot write parquet file with zero fieldList'; } - return this.writeSection( - encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata)); - }; + return this.writeSection(encodeFooter(this.schema, this.rowCount, this.rowGroups, userMetadata)); + } /** * Set the parquet data page size. 
The data page size controls the maximum @@ -336,37 +328,35 @@ export class ParquetEnvelopeWriter { setPageSize(cnt: number) { this.pageSize = cnt; } - } /** * Create a parquet transform stream */ export class ParquetTransformer extends stream.Transform { - writer: ParquetWriter; constructor(schema: ParquetSchema, opts = {}) { - super({objectMode: true}); + super({ objectMode: true }); let writeProxy = (function (t) { return function (b: unknown) { t.push(b); - } + }; })(this); this.writer = new ParquetWriter( schema, - new ParquetEnvelopeWriter(schema, writeProxy, function () { - }, new Int64(0), opts), - opts); + new ParquetEnvelopeWriter(schema, writeProxy, function () {}, new Int64(0), opts), + opts + ); } _transform(row: Record, _encoding: string, callback: Function) { if (row) { this.writer.appendRow(row).then( - data => callback(null, data), - err => { + (data) => callback(null, data), + (err) => { const fullErr = new Error(`Error transforming to parquet: ${err.toString()} row:${row}`); fullErr.message = err; callback(fullErr); @@ -378,10 +368,8 @@ export class ParquetTransformer extends stream.Transform { } _flush(callback: (foo: any, bar?: any) => any) { - this.writer.close() - .then(d => callback(null, d), callback); + this.writer.close().then((d) => callback(null, d), callback); } - } /** @@ -410,8 +398,10 @@ function encodeStatisticsValue(value: any, column: ParquetField | Options) { function encodeStatistics(statistics: parquet_thrift.Statistics, column: ParquetField | Options) { statistics = Object.assign({}, statistics); - statistics.min_value = statistics.min_value === undefined ? null : encodeStatisticsValue(statistics.min_value, column); - statistics.max_value = statistics.max_value === undefined ? null : encodeStatisticsValue(statistics.max_value, column); + statistics.min_value = + statistics.min_value === undefined ? null : encodeStatisticsValue(statistics.min_value, column); + statistics.max_value = + statistics.max_value === undefined ? null : encodeStatisticsValue(statistics.max_value, column); statistics.max = statistics.max_value; statistics.min = statistics.min_value; @@ -419,7 +409,12 @@ function encodeStatistics(statistics: parquet_thrift.Statistics, column: Parquet return new parquet_thrift.Statistics(statistics); } -async function encodePages(schema: ParquetSchema, rowBuffer: parquet_shredder.RecordBuffer, opts: { bloomFilters: Record, useDataPageV2: boolean }) {// generic +async function encodePages( + schema: ParquetSchema, + rowBuffer: parquet_shredder.RecordBuffer, + opts: { bloomFilters: Record; useDataPageV2: boolean } +) { + // generic if (!rowBuffer.pageRowCount) { return; } @@ -434,9 +429,9 @@ async function encodePages(schema: ParquetSchema, rowBuffer: parquet_shredder.Re const columnPath = field.path.join(','); const values = rowBuffer.columnData![columnPath]; - if (opts.bloomFilters && (columnPath in opts.bloomFilters)) { + if (opts.bloomFilters && columnPath in opts.bloomFilters) { const splitBlockBloomFilter = opts.bloomFilters[columnPath]; - values.values!.forEach(v => splitBlockBloomFilter.insert(v)); + values.values!.forEach((v) => splitBlockBloomFilter.insert(v)); } let statistics: parquet_thrift.Statistics = {}; @@ -462,14 +457,10 @@ async function encodePages(schema: ParquetSchema, rowBuffer: parquet_shredder.Re values.values!, values.rlevels!, values.dlevels!, - statistics!); + statistics! 
+ ); } else { - page = await encodeDataPage( - field, - values.values || [], - values.rlevels || [], - values.dlevels || [], - statistics!); + page = await encodeDataPage(field, values.values || [], values.rlevels || [], values.dlevels || [], statistics!); } let pages = rowBuffer.pages![field.path.join(',')]; @@ -480,10 +471,9 @@ async function encodePages(schema: ParquetSchema, rowBuffer: parquet_shredder.Re statistics, first_row_index, distinct_values: values.distinct_values!, - num_values: values.dlevels!.length + num_values: values.dlevels!.length, }); - values.distinct_values = new Set(); values.values = []; values.rlevels = []; @@ -497,45 +487,42 @@ async function encodePages(schema: ParquetSchema, rowBuffer: parquet_shredder.Re /** * Encode a parquet data page */ -async function encodeDataPage(column: ParquetField, values: number[], rlevels: number[], dlevels: number[], statistics: parquet_thrift.Statistics) { +async function encodeDataPage( + column: ParquetField, + values: number[], + rlevels: number[], + dlevels: number[], + statistics: parquet_thrift.Statistics +) { /* encode values */ - let valuesBuf = encodeValues( - column.primitiveType!, - column.encoding!, - values, { - bitWidth: column.typeLength, - ...column - }); + let valuesBuf = encodeValues(column.primitiveType!, column.encoding!, values, { + bitWidth: column.typeLength, + ...column, + }); /* encode repetition and definition levels */ let rLevelsBuf = Buffer.alloc(0); if (column.rLevelMax > 0) { - rLevelsBuf = encodeValues( - PARQUET_RDLVL_TYPE, - PARQUET_RDLVL_ENCODING, - rlevels, - {bitWidth: parquet_util.getBitWidth(column.rLevelMax)}); + rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, rlevels, { + bitWidth: parquet_util.getBitWidth(column.rLevelMax), + }); } let dLevelsBuf = Buffer.alloc(0); if (column.dLevelMax > 0) { - dLevelsBuf = encodeValues( - PARQUET_RDLVL_TYPE, - PARQUET_RDLVL_ENCODING, - dlevels, - {bitWidth: parquet_util.getBitWidth(column.dLevelMax)}); + dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, dlevels, { + bitWidth: parquet_util.getBitWidth(column.dLevelMax), + }); } /* build page header */ let pageBody = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]); - pageBody = await parquet_compression.deflate( - column.compression!, - pageBody); + pageBody = await parquet_compression.deflate(column.compression!, pageBody); let pageHeader = new parquet_thrift.PageHeader(); pageHeader.type = parquet_thrift.PageType['DATA_PAGE']; pageHeader.uncompressed_page_size = rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length; - pageHeader.compressed_page_size = pageBody.length + pageHeader.compressed_page_size = pageBody.length; pageHeader.data_page_header = new parquet_thrift.DataPageHeader(); pageHeader.data_page_header.num_values = dlevels.length; if (column.statistics !== false) { @@ -543,10 +530,8 @@ async function encodeDataPage(column: ParquetField, values: number[], rlevels: n } pageHeader.data_page_header.encoding = parquet_thrift.Encoding[column.encoding!]; - pageHeader.data_page_header.definition_level_encoding = - parquet_thrift.Encoding[PARQUET_RDLVL_ENCODING]; - pageHeader.data_page_header.repetition_level_encoding = - parquet_thrift.Encoding[PARQUET_RDLVL_ENCODING]; + pageHeader.data_page_header.definition_level_encoding = parquet_thrift.Encoding[PARQUET_RDLVL_ENCODING]; + pageHeader.data_page_header.repetition_level_encoding = parquet_thrift.Encoding[PARQUET_RDLVL_ENCODING]; /* concat page header, repetition and definition levels and values */ return 
Buffer.concat([parquet_util.serializeThrift(pageHeader), pageBody]); @@ -555,41 +540,37 @@ async function encodeDataPage(column: ParquetField, values: number[], rlevels: n /** * Encode a parquet data page (v2) */ -async function encodeDataPageV2(column: ParquetField, rowCount: number, values: number[], rlevels: number[], dlevels: number[], statistics: parquet_thrift.Statistics) { +async function encodeDataPageV2( + column: ParquetField, + rowCount: number, + values: number[], + rlevels: number[], + dlevels: number[], + statistics: parquet_thrift.Statistics +) { /* encode values */ - let valuesBuf = encodeValues( - column.primitiveType!, - column.encoding!, - values, { - bitWidth: column.typeLength, - ...column, - }); + let valuesBuf = encodeValues(column.primitiveType!, column.encoding!, values, { + bitWidth: column.typeLength, + ...column, + }); - let valuesBufCompressed = await parquet_compression.deflate( - column.compression!, - valuesBuf); + let valuesBufCompressed = await parquet_compression.deflate(column.compression!, valuesBuf); /* encode repetition and definition levels */ let rLevelsBuf = Buffer.alloc(0); if (column.rLevelMax > 0) { - rLevelsBuf = encodeValues( - PARQUET_RDLVL_TYPE, - PARQUET_RDLVL_ENCODING, - rlevels, { - bitWidth: parquet_util.getBitWidth(column.rLevelMax), - disableEnvelope: true - }); + rLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, rlevels, { + bitWidth: parquet_util.getBitWidth(column.rLevelMax), + disableEnvelope: true, + }); } let dLevelsBuf = Buffer.alloc(0); if (column.dLevelMax > 0) { - dLevelsBuf = encodeValues( - PARQUET_RDLVL_TYPE, - PARQUET_RDLVL_ENCODING, - dlevels, { - bitWidth: parquet_util.getBitWidth(column.dLevelMax), - disableEnvelope: true - }); + dLevelsBuf = encodeValues(PARQUET_RDLVL_TYPE, PARQUET_RDLVL_ENCODING, dlevels, { + bitWidth: parquet_util.getBitWidth(column.dLevelMax), + disableEnvelope: true, + }); } /* build page header */ @@ -604,33 +585,35 @@ async function encodeDataPageV2(column: ParquetField, rowCount: number, values: pageHeader.data_page_header_v2.statistics = encodeStatistics(statistics, column); } - pageHeader.uncompressed_page_size = - rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length; + pageHeader.uncompressed_page_size = rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length; - pageHeader.compressed_page_size = - rLevelsBuf.length + dLevelsBuf.length + valuesBufCompressed.length; + pageHeader.compressed_page_size = rLevelsBuf.length + dLevelsBuf.length + valuesBufCompressed.length; pageHeader.data_page_header_v2.encoding = parquet_thrift.Encoding[column.encoding!]; pageHeader.data_page_header_v2.definition_levels_byte_length = dLevelsBuf.length; pageHeader.data_page_header_v2.repetition_levels_byte_length = rLevelsBuf.length; - pageHeader.data_page_header_v2.is_compressed = - column.compression !== 'UNCOMPRESSED'; + pageHeader.data_page_header_v2.is_compressed = column.compression !== 'UNCOMPRESSED'; /* concat page header, repetition and definition levels and values */ - return Buffer.concat([ - parquet_util.serializeThrift(pageHeader), - rLevelsBuf, - dLevelsBuf, - valuesBufCompressed]); + return Buffer.concat([parquet_util.serializeThrift(pageHeader), rLevelsBuf, dLevelsBuf, valuesBufCompressed]); } - /** * Encode an array of values into a parquet column chunk */ -async function encodeColumnChunk(pages: Page[], opts: { column: ParquetField, baseOffset: number, pageSize: number, rowCount: number, useDataPageV2: boolean, pageIndex: boolean }) { - let pagesBuf = 
Buffer.concat(pages.map(d => d.page)); +async function encodeColumnChunk( + pages: Page[], + opts: { + column: ParquetField; + baseOffset: number; + pageSize: number; + rowCount: number; + useDataPageV2: boolean; + pageIndex: boolean; + } +) { + let pagesBuf = Buffer.concat(pages.map((d) => d.page)); let num_values = pages.reduce((p, d) => p + d.num_values, 0); let offset = opts.baseOffset; @@ -709,7 +692,7 @@ async function encodeColumnChunk(pages: Page[], opts: { column: ParquetField, ba /* concat metadata header and data pages */ let metadataOffset = opts.baseOffset + pagesBuf.length; let body = Buffer.concat([pagesBuf, parquet_util.serializeThrift(metadata)]); - return {body, metadata, metadataOffset}; + return { body, metadata, metadataOffset }; } /** @@ -727,34 +710,37 @@ async function encodeRowGroup(schema: ParquetSchema, data: parquet_shredder.Reco continue; } - let cchunkData = await encodeColumnChunk( - data.pages![field.path.join(',')], - { - column: field, - baseOffset: opts.baseOffset!.valueOf() + body.length, - pageSize: opts.pageSize || 0, - rowCount: data.rowCount || 0, - useDataPageV2: opts.useDataPageV2 ?? true, - pageIndex: opts.pageIndex ?? true - }); + let cchunkData = await encodeColumnChunk(data.pages![field.path.join(',')], { + column: field, + baseOffset: opts.baseOffset!.valueOf() + body.length, + pageSize: opts.pageSize || 0, + rowCount: data.rowCount || 0, + useDataPageV2: opts.useDataPageV2 ?? true, + pageIndex: opts.pageIndex ?? true, + }); let cchunk = new parquet_thrift.ColumnChunk(); cchunk.file_offset = new Int64(cchunkData.metadataOffset); cchunk.meta_data = cchunkData.metadata; metadata.columns.push(cchunk); - metadata.total_byte_size = new Int64(metadata.total_byte_size.valueOf() + (cchunkData.body.length)); + metadata.total_byte_size = new Int64(metadata.total_byte_size.valueOf() + cchunkData.body.length); body = Buffer.concat([body, cchunkData.body]); } - return {body, metadata}; + return { body, metadata }; } /** * Encode a parquet file metadata footer */ -function encodeFooter(schema: ParquetSchema, rowCount: Int64, rowGroups: RowGroupExt[], userMetadata: Record) { - let metadata = new parquet_thrift.FileMetaData() +function encodeFooter( + schema: ParquetSchema, + rowCount: Int64, + rowGroups: RowGroupExt[], + userMetadata: Record +) { + let metadata = new parquet_thrift.FileMetaData(); metadata.version = PARQUET_VERSION; metadata.created_by = '@dsnp/parquetjs'; metadata.num_rows = rowCount; @@ -763,7 +749,7 @@ function encodeFooter(schema: ParquetSchema, rowCount: Int64, rowGroups: RowGrou metadata.key_value_metadata = []; for (let k in userMetadata) { - let kv = new parquet_thrift.KeyValue() + let kv = new parquet_thrift.KeyValue(); kv.key = k; kv.value = userMetadata[k]; metadata.key_value_metadata.push(kv); @@ -793,7 +779,7 @@ function encodeFooter(schema: ParquetSchema, rowCount: Int64, rowGroups: RowGrou // Support Decimal switch (schemaElem.converted_type) { - case (ConvertedType.DECIMAL): + case ConvertedType.DECIMAL: schemaElem.precision = field.precision; schemaElem.scale = field.scale || 0; break; diff --git a/parquet.ts b/parquet.ts index c8b6a0f2..d0e2cc33 100644 --- a/parquet.ts +++ b/parquet.ts @@ -21,12 +21,12 @@ export const ParquetShredder = shredder; export const ParquetFieldBuilder = fields; export default { - ParquetEnvelopeReader, - ParquetReader, - ParquetEnvelopeWriter, - ParquetWriter, - ParquetTransformer, - ParquetSchema, - ParquetShredder, - ParquetFieldBuilder, -} + ParquetEnvelopeReader, + ParquetReader, + 
ParquetEnvelopeWriter, + ParquetWriter, + ParquetTransformer, + ParquetSchema, + ParquetShredder, + ParquetFieldBuilder, +}; diff --git a/test/bloomFilterIntegration.ts b/test/bloomFilterIntegration.ts index 178c245b..9b4a428d 100644 --- a/test/bloomFilterIntegration.ts +++ b/test/bloomFilterIntegration.ts @@ -1,31 +1,33 @@ -import {assert} from "chai"; -import parquet from "../parquet"; +import { assert } from 'chai'; +import parquet from '../parquet'; -import parquet_thrift from "../gen-nodejs/parquet_types"; -import {decodeThrift} from "../lib/util"; -import SplitBlockBloomFilter from "../lib/bloom/sbbf"; +import parquet_thrift from '../gen-nodejs/parquet_types'; +import { decodeThrift } from '../lib/util'; +import SplitBlockBloomFilter from '../lib/bloom/sbbf'; const TEST_VTIME = new Date(); -const TEST_FILE = 'fruits-bloomfilter.parquet' +const TEST_FILE = 'fruits-bloomfilter.parquet'; type BloomFilterColumnData = { - sbbf: SplitBlockBloomFilter, - columnName: string, - rowGroupIndex: number, -} + sbbf: SplitBlockBloomFilter; + columnName: string; + rowGroupIndex: number; +}; const sampleColumnHeaders = async (filename: string) => { let reader = await parquet.ParquetReader.openFile(filename); - let column = reader.metadata!.row_groups[0].columns[0]; - let buffer = await reader!.envelopeReader!.read(+column!.meta_data!.data_page_offset, +column!.meta_data!.total_compressed_size); + let buffer = await reader!.envelopeReader!.read( + +column!.meta_data!.data_page_offset, + +column!.meta_data!.total_compressed_size + ); let cursor = { buffer: buffer, offset: 0, - size: buffer.length + size: buffer.length, }; const pages = []; @@ -36,108 +38,108 @@ const sampleColumnHeaders = async (filename: string) => { pages.push(pageHeader); cursor.offset += pageHeader.compressed_page_size; } - return {column, pages}; -} + return { column, pages }; +}; -describe("bloom filter", async function () { +describe('bloom filter', async function () { let row: any; let reader: any; let bloomFilters: Record>; - describe("a nested schema", () => { + describe('a nested schema', () => { const schema = new parquet.ParquetSchema({ - name: {type: "UTF8"}, - quantity: {type: "INT64", optional: true}, - price: {type: "DOUBLE"}, - date: {type: "TIMESTAMP_MICROS"}, - day: {type: "DATE"}, - finger: {type: "FIXED_LEN_BYTE_ARRAY", typeLength: 5}, - inter: {type: "INTERVAL", statistics: false}, + name: { type: 'UTF8' }, + quantity: { type: 'INT64', optional: true }, + price: { type: 'DOUBLE' }, + date: { type: 'TIMESTAMP_MICROS' }, + day: { type: 'DATE' }, + finger: { type: 'FIXED_LEN_BYTE_ARRAY', typeLength: 5 }, + inter: { type: 'INTERVAL', statistics: false }, stock: { repeated: true, fields: { - quantity: {type: "INT64", repeated: true}, - warehouse: {type: "UTF8"}, + quantity: { type: 'INT64', repeated: true }, + warehouse: { type: 'UTF8' }, }, }, - colour: {type: "UTF8", repeated: true}, - meta_json: {type: "BSON", optional: true, statistics: false}, + colour: { type: 'UTF8', repeated: true }, + meta_json: { type: 'BSON', optional: true, statistics: false }, }); before(async function () { const options = { pageSize: 3, bloomFilters: [ { - column: "name", + column: 'name', numFilterBytes: 1024, }, { - column: "quantity", + column: 'quantity', numFilterBytes: 1024, }, { - column: "stock,warehouse", + column: 'stock,warehouse', numFilterBytes: 1024, - } + }, ], }; let writer = await parquet.ParquetWriter.openFile(schema, TEST_FILE, options); await writer.appendRow({ - name: "apples", + name: 'apples', quantity: BigInt(10), 
price: 2.6, - day: new Date("2017-11-26"), + day: new Date('2017-11-26'), date: new Date(TEST_VTIME.valueOf() + 1000), - finger: "FNORD", - inter: {months: 10, days: 5, milliseconds: 777}, - colour: ["green", "red"], + finger: 'FNORD', + inter: { months: 10, days: 5, milliseconds: 777 }, + colour: ['green', 'red'], }); await writer.appendRow({ - name: "oranges", + name: 'oranges', quantity: BigInt(20), price: 2.7, - day: new Date("2018-03-03"), + day: new Date('2018-03-03'), date: new Date(TEST_VTIME.valueOf() + 2000), - finger: "ABCDE", - inter: {months: 42, days: 23, milliseconds: 777}, - colour: ["orange"], + finger: 'ABCDE', + inter: { months: 42, days: 23, milliseconds: 777 }, + colour: ['orange'], }); await writer.appendRow({ - name: "kiwi", + name: 'kiwi', price: 4.2, quantity: BigInt(15), - day: new Date("2008-11-26"), + day: new Date('2008-11-26'), date: new Date(TEST_VTIME.valueOf() + 8000), - finger: "XCVBN", - inter: {months: 60, days: 1, milliseconds: 99}, + finger: 'XCVBN', + inter: { months: 60, days: 1, milliseconds: 99 }, stock: [ - {quantity: BigInt(42), warehouse: "f"}, - {quantity: BigInt(21), warehouse: "x"}, + { quantity: BigInt(42), warehouse: 'f' }, + { quantity: BigInt(21), warehouse: 'x' }, ], - colour: ["green", "brown", "yellow"], - meta_json: {expected_ship_date: TEST_VTIME.valueOf()}, + colour: ['green', 'brown', 'yellow'], + meta_json: { expected_ship_date: TEST_VTIME.valueOf() }, }); await writer.appendRow({ - name: "banana", + name: 'banana', price: 3.2, - day: new Date("2017-11-26"), + day: new Date('2017-11-26'), date: new Date(TEST_VTIME.valueOf() + 6000), - finger: "FNORD", - inter: {months: 1, days: 15, milliseconds: 888}, - colour: ["yellow"], - meta_json: {shape: "curved"}, + finger: 'FNORD', + inter: { months: 1, days: 15, milliseconds: 888 }, + colour: ['yellow'], + meta_json: { shape: 'curved' }, }); await writer.close(); reader = await parquet.ParquetReader.openFile(TEST_FILE); row = reader.metadata.row_groups[0]; - bloomFilters = await reader.getBloomFiltersFor(["name", "quantity", "stock,warehouse"]); + bloomFilters = await reader.getBloomFiltersFor(['name', 'quantity', 'stock,warehouse']); }); it('contains name and quantity filter', () => { @@ -145,113 +147,83 @@ describe("bloom filter", async function () { assert.deepEqual(columnsFilterNames, ['name', 'quantity', 'stock,warehouse']); }); - it("writes bloom filters for column: name", async function () { + it('writes bloom filters for column: name', async function () { const splitBlockBloomFilter = bloomFilters.name[0].sbbf; - assert.isTrue( - await splitBlockBloomFilter.check(Buffer.from("apples")), - "apples is included in name filter" - ); - assert.isTrue( - await splitBlockBloomFilter.check(Buffer.from("oranges")), - "oranges is included in name filter" - ); - assert.isTrue( - await splitBlockBloomFilter.check(Buffer.from("kiwi")), - "kiwi is included" - ); - assert.isTrue( - await splitBlockBloomFilter.check(Buffer.from("banana")), - "banana is included in name filter" - ); - assert.isFalse( - await splitBlockBloomFilter.check(Buffer.from("taco")), - "taco is NOT included in name filter" - ); + assert.isTrue(await splitBlockBloomFilter.check(Buffer.from('apples')), 'apples is included in name filter'); + assert.isTrue(await splitBlockBloomFilter.check(Buffer.from('oranges')), 'oranges is included in name filter'); + assert.isTrue(await splitBlockBloomFilter.check(Buffer.from('kiwi')), 'kiwi is included'); + assert.isTrue(await splitBlockBloomFilter.check(Buffer.from('banana')), 'banana is 
included in name filter'); + assert.isFalse(await splitBlockBloomFilter.check(Buffer.from('taco')), 'taco is NOT included in name filter'); }); - it("writes bloom filters for column: quantity", async function () { + it('writes bloom filters for column: quantity', async function () { const splitBlockBloomFilter = bloomFilters.quantity[0].sbbf; - assert.isTrue( - await splitBlockBloomFilter.check(BigInt(10)), - "10n is included in quantity filter" - ); - assert.isTrue( - await splitBlockBloomFilter.check(BigInt(15)), - "15n is included in quantity filter" - ); - assert.isFalse( - await splitBlockBloomFilter.check(BigInt(100)), - "100n is NOT included in quantity filter" - ); + assert.isTrue(await splitBlockBloomFilter.check(BigInt(10)), '10n is included in quantity filter'); + assert.isTrue(await splitBlockBloomFilter.check(BigInt(15)), '15n is included in quantity filter'); + assert.isFalse(await splitBlockBloomFilter.check(BigInt(100)), '100n is NOT included in quantity filter'); }); it('writes bloom filters for stock,warehouse', async () => { const splitBlockBloomFilter = bloomFilters['stock,warehouse'][0].sbbf; - assert.isTrue( - await splitBlockBloomFilter.check(Buffer.from('x')), - "x should be in the warehouse filter" - ); - assert.isTrue( - await splitBlockBloomFilter.check(Buffer.from('f')), - "f should be in the warehouse filter" - ); + assert.isTrue(await splitBlockBloomFilter.check(Buffer.from('x')), 'x should be in the warehouse filter'); + assert.isTrue(await splitBlockBloomFilter.check(Buffer.from('f')), 'f should be in the warehouse filter'); assert.isFalse( await splitBlockBloomFilter.check(Buffer.from('foo')), - "foo should not be in the warehouse filter" + 'foo should not be in the warehouse filter' ); - }) + }); }); - describe("a simple schema with a nested list", () => { + describe('a simple schema with a nested list', () => { const nestedListSchema = new parquet.ParquetSchema({ - name: {type: "UTF8"}, + name: { type: 'UTF8' }, querystring: { - type: "LIST", + type: 'LIST', fields: { list: { repeated: true, fields: { element: { fields: { - key: {type: "UTF8"}, - value: {type: "UTF8"} - } - } - } - } - } - } + key: { type: 'UTF8' }, + value: { type: 'UTF8' }, + }, + }, + }, + }, + }, + }, }); - it("can be written, read and checked", async () => { - const file = "/tmp/issue-98.parquet"; - const nestedListFilterColumn = "querystring,list,element,key"; + it('can be written, read and checked', async () => { + const file = '/tmp/issue-98.parquet'; + const nestedListFilterColumn = 'querystring,list,element,key'; const writer = await parquet.ParquetWriter.openFile(nestedListSchema, file, { - bloomFilters: [ - {column: "name"}, - {column: nestedListFilterColumn}, - ], + bloomFilters: [{ column: 'name' }, { column: nestedListFilterColumn }], }); await writer.appendRow({ - name: "myquery", + name: 'myquery', querystring: { list: [ - {element: {key: "foo", value: "bar"}}, - {element: {key: "foo2", value: "bar2"}}, - {element: {key: "foo3", value: "bar3"}} - ] - } + { element: { key: 'foo', value: 'bar' } }, + { element: { key: 'foo2', value: 'bar2' } }, + { element: { key: 'foo3', value: 'bar3' } }, + ], + }, }); await writer.close(); const reader = await parquet.ParquetReader.openFile(file); - const bloomFilters: Record> = await reader.getBloomFiltersFor(["name", "querystring,list,element,key"]); + const bloomFilters: Record> = await reader.getBloomFiltersFor([ + 'name', + 'querystring,list,element,key', + ]); assert.isDefined(bloomFilters[nestedListFilterColumn]); const 
bfForNestedList = bloomFilters[nestedListFilterColumn][0].sbbf; assert.equal(bfForNestedList.getNumFilterBlocks(), 935); - const foo2IsThere = await bfForNestedList.check("foo2"); + const foo2IsThere = await bfForNestedList.check('foo2'); assert(foo2IsThere); - }) - }) + }); + }); }); diff --git a/test/bloomFilterReader.test.ts b/test/bloomFilterReader.test.ts index 159165d0..9a1517d9 100644 --- a/test/bloomFilterReader.test.ts +++ b/test/bloomFilterReader.test.ts @@ -1,11 +1,11 @@ -import chai, {expect} from "chai" -import { Int64 } from "thrift"; +import chai, { expect } from 'chai'; +import { Int64 } from 'thrift'; import { parseBloomFilterOffsets } from '../lib/bloomFilterIO/bloomFilterReader'; -import {ColumnChunkData, ColumnChunkExt, ColumnMetaDataExt} from "../lib/declare"; -import XxHasher from "../lib/bloom/xxhasher"; +import { ColumnChunkData, ColumnChunkExt, ColumnMetaDataExt } from '../lib/declare'; +import XxHasher from '../lib/bloom/xxhasher'; const assert = chai.assert; -const emptyOffset = () => new Int64(Buffer.from(""), 0); +const emptyOffset = () => new Int64(Buffer.from(''), 0); const emptyMetaData = (): ColumnMetaDataExt => { return { @@ -22,25 +22,24 @@ const emptyMetaData = (): ColumnMetaDataExt => { dictionary_page_offset: emptyOffset(), statistics: {}, encoding_stats: [], - bloom_filter_offset: emptyOffset() - } -} + bloom_filter_offset: emptyOffset(), + }; +}; -describe("bloomFilterReader", () => { - describe("offsets", () => { +describe('bloomFilterReader', () => { + describe('offsets', () => { let columnChunkDataCollection: Array; - beforeEach(() => { - const metaData: ColumnMetaDataExt = emptyMetaData() - metaData.path_in_schema = ["name"] - metaData.bloom_filter_offset = new Int64(Buffer.from("000000000874", "hex"), 0) + const metaData: ColumnMetaDataExt = emptyMetaData(); + metaData.path_in_schema = ['name']; + metaData.bloom_filter_offset = new Int64(Buffer.from('000000000874', 'hex'), 0); const columnData: ColumnChunkExt = { meta_data: metaData, file_offset: emptyOffset(), - file_path: '' - } + file_path: '', + }; columnChunkDataCollection = [ { @@ -50,11 +49,11 @@ describe("bloomFilterReader", () => { ]; }); - it("returns bloom filter offsets", () => { + it('returns bloom filter offsets', () => { const result = parseBloomFilterOffsets(columnChunkDataCollection); const expected = [ { - columnName: "name", + columnName: 'name', offsetBytes: 2164, rowGroupIndex: 0, }, @@ -62,12 +61,11 @@ describe("bloomFilterReader", () => { expect(result).to.deep.equal(expected); }); - }) - describe("XXHasher", async () => { - it("outputs hex-encoded strings", async () => { - const hasher = await (new XxHasher()); - assert.equal("ee7276ee58e4421c", await hasher.hash64("15")); - }) - }) + }); + describe('XXHasher', async () => { + it('outputs hex-encoded strings', async () => { + const hasher = await new XxHasher(); + assert.equal('ee7276ee58e4421c', await hasher.hash64('15')); + }); + }); }); - diff --git a/test/bloomFilterWriter.test.ts b/test/bloomFilterWriter.test.ts index 3c517e0f..d8284ef7 100644 --- a/test/bloomFilterWriter.test.ts +++ b/test/bloomFilterWriter.test.ts @@ -1,11 +1,11 @@ -import sinon from "sinon" +import sinon from 'sinon'; -import { createSBBF } from "../lib/bloomFilterIO/bloomFilterWriter" -const SplitBlockBloomFilter = require("../lib/bloom/sbbf").default; +import { createSBBF } from '../lib/bloomFilterIO/bloomFilterWriter'; +const SplitBlockBloomFilter = require('../lib/bloom/sbbf').default; -describe("buildFilterBlocks", () => { - describe("when no 
options are present", () => { - let sbbfMock:sinon.SinonMock; +describe('buildFilterBlocks', () => { + describe('when no options are present', () => { + let sbbfMock: sinon.SinonMock; beforeEach(() => { sbbfMock = sinon.mock(SplitBlockBloomFilter.prototype); @@ -15,24 +15,24 @@ describe("buildFilterBlocks", () => { sbbfMock.verify(); }); - it("calls .init once", () => { - sbbfMock.expects("init").once(); + it('calls .init once', () => { + sbbfMock.expects('init').once(); createSBBF({}); }); - it("does not set false positive rate", () => { - sbbfMock.expects("setOptionNumFilterBytes").never(); + it('does not set false positive rate', () => { + sbbfMock.expects('setOptionNumFilterBytes').never(); createSBBF({}); }); - it("does not set number of distinct", () => { - sbbfMock.expects("setOptionNumDistinct").never(); + it('does not set number of distinct', () => { + sbbfMock.expects('setOptionNumDistinct').never(); createSBBF({}); }); }); - describe("when numFilterBytes is present", () => { - let sbbfMock:sinon.SinonMock; + describe('when numFilterBytes is present', () => { + let sbbfMock: sinon.SinonMock; beforeEach(() => { sbbfMock = sinon.mock(SplitBlockBloomFilter.prototype); @@ -42,24 +42,24 @@ describe("buildFilterBlocks", () => { sbbfMock.verify(); }); - it("calls setOptionNumberFilterBytes once", () => { - sbbfMock.expects("setOptionNumFilterBytes").once().returnsThis(); + it('calls setOptionNumberFilterBytes once', () => { + sbbfMock.expects('setOptionNumFilterBytes').once().returnsThis(); createSBBF({ numFilterBytes: 1024 }); }); - it("does not set number of distinct", () => { - sbbfMock.expects("setOptionNumDistinct").never(); + it('does not set number of distinct', () => { + sbbfMock.expects('setOptionNumDistinct').never(); createSBBF({}); }); - it("calls .init once", () => { - sbbfMock.expects("init").once(); + it('calls .init once', () => { + sbbfMock.expects('init').once(); createSBBF({}); }); }); - describe("when numFilterBytes is NOT present", () => { - let sbbfMock:sinon.SinonMock; + describe('when numFilterBytes is NOT present', () => { + let sbbfMock: sinon.SinonMock; beforeEach(() => { sbbfMock = sinon.mock(SplitBlockBloomFilter.prototype); }); @@ -68,16 +68,16 @@ describe("buildFilterBlocks", () => { sbbfMock.verify(); }); - describe("and falsePositiveRate is present", () => { - it("calls ssbf.setOptionFalsePositiveRate", () => { - sbbfMock.expects("setOptionFalsePositiveRate").once(); + describe('and falsePositiveRate is present', () => { + it('calls ssbf.setOptionFalsePositiveRate', () => { + sbbfMock.expects('setOptionFalsePositiveRate').once(); createSBBF({ falsePositiveRate: 0.1 }); }); }); - describe("and numDistinct is present", () => { - it("calls ssbf.setOptionNumDistinct", () => { - sbbfMock.expects("setOptionNumDistinct").once(); + describe('and numDistinct is present', () => { + it('calls ssbf.setOptionNumDistinct', () => { + sbbfMock.expects('setOptionNumDistinct').once(); createSBBF({ falsePositiveRate: 0.1, numDistinct: 1, diff --git a/test/browser/index.html b/test/browser/index.html index cb258ea3..cc487705 100644 --- a/test/browser/index.html +++ b/test/browser/index.html @@ -1,22 +1,24 @@ - + - + Mocha Tests - - + + - +

- + - - \ No newline at end of file + + diff --git a/test/browser/main.ts b/test/browser/main.ts index 603250df..f79d9b9d 100644 --- a/test/browser/main.ts +++ b/test/browser/main.ts @@ -1,13 +1,29 @@ -import * as parquetjs from "../../dist/browser/parquet.esm"; -import { assert } from "chai"; +import * as parquetjs from '../../dist/browser/parquet.esm'; +import { assert } from 'chai'; -const buffer = require("buffer"); +const buffer = require('buffer'); -describe("Browser tests", () => { - describe("reader", () => { - it("can read snappy compressed data", async () => { +describe('Browser tests', () => { + describe('reader', () => { + it('can read snappy compressed data', async () => { // Data from test/test-files/snappy-compressed.parquet - const uint8Array = [80, 65, 82, 49, 21, 6, 21, 80, 21, 82, 92, 21, 8, 21, 0, 21, 8, 21, 0, 21, 0, 21, 0, 17, 28, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 22, 0, 22, 8, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 0, 0, 0, 40, 32, 5, 0, 0, 0, 104, 101, 108, 108, 111, 1, 9, 104, 119, 111, 114, 108, 100, 6, 0, 0, 0, 98, 97, 110, 97, 110, 97, 8, 0, 0, 0, 49, 112, 111, 97, 52, 98, 112, 102, 21, 12, 25, 37, 6, 0, 25, 24, 16, 99, 111, 109, 112, 114, 101, 115, 115, 101, 100, 83, 116, 114, 105, 110, 103, 21, 2, 22, 8, 22, 206, 1, 22, 206, 1, 38, 8, 60, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 22, 0, 22, 8, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 0, 0, 41, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 25, 24, 5, 119, 111, 114, 108, 100, 0, 25, 28, 22, 8, 21, 206, 1, 22, 0, 0, 0, 21, 2, 25, 44, 72, 4, 114, 111, 111, 116, 21, 2, 0, 21, 12, 37, 0, 24, 16, 99, 111, 109, 112, 114, 101, 115, 115, 101, 100, 83, 116, 114, 105, 110, 103, 37, 0, 0, 22, 8, 25, 28, 25, 28, 38, 214, 1, 28, 21, 12, 25, 37, 6, 0, 25, 24, 16, 99, 111, 109, 112, 114, 101, 115, 115, 101, 100, 83, 116, 114, 105, 110, 103, 21, 2, 22, 8, 22, 206, 1, 22, 206, 1, 38, 8, 60, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 22, 0, 22, 8, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 0, 0, 22, 154, 3, 21, 22, 22, 242, 2, 21, 40, 0, 22, 234, 2, 22, 8, 0, 25, 12, 24, 15, 64, 100, 115, 110, 112, 47, 112, 97, 114, 113, 117, 101, 116, 106, 115, 0, 163, 0, 0, 0, 80, 65, 82, 49]; + const uint8Array = [ + 80, 65, 82, 49, 21, 6, 21, 80, 21, 82, 92, 21, 8, 21, 0, 21, 8, 21, 0, 21, 0, 21, 0, 17, 28, 24, 5, 119, 111, + 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 22, 0, 22, 8, 24, 5, 119, 111, 114, 108, 100, 24, 8, + 49, 112, 111, 97, 52, 98, 112, 102, 0, 0, 0, 40, 32, 5, 0, 0, 0, 104, 101, 108, 108, 111, 1, 9, 104, 119, 111, + 114, 108, 100, 6, 0, 0, 0, 98, 97, 110, 97, 110, 97, 8, 0, 0, 0, 49, 112, 111, 97, 52, 98, 112, 102, 21, 12, 25, + 37, 6, 0, 25, 24, 16, 99, 111, 109, 112, 114, 101, 115, 115, 101, 100, 83, 116, 114, 105, 110, 103, 21, 2, 22, + 8, 22, 206, 1, 22, 206, 1, 38, 8, 60, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, + 22, 0, 22, 8, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 0, 0, 41, 24, 8, 49, + 112, 111, 97, 52, 98, 112, 102, 25, 24, 5, 119, 111, 114, 108, 100, 0, 25, 28, 22, 8, 21, 206, 1, 22, 0, 0, 0, + 21, 2, 25, 44, 72, 4, 114, 111, 111, 116, 21, 2, 0, 21, 12, 37, 0, 24, 16, 99, 111, 109, 112, 114, 101, 115, + 115, 101, 100, 83, 116, 114, 105, 110, 103, 37, 0, 0, 22, 8, 25, 28, 25, 28, 38, 214, 1, 28, 21, 12, 25, 37, 6, + 0, 25, 24, 16, 
99, 111, 109, 112, 114, 101, 115, 115, 101, 100, 83, 116, 114, 105, 110, 103, 21, 2, 22, 8, 22,
+      206, 1, 22, 206, 1, 38, 8, 60, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 22, 0,
+      22, 8, 24, 5, 119, 111, 114, 108, 100, 24, 8, 49, 112, 111, 97, 52, 98, 112, 102, 0, 0, 22, 154, 3, 21, 22, 22,
+      242, 2, 21, 40, 0, 22, 234, 2, 22, 8, 0, 25, 12, 24, 15, 64, 100, 115, 110, 112, 47, 112, 97, 114, 113, 117,
+      101, 116, 106, 115, 0, 163, 0, 0, 0, 80, 65, 82, 49,
+    ];
     const snappyCompressedBuffer = buffer.Buffer.from(uint8Array);
     const reader = await parquetjs.ParquetReader.openBuffer(snappyCompressedBuffer);
     const data: any[] = [];
@@ -18,7 +34,7 @@ describe("Browser tests", () => {

     after(async () => {
       await reader.close();
-    })
+    });
     });
   });
 });
diff --git a/test/codec_plain.test.js b/test/codec_plain.test.js
index ffb06535..fa60db00 100644
--- a/test/codec_plain.test.js
+++ b/test/codec_plain.test.js
@@ -4,20 +4,17 @@ const assert = chai.assert;
 const parquet_codec_plain = require('../lib/codec/plain');
 const assert_util = require('./util/assert_util.js');

-describe('ParquetCodec::PLAIN', function() {
-
-  it('should encode BOOLEAN values', function() {
-    let buf = parquet_codec_plain.encodeValues(
-        'BOOLEAN',
-        [true, false, true, true, false, true, false, false]);
+describe('ParquetCodec::PLAIN', function () {
+  it('should encode BOOLEAN values', function () {
+    let buf = parquet_codec_plain.encodeValues('BOOLEAN', [true, false, true, true, false, true, false, false]);

     assert.deepEqual(buf, Buffer.from([0x2d])); // b101101
   });

-  it('should decode BOOLEAN values', function() {
+  it('should decode BOOLEAN values', function () {
     let buf = {
       offset: 0,
-      buffer: Buffer.from([0x2d]) // b101101
+      buffer: Buffer.from([0x2d]), // b101101
     };

     let vals = parquet_codec_plain.decodeValues('BOOLEAN', buf, 8, {});
@@ -25,36 +22,85 @@ describe('ParquetCodec::PLAIN', function() {
     assert.deepEqual(vals, [true, false, true, true, false, true, false, false]);
   });

-  it('should encode INT32 values', function() {
-    let buf = parquet_codec_plain.encodeValues(
-        'INT32',
-        [42, 17, 23, -1, -2, -3, 9000, 420]);
-
-    assert.deepEqual(buf, Buffer.from([
-      0x2a, 0x00, 0x00, 0x00, // 42
-      0x11, 0x00, 0x00, 0x00, // 17
-      0x17, 0x00, 0x00, 0x00, // 23
-      0xff, 0xff, 0xff, 0xff, // -1
-      0xfe, 0xff, 0xff, 0xff, // -2
-      0xfd, 0xff, 0xff, 0xff, // -3
-      0x28, 0x23, 0x00, 0x00, // 9000
-      0xa4, 0x01, 0x00, 0x00 // 420
-    ]));
+  it('should encode INT32 values', function () {
+    let buf = parquet_codec_plain.encodeValues('INT32', [42, 17, 23, -1, -2, -3, 9000, 420]);
+
+    assert.deepEqual(
+      buf,
+      Buffer.from([
+        0x2a,
+        0x00,
+        0x00,
+        0x00, // 42
+        0x11,
+        0x00,
+        0x00,
+        0x00, // 17
+        0x17,
+        0x00,
+        0x00,
+        0x00, // 23
+        0xff,
+        0xff,
+        0xff,
+        0xff, // -1
+        0xfe,
+        0xff,
+        0xff,
+        0xff, // -2
+        0xfd,
+        0xff,
+        0xff,
+        0xff, // -3
+        0x28,
+        0x23,
+        0x00,
+        0x00, // 9000
+        0xa4,
+        0x01,
+        0x00,
+        0x00, // 420
+      ])
+    );
   });

-  it('should decode INT32 values', function() {
+  it('should decode INT32 values', function () {
     let buf = {
       offset: 0,
       buffer: Buffer.from([
-        0x2a, 0x00, 0x00, 0x00, // 42
-        0x11, 0x00, 0x00, 0x00, // 17
-        0x17, 0x00, 0x00, 0x00, // 23
-        0xff, 0xff, 0xff, 0xff, // -1
-        0xfe, 0xff, 0xff, 0xff, // -2
-        0xfd, 0xff, 0xff, 0xff, // -3
-        0x28, 0x23, 0x00, 0x00, // 9000
-        0xa4, 0x01, 0x00, 0x00 // 420
-      ])
+        0x2a,
+        0x00,
+        0x00,
+        0x00, // 42
+        0x11,
+        0x00,
+        0x00,
+        0x00, // 17
+        0x17,
+        0x00,
+        0x00,
+        0x00, // 23
+        0xff,
+        0xff,
+        0xff,
+        0xff, // -1
+        0xfe,
+        0xff,
+        0xff,
+        0xff, // -2
+        0xfd,
+        0xff,
+        0xff,
+        0xff, // -3
+        0x28,
+        0x23,
+        0x00,
+        0x00, // 9000
+        0xa4,
+        0x01,
+        0x00,
+        0x00, // 420
+      ]),
     };

     let vals = parquet_codec_plain.decodeValues('INT32', buf, 8, {});
@@ -62,36 +108,149 @@ describe('ParquetCodec::PLAIN', function() {
     assert.deepEqual(vals, [42, 17, 23, -1, -2, -3, 9000, 420]);
   });

-  it('should encode INT64 values', function() {
-    let buf = parquet_codec_plain.encodeValues(
-        'INT64',
-        [42n, 17n, 23n, -1n, -2n, -3n, 9000n, 420n]);
-
-    assert.deepEqual(buf, Buffer.from([
-      0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 42
-      0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 17
-      0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 23
-      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // -1
-      0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // -2
-      0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // -3
-      0x28, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 9000
-      0xa4, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // 420
-    ]));
+  it('should encode INT64 values', function () {
+    let buf = parquet_codec_plain.encodeValues('INT64', [42n, 17n, 23n, -1n, -2n, -3n, 9000n, 420n]);
+
+    assert.deepEqual(
+      buf,
+      Buffer.from([
+        0x2a,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 42
+        0x11,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 17
+        0x17,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 23
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff, // -1
+        0xfe,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff, // -2
+        0xfd,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff, // -3
+        0x28,
+        0x23,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 9000
+        0xa4,
+        0x01,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 420
+      ])
+    );
   });

-  it('should decode INT64 values', function() {
+  it('should decode INT64 values', function () {
     let buf = {
       offset: 0,
       buffer: Buffer.from([
-        0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 42
-        0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 17
-        0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 23
-        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // -1
-        0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // -2
-        0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // -3
-        0x28, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 9000
-        0xa4, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // 420
-      ])
+        0x2a,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 42
+        0x11,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 17
+        0x17,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 23
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff, // -1
+        0xfe,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff, // -2
+        0xfd,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff, // -3
+        0x28,
+        0x23,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 9000
+        0xa4,
+        0x01,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 420
+      ]),
     };

     let vals = parquet_codec_plain.decodeValues('INT64', buf, 8, {});
@@ -99,36 +258,213 @@ describe('ParquetCodec::PLAIN', function() {
     assert.deepEqual(vals, [42n, 17n, 23n, -1n, -2n, -3n, 9000n, 420n]);
   });

-  it('should encode INT96 values', function() {
-    let buf = parquet_codec_plain.encodeValues(
-        'INT96',
-        [42, 17, 23, -1, -2, -3, 9000, 420]);
-
-    assert.deepEqual(buf, Buffer.from([
-      0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 42
-      0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 17
-      0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 23
-      0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // -1
-      0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // -2
-      0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // -3
-      0x28, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 9000
-      0xa4, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // 420
-    ]));
+  it('should encode INT96 values', function () {
+    let buf = parquet_codec_plain.encodeValues('INT96', [42, 17, 23, -1, -2, -3, 9000, 420]);
+
+    assert.deepEqual(
+      buf,
+      Buffer.from([
+        0x2a,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 42
+        0x11,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 17
+        0x17,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 23
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff, // -1
+        0xfe,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff, // -2
+        0xfd,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff, // -3
+        0x28,
+        0x23,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 9000
+        0xa4,
+        0x01,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 420
+      ])
+    );
   });

-  it('should decode INT96 values', function() {
+  it('should decode INT96 values', function () {
     let buf = {
       offset: 0,
       buffer: Buffer.from([
-        0x2a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 42
-        0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 17
-        0x17, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 23
-        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // -1
-        0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // -2
-        0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // -3
-        0x28, 0x23, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, // 9000
-        0xa4, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // 420
-      ])
+        0x2a,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 42
+        0x11,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 17
+        0x17,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 23
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff, // -1
+        0xfe,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff, // -2
+        0xfd,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff,
+        0xff, // -3
+        0x28,
+        0x23,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 9000
+        0xa4,
+        0x01,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00, // 420
+      ]),
     };

     let vals = parquet_codec_plain.decodeValues('INT96', buf, 8, {});
@@ -136,138 +472,298 @@ describe('ParquetCodec::PLAIN', function() {
     assert.deepEqual(vals, [42, 17, 23, -1, -2, -3, 9000, 420]);
   });

-  it('should encode FLOAT values', function() {
-    let buf = parquet_codec_plain.encodeValues(
-        'FLOAT',
-        [42.0, 23.5, 17.0, 4.20, 9000]);
-
-    assert.deepEqual(buf, Buffer.from([
-      0x00, 0x00, 0x28, 0x42, // 42.0
-      0x00, 0x00, 0xbc, 0x41, // 23.5
-      0x00, 0x00, 0x88, 0x41, // 17.0
-      0x66, 0x66, 0x86, 0x40, // 4.20
-      0x00, 0xa0, 0x0c, 0x46 // 9000
-    ]));
+  it('should encode FLOAT values', function () {
+    let buf = parquet_codec_plain.encodeValues('FLOAT', [42.0, 23.5, 17.0, 4.2, 9000]);
+
+    assert.deepEqual(
+      buf,
+      Buffer.from([
+        0x00,
+        0x00,
+        0x28,
+        0x42, // 42.0
+        0x00,
+        0x00,
+        0xbc,
+        0x41, // 23.5
+        0x00,
+        0x00,
+        0x88,
+        0x41, // 17.0
+        0x66,
+        0x66,
+        0x86,
+        0x40, // 4.20
+        0x00,
+        0xa0,
+        0x0c,
+        0x46, // 9000
+      ])
+    );
   });

-  it('should decode FLOAT values', function() {
+  it('should decode FLOAT values', function () {
     let buf = {
       offset: 0,
       buffer: Buffer.from([
-        0x00, 0x00, 0x28, 0x42, // 42.0
-        0x00, 0x00, 0xbc, 0x41, // 23.5
-        0x00, 0x00, 0x88, 0x41, // 17.0
-        0x66, 0x66, 0x86, 0x40, // 4.20
-        0x00, 0xa0, 0x0c, 0x46 // 9000
-      ])
+        0x00,
+        0x00,
+        0x28,
+        0x42, // 42.0
+        0x00,
+        0x00,
+        0xbc,
+        0x41, // 23.5
+        0x00,
+        0x00,
+        0x88,
+        0x41, // 17.0
+        0x66,
+        0x66,
+        0x86,
+        0x40, // 4.20
+        0x00,
+        0xa0,
+        0x0c,
+        0x46, // 9000
+      ]),
     };

     let vals = parquet_codec_plain.decodeValues('FLOAT', buf, 5, {});
     assert.equal(buf.offset, 20);
-    assert_util.assertArrayEqualEpsilon(vals, [42.0, 23.5, 17.0, 4.20, 9000]);
+    assert_util.assertArrayEqualEpsilon(vals, [42.0, 23.5, 17.0, 4.2, 9000]);
   });

-  it('should encode DOUBLE values', function() {
-    let buf = parquet_codec_plain.encodeValues(
-        'DOUBLE',
-        [42.0, 23.5, 17.0, 4.20, 9000]);
-
-    assert.deepEqual(buf, Buffer.from([
-      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x40, // 42.0
-      0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x37, 0x40, // 23.5
-      0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x31, 0x40, // 17.0
-      0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x10, 0x40, // 4.20
-      0x00, 0x00, 0x00, 0x00, 0x00, 0x94, 0xc1, 0x40 // 9000
-    ]));
+  it('should encode DOUBLE values', function () {
+    let buf = parquet_codec_plain.encodeValues('DOUBLE', [42.0, 23.5, 17.0, 4.2, 9000]);
+
+    assert.deepEqual(
+      buf,
+      Buffer.from([
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x45,
+        0x40, // 42.0
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x80,
+        0x37,
+        0x40, // 23.5
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x31,
+        0x40, // 17.0
+        0xcd,
+        0xcc,
+        0xcc,
+        0xcc,
+        0xcc,
+        0xcc,
+        0x10,
+        0x40, // 4.20
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x94,
+        0xc1,
+        0x40, // 9000
+      ])
+    );
   });

-  it('should decode DOUBLE values', function() {
+  it('should decode DOUBLE values', function () {
     let buf = {
       offset: 0,
       buffer: Buffer.from([
-        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x45, 0x40, // 42.0
-        0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x37, 0x40, // 23.5
-        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x31, 0x40, // 17.0
-        0xcd, 0xcc, 0xcc, 0xcc, 0xcc, 0xcc, 0x10, 0x40, // 4.20
-        0x00, 0x00, 0x00, 0x00, 0x00, 0x94, 0xc1, 0x40 // 9000
-      ])
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x45,
+        0x40, // 42.0
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x80,
+        0x37,
+        0x40, // 23.5
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x31,
+        0x40, // 17.0
+        0xcd,
+        0xcc,
+        0xcc,
+        0xcc,
+        0xcc,
+        0xcc,
+        0x10,
+        0x40, // 4.20
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x00,
+        0x94,
+        0xc1,
+        0x40, // 9000
+      ]),
     };

     let vals = parquet_codec_plain.decodeValues('DOUBLE', buf, 5, {});
     assert.equal(buf.offset, 40);
-    assert_util.assertArrayEqualEpsilon(vals, [42.0, 23.5, 17.0, 4.20, 9000]);
+    assert_util.assertArrayEqualEpsilon(vals, [42.0, 23.5, 17.0, 4.2, 9000]);
   });

-  it('should encode BYTE_ARRAY values', function() {
-    let buf = parquet_codec_plain.encodeValues(
-        'BYTE_ARRAY',
-        ['one', Buffer.from([0xde, 0xad, 0xbe, 0xef]), 'three']);
-
-    assert.deepEqual(buf, Buffer.from([
-      0x03, 0x00, 0x00, 0x00, // (3)
-      0x6f, 0x6e, 0x65, // 'one'
-      0x04, 0x00, 0x00, 0x00, // (4)
-      0xde, 0xad, 0xbe, 0xef, // 0xdeadbeef
-      0x05, 0x00, 0x00, 0x00, // (5)
-      0x74, 0x68, 0x72, 0x65, 0x65 // 'three'
-    ]));
+  it('should encode BYTE_ARRAY values', function () {
+    let buf = parquet_codec_plain.encodeValues('BYTE_ARRAY', ['one', Buffer.from([0xde, 0xad, 0xbe, 0xef]), 'three']);
+
+    assert.deepEqual(
+      buf,
+      Buffer.from([
+        0x03,
+        0x00,
+        0x00,
+        0x00, // (3)
+        0x6f,
+        0x6e,
+        0x65, // 'one'
+        0x04,
+        0x00,
+        0x00,
+        0x00, // (4)
+        0xde,
+        0xad,
+        0xbe,
+        0xef, // 0xdeadbeef
+        0x05,
+        0x00,
+        0x00,
+        0x00, // (5)
+        0x74,
+        0x68,
+        0x72,
+        0x65,
+        0x65, // 'three'
+      ])
+    );
   });

-  it('should decode BYTE_ARRAY values', function() {
+  it('should decode BYTE_ARRAY values', function () {
     let buf = {
       offset: 0,
       buffer: Buffer.from([
-        0x03, 0x00, 0x00, 0x00, // (3)
-        0x6f, 0x6e, 0x65, // 'one'
-        0x04, 0x00, 0x00, 0x00, // (4)
-        0xde, 0xad, 0xbe, 0xef, // 0xdeadbeef
-        0x05, 0x00, 0x00, 0x00, // (5)
-        0x74, 0x68, 0x72, 0x65, 0x65 // 'three'
-      ])
+        0x03,
+        0x00,
+        0x00,
+        0x00, // (3)
+        0x6f,
+        0x6e,
+        0x65, // 'one'
+        0x04,
+        0x00,
+        0x00,
+        0x00, // (4)
+        0xde,
+        0xad,
+        0xbe,
+        0xef, // 0xdeadbeef
+        0x05,
+        0x00,
+        0x00,
+        0x00, // (5)
+        0x74,
+        0x68,
+        0x72,
+        0x65,
+        0x65, // 'three'
+      ]),
     };

     let vals = parquet_codec_plain.decodeValues('BYTE_ARRAY', buf, 3, {});
     assert.equal(buf.offset, 24);
-    assert.deepEqual(vals, [
-      Buffer.from('one'),
-      Buffer.from([0xde, 0xad, 0xbe, 0xef]),
-      Buffer.from('three')
-    ]);
+    assert.deepEqual(vals, [Buffer.from('one'), Buffer.from([0xde, 0xad, 0xbe, 0xef]), Buffer.from('three')]);
   });

-  it('should encode FIXED_LEN_BYTE_ARRAY values', function() {
+  it('should encode FIXED_LEN_BYTE_ARRAY values', function () {
     let buf = parquet_codec_plain.encodeValues(
-        'FIXED_LEN_BYTE_ARRAY',
-        ['oneoo', Buffer.from([0xde, 0xad, 0xbe, 0xef, 0x42]), 'three'], {
-          typeLength: 5
-        });
-
-    assert.deepEqual(buf, Buffer.from([
-      0x6f, 0x6e, 0x65, 0x6f, 0x6f, // 'oneoo'
-      0xde, 0xad, 0xbe, 0xef, 0x42, // 0xdeadbeef42
-      0x74, 0x68, 0x72, 0x65, 0x65 // 'three'
-    ]));
+      'FIXED_LEN_BYTE_ARRAY',
+      ['oneoo', Buffer.from([0xde, 0xad, 0xbe, 0xef, 0x42]), 'three'],
+      {
+        typeLength: 5,
+      }
+    );
+
+    assert.deepEqual(
+      buf,
+      Buffer.from([
+        0x6f,
+        0x6e,
+        0x65,
+        0x6f,
+        0x6f, // 'oneoo'
+        0xde,
+        0xad,
+        0xbe,
+        0xef,
+        0x42, // 0xdeadbeef42
+        0x74,
+        0x68,
+        0x72,
+        0x65,
+        0x65, // 'three'
+      ])
+    );
   });

-  it('should decode FIXED_LEN_BYTE_ARRAY values', function() {
+  it('should decode FIXED_LEN_BYTE_ARRAY values', function () {
     let buf = {
       offset: 0,
       buffer: Buffer.from([
-        0x6f, 0x6e, 0x65, 0x6f, 0x6f, // 'oneoo'
-        0xde, 0xad, 0xbe, 0xef, 0x42, // 0xdeadbeef42
-        0x74, 0x68, 0x72, 0x65, 0x65 // 'three'
-      ])
+        0x6f,
+        0x6e,
+        0x65,
+        0x6f,
+        0x6f, // 'oneoo'
+        0xde,
+        0xad,
+        0xbe,
+        0xef,
+        0x42, // 0xdeadbeef42
+        0x74,
+        0x68,
+        0x72,
+        0x65,
+        0x65, // 'three'
+      ]),
     };

     let vals = parquet_codec_plain.decodeValues('FIXED_LEN_BYTE_ARRAY', buf, 3, {
-      typeLength: 5
+      typeLength: 5,
     });

     assert.equal(buf.offset, 15);
-    assert.deepEqual(vals, [
-      Buffer.from('oneoo'),
-      Buffer.from([0xde, 0xad, 0xbe, 0xef, 0x42]),
-      Buffer.from('three')
-    ]);
+    assert.deepEqual(vals, [Buffer.from('oneoo'), Buffer.from([0xde, 0xad, 0xbe, 0xef, 0x42]), Buffer.from('three')]);
   });
 });
diff --git a/test/codec_rle.js b/test/codec_rle.js
index 9b8138ed..7b2528ad 100644
--- a/test/codec_rle.js
+++ b/test/codec_rle.js
@@ -3,122 +3,118 @@ const chai = require('chai');
 const assert = chai.assert;
 const parquet_codec_rle = require('../lib/codec/rle');

-describe('ParquetCodec::RLE', function() {
-
-  it('should encode bitpacked values', function() {
-    let buf = parquet_codec_rle.encodeValues(
-        'INT32',
-        [0, 1, 2, 3, 4, 5, 6, 7],
-        {
-          disableEnvelope: true,
-          bitWidth: 3
-        });
+describe('ParquetCodec::RLE', function () {
+  it('should encode bitpacked values', function () {
+    let buf = parquet_codec_rle.encodeValues('INT32', [0, 1, 2, 3, 4, 5, 6, 7], {
+      disableEnvelope: true,
+      bitWidth: 3,
+    });

     assert.deepEqual(buf, Buffer.from([0x03, 0x88, 0xc6, 0xfa]));
   });

-  it('should decode bitpacked values', function() {
+  it('should decode bitpacked values', function () {
     let vals = parquet_codec_rle.decodeValues(
-        'INT32',
-        {
-          buffer: Buffer.from([0x03, 0x88, 0xc6, 0xfa]),
-          offset: 0,
-        },
-        8,
-        {
-          disableEnvelope: true,
-          bitWidth: 3
-        });
+      'INT32',
+      {
+        buffer: Buffer.from([0x03, 0x88, 0xc6, 0xfa]),
+        offset: 0,
+      },
+      8,
+      {
+        disableEnvelope: true,
+        bitWidth: 3,
+      }
+    );

     assert.deepEqual(vals, [0, 1, 2, 3, 4, 5, 6, 7]);
   });

-  describe('number of values not a multiple of 8', function() {
-    it('should encode bitpacked values', function() {
-      let buf = parquet_codec_rle.encodeValues(
-          'INT32',
-          [0, 1, 2, 3, 4, 5, 6, 7, 6, 5],
-          {
-            disableEnvelope: true,
-            bitWidth: 3
-          });
+  describe('number of values not a multiple of 8', function () {
+    it('should encode bitpacked values', function () {
+      let buf = parquet_codec_rle.encodeValues('INT32', [0, 1, 2, 3, 4, 5, 6, 7, 6, 5], {
+        disableEnvelope: true,
+        bitWidth: 3,
+      });

       assert.deepEqual(buf, Buffer.from([0x05, 0x88, 0xc6, 0xfa, 0x2e, 0x00, 0x00]));
     });

-    it('should decode bitpacked values', function() {
+    it('should decode bitpacked values', function () {
       let vals = parquet_codec_rle.decodeValues(
-          'INT32',
-          {
-            buffer: Buffer.from([0x05, 0x88, 0xc6, 0xfa, 0x2e, 0x00, 0x00]),
-            offset: 0,
-          },
-          10,
-          {
-            disableEnvelope: true,
-            bitWidth: 3
-          });
+        'INT32',
+        {
+          buffer: Buffer.from([0x05, 0x88, 0xc6, 0xfa, 0x2e, 0x00, 0x00]),
+          offset: 0,
+        },
+        10,
+        {
+          disableEnvelope: true,
+          bitWidth: 3,
+        }
+      );

       assert.deepEqual(vals, [0, 1, 2, 3, 4, 5, 6, 7, 6, 5]);
     });
   });

-  it('should encode repeated values', function() {
+  it('should encode repeated values', function () {
     let buf = parquet_codec_rle.encodeValues(
-        'INT32',
-        [1234567, 1234567, 1234567, 1234567, 1234567, 1234567, 1234567, 1234567],
-        {
-          disableEnvelope: true,
-          bitWidth: 21
-        });
-
-    assert.deepEqual(buf, Buffer.from([0x10, 0x87, 0xD6, 0x12]));
+      'INT32',
+      [1234567, 1234567, 1234567, 1234567, 1234567, 1234567, 1234567, 1234567],
+      {
+        disableEnvelope: true,
+        bitWidth: 21,
+      }
+    );
+
+    assert.deepEqual(buf, Buffer.from([0x10, 0x87, 0xd6, 0x12]));
   });

-  it('should decode repeated values', function() {
+  it('should decode repeated values', function () {
     let vals = parquet_codec_rle.decodeValues(
-        'INT32',
-        {
-          buffer: Buffer.from([0x10, 0x87, 0xD6, 0x12]),
-          offset: 0,
-        },
-        8,
-        {
-          disableEnvelope: true,
-          bitWidth: 21
-        });
-
-    assert.deepEqual(vals, [1234567, 1234567, 1234567, 1234567, 1234567, 1234567, 1234567, 1234567]);
+      'INT32',
+      {
+        buffer: Buffer.from([0x10, 0x87, 0xd6, 0x12]),
+        offset: 0,
+      },
+      8,
+      {
+        disableEnvelope: true,
+        bitWidth: 21,
+      }
+    );
+
+    assert.deepEqual(vals, [1234567, 1234567, 1234567, 1234567, 1234567, 1234567, 1234567, 1234567]);
   });

-  it('should encode mixed runs', function() {
+  it('should encode mixed runs', function () {
     let buf = parquet_codec_rle.encodeValues(
-        'INT32',
-        [0, 1, 2, 3, 4, 5, 6, 7, 4, 4, 4, 4, 4, 4, 4, 4, 0, 1, 2, 3, 4, 5, 6, 7],
-        {
-          disableEnvelope: true,
-          bitWidth: 3
-        });
+      'INT32',
+      [0, 1, 2, 3, 4, 5, 6, 7, 4, 4, 4, 4, 4, 4, 4, 4, 0, 1, 2, 3, 4, 5, 6, 7],
+      {
+        disableEnvelope: true,
+        bitWidth: 3,
+      }
+    );

     assert.deepEqual(buf, Buffer.from([0x03, 0x88, 0xc6, 0xfa, 0x10, 0x04, 0x03, 0x88, 0xc6, 0xfa]));
   });

-  it('should decode mixed runs', function() {
+  it('should decode mixed runs', function () {
     let vals = parquet_codec_rle.decodeValues(
-        'INT32',
-        {
-          buffer: Buffer.from([0x03, 0x88, 0xc6, 0xfa, 0x10, 0x04, 0x03, 0x88, 0xc6, 0xfa]),
-          offset: 0,
-        },
-        24,
-        {
-          disableEnvelope: true,
-          bitWidth: 3
-        });
-
-    assert.deepEqual(
-        vals,
-        [0, 1, 2, 3, 4, 5, 6, 7, 4, 4, 4, 4, 4, 4, 4, 4, 0, 1, 2, 3, 4, 5, 6, 7]);
+      'INT32',
+      {
+        buffer: Buffer.from([0x03, 0x88, 0xc6, 0xfa, 0x10, 0x04, 0x03, 0x88, 0xc6, 0xfa]),
+        offset: 0,
+      },
+      24,
+      {
+        disableEnvelope: true,
+        bitWidth: 3,
+      }
+    );
+
+    assert.deepEqual(vals, [0, 1, 2, 3, 4, 5, 6, 7, 4, 4, 4, 4, 4, 4, 4, 4, 0, 1, 2, 3, 4, 5, 6, 7]);
   });
-
 });
diff --git a/test/decodeSchema.js b/test/decodeSchema.js
index 67790841..fecb9ec0 100644
--- a/test/decodeSchema.js
+++ b/test/decodeSchema.js
@@ -3,12 +3,13 @@ const chai = require('chai');
 const assert = chai.assert;
 const parquet = require('../parquet');

-describe('ParquetSchema', function() {
-  it('should handle complex nesting', function() {
+describe('ParquetSchema', function () {
+  it('should handle complex nesting', function () {
     var metadata = {
       version: 1,
       schema: [
-        { type: null,
+        {
+          type: null,
           type_length: null,
           repetition_type: null,
           name: 'root',
@@ -16,8 +17,9 @@ describe('ParquetSchema', function() {
           converted_type: null,
           scale: null,
           precision: null,
-          field_id: null
-        }, {
+          field_id: null,
+        },
+        {
           type: null,
           type_length: null,
           repetition_type: 0,
@@ -26,8 +28,9 @@ describe('ParquetSchema', function() {
           converted_type: null,
           scale: null,
           precision: null,
-          field_id: null
-        }, {
+          field_id: null,
+        },
+        {
           type: null,
           type_length: null,
           repetition_type: 0,
@@ -36,8 +39,9 @@ describe('ParquetSchema', function() {
           converted_type: null,
           scale: null,
           precision: null,
-          field_id: null
-        }, {
+          field_id: null,
+        },
+        {
           type: null,
           type_length: null,
           repetition_type: 0,
@@ -46,8 +50,9 @@ describe('ParquetSchema', function() {
           converted_type: null,
           scale: null,
           precision: null,
-          field_id: null
-        }, {
+          field_id: null,
+        },
+        {
           type: 6,
           type_length: null,
           repetition_type: 0,
@@ -56,8 +61,9 @@ describe('ParquetSchema', function() {
           converted_type: 0,
           scale: null,
           precision: null,
-          field_id: null
-        }, {
+          field_id: null,
+        },
+        {
           type: null,
           type_length: null,
           repetition_type: 0,
@@ -66,8 +72,9 @@ describe('ParquetSchema', function() {
           converted_type: null,
           scale: null,
           precision: null,
-          field_id: null
-        }, {
+          field_id: null,
+        },
+        {
           type: 6,
           type_length: null,
           repetition_type: 0,
@@ -76,8 +83,9 @@ describe('ParquetSchema', function() {
           converted_type: 0,
           scale: null,
           precision: null,
-          field_id: null
-        }, {
+          field_id: null,
+        },
+        {
           type: 6,
           type_length: null,
           repetition_type: 0,
@@ -86,8 +94,9 @@ describe('ParquetSchema', function() {
           converted_type: 0,
           scale: null,
           precision: null,
-          field_id: null
-        }, {
+          field_id: null,
+        },
+        {
           type: 6,
           type_length: null,
           repetition_type: 0,
@@ -96,155 +105,123 @@ describe('ParquetSchema', function() {
           converted_type: 0,
           scale: null,
           precision: null,
-          field_id: null
-        }
-      ]
+          field_id: null,
+        },
+      ],
     };

     const expected = {
-      "a": {
-        "name": "a",
-        "path": [
-          "a"
-        ],
-        "repetitionType": "REQUIRED",
-        "statistics": undefined,
-        "rLevelMax": 0,
-        "dLevelMax": 0,
-        "isNested": true,
-        "fieldCount": 2,
-        "fields": {
-          "b": {
-            "name": "b",
-            "path": [
-              "a",
-              "b"
-            ],
-            "repetitionType": "REQUIRED",
-            "statistics": undefined,
-            "rLevelMax": 0,
-            "dLevelMax": 0,
-            "isNested": true,
-            "fieldCount": 2,
-            "fields": {
-              "c": {
-                "name": "c",
-                "path": [
-                  "a",
-                  "b",
-                  "c"
-                ],
-                "repetitionType": "REQUIRED",
-                "statistics": undefined,
-                "rLevelMax": 0,
-                "dLevelMax": 0,
-                "isNested": true,
-                "fieldCount": 1,
-                "fields": {
-                  "d": {
-                    "name": "d",
-                    "primitiveType": "BYTE_ARRAY",
-                    "originalType": "UTF8",
-                    "path": [
-                      "a",
-                      "b",
-                      "c",
-                      "d"
-                    ],
-                    "repetitionType": "REQUIRED",
-                    "statistics": undefined,
-                    "typeLength": undefined,
-                    "encoding": "PLAIN",
-                    "compression": "UNCOMPRESSED",
-                    "rLevelMax": 0,
-                    "dLevelMax": 0,
-                    "precision": null,
-                    "scale": null
-                  }
-                }
+      a: {
+        name: 'a',
+        path: ['a'],
+        repetitionType: 'REQUIRED',
+        statistics: undefined,
+        rLevelMax: 0,
+        dLevelMax: 0,
+        isNested: true,
+        fieldCount: 2,
+        fields: {
+          b: {
+            name: 'b',
+            path: ['a', 'b'],
+            repetitionType: 'REQUIRED',
+            statistics: undefined,
+            rLevelMax: 0,
+            dLevelMax: 0,
+            isNested: true,
+            fieldCount: 2,
+            fields: {
+              c: {
+                name: 'c',
+                path: ['a', 'b', 'c'],
+                repetitionType: 'REQUIRED',
+                statistics: undefined,
+                rLevelMax: 0,
+                dLevelMax: 0,
+                isNested: true,
+                fieldCount: 1,
+                fields: {
+                  d: {
+                    name: 'd',
+                    primitiveType: 'BYTE_ARRAY',
+                    originalType: 'UTF8',
+                    path: ['a', 'b', 'c', 'd'],
+                    repetitionType: 'REQUIRED',
+                    statistics: undefined,
+                    typeLength: undefined,
+                    encoding: 'PLAIN',
+                    compression: 'UNCOMPRESSED',
+                    rLevelMax: 0,
+                    dLevelMax: 0,
+                    precision: null,
+                    scale: null,
+                  },
+                },
               },
-              "e": {
-                "name": "e",
-                "path": [
-                  "a",
-                  "b",
-                  "e"
-                ],
-                "repetitionType": "REQUIRED",
-                "statistics": undefined,
-                "rLevelMax": 0,
-                "dLevelMax": 0,
-                "isNested": true,
-                "fieldCount": 2,
-                "fields": {
-                  "f": {
-                    "name": "f",
-                    "primitiveType": "BYTE_ARRAY",
-                    "originalType": "UTF8",
-                    "path": [
-                      "a",
-                      "b",
-                      "e",
-                      "f"
-                    ],
-                    "repetitionType": "REQUIRED",
-                    "statistics": undefined,
-                    "typeLength": undefined,
-                    "encoding": "PLAIN",
-                    "compression": "UNCOMPRESSED",
-                    "rLevelMax": 0,
-                    "dLevelMax": 0,
-                    "precision": null,
-                    "scale": null
+              e: {
+                name: 'e',
+                path: ['a', 'b', 'e'],
+                repetitionType: 'REQUIRED',
+                statistics: undefined,
+                rLevelMax: 0,
+                dLevelMax: 0,
+                isNested: true,
+                fieldCount: 2,
+                fields: {
+                  f: {
+                    name: 'f',
+                    primitiveType: 'BYTE_ARRAY',
+                    originalType: 'UTF8',
+                    path: ['a', 'b', 'e', 'f'],
+                    repetitionType: 'REQUIRED',
+                    statistics: undefined,
+                    typeLength: undefined,
+                    encoding: 'PLAIN',
+                    compression: 'UNCOMPRESSED',
+                    rLevelMax: 0,
+                    dLevelMax: 0,
+                    precision: null,
+                    scale: null,
+                  },
+                  g: {
+                    name: 'g',
+                    primitiveType: 'BYTE_ARRAY',
+                    originalType: 'UTF8',
+                    path: ['a', 'b', 'e', 'g'],
+                    repetitionType: 'REQUIRED',
+                    statistics: undefined,
+                    typeLength: undefined,
+                    encoding: 'PLAIN',
+                    compression: 'UNCOMPRESSED',
+                    rLevelMax: 0,
+                    dLevelMax: 0,
+                    precision: null,
+                    scale: null,
                   },
-                  "g": {
-                    "name": "g",
-                    "primitiveType": "BYTE_ARRAY",
-                    "originalType": "UTF8",
-                    "path": [
-                      "a",
-                      "b",
-                      "e",
-                      "g"
-                    ],
-                    "repetitionType": "REQUIRED",
-                    "statistics": undefined,
-                    "typeLength": undefined,
-                    "encoding": "PLAIN",
-                    "compression": "UNCOMPRESSED",
-                    "rLevelMax": 0,
-                    "dLevelMax": 0,
-                    "precision": null,
-                    "scale": null
-                  }
-                }
-              }
-            }
+                },
+              },
+            },
+          },
+          h: {
+            name: 'h',
+            primitiveType: 'BYTE_ARRAY',
+            originalType: 'UTF8',
+            path: ['a', 'h'],
+            repetitionType: 'REQUIRED',
+            statistics: undefined,
+            typeLength: undefined,
+            encoding: 'PLAIN',
+            compression: 'UNCOMPRESSED',
+            rLevelMax: 0,
+            dLevelMax: 0,
+            precision: null,
+            scale: null,
           },
-          "h": {
-            "name": "h",
-            "primitiveType": "BYTE_ARRAY",
-            "originalType": "UTF8",
-            "path": [
-              "a",
-              "h"
-            ],
-            "repetitionType": "REQUIRED",
-            "statistics": undefined,
-            "typeLength": undefined,
-            "encoding": "PLAIN",
-            "compression": "UNCOMPRESSED",
-            "rLevelMax": 0,
-            "dLevelMax": 0,
-            "precision": null,
-            "scale": null
-          }
-        }
-      }
+        },
+      },
     };

-    const reader = new parquet.ParquetReader(metadata,{});
-    assert.deepEqual(reader.schema.fields,expected);
+    const reader = new parquet.ParquetReader(metadata, {});
+    assert.deepEqual(reader.schema.fields, expected);
   });
-
 });
diff --git a/test/dictionary.js b/test/dictionary.js
index b0d1e0fc..c3cd0c48 100644
--- a/test/dictionary.js
+++ b/test/dictionary.js
@@ -4,9 +4,11 @@ const assert = chai.assert;
 const parquet = require('../parquet');
 const path = require('path');

-describe('dictionary encoding', async function() {
-  it('should read uncompressed dictionary from spark', async function() {
-    let reader = await parquet.ParquetReader.openFile(path.resolve(__dirname,'test-files/spark-uncompressed-dict.parquet'));
+describe('dictionary encoding', async function () {
+  it('should read uncompressed dictionary from spark', async function () {
+    let reader = await parquet.ParquetReader.openFile(
+      path.resolve(__dirname, 'test-files/spark-uncompressed-dict.parquet')
+    );
     let cursor = reader.getCursor();
     let records = [];
@@ -14,6 +16,9 @@ describe('dictionary encoding', async function() {
       records.push(await cursor.next());
     }

-    assert.deepEqual(records.map(d => d.name),['apples','oranges','kiwi','banana','apples']);
+    assert.deepEqual(
+      records.map((d) => d.name),
+      ['apples', 'oranges', 'kiwi', 'banana', 'apples']
+    );
   });
 });
diff --git a/test/fields.test.ts b/test/fields.test.ts
index 2b2e739b..dc80a53a 100644
--- a/test/fields.test.ts
+++ b/test/fields.test.ts
@@ -1,252 +1,251 @@
-import { assert } from "chai"
+import { assert } from 'chai';
 import { ParquetSchema } from '../parquet';
 import * as fields from '../lib/fields';

-describe("Field Builders: Primitive Types", function () {
-    it("Can use primitive field types: String", function () {
-        const schema = new ParquetSchema({
-            name: fields.createStringField(),
-        });
-        const c = schema.fields.name;
-        assert.equal(c.name, 'name');
-        assert.equal(c.primitiveType, 'BYTE_ARRAY');
-        assert.equal(c.originalType, 'UTF8');
-        assert.deepEqual(c.path, ['name']);
-        assert.equal(c.repetitionType, 'OPTIONAL');
-        assert.equal(c.encoding, 'PLAIN');
-        assert.equal(c.compression, 'UNCOMPRESSED');
-        assert.equal(c.rLevelMax, 0);
-        assert.equal(c.dLevelMax, 1);
-        assert.equal(!!c.isNested, false);
-        assert.equal(c.fieldCount, undefined);
+describe('Field Builders: Primitive Types', function () {
+  it('Can use primitive field types: String', function () {
+    const schema = new ParquetSchema({
+      name: fields.createStringField(),
     });
-
-    it("Can use primitive field types: Boolean", function () {
-        const schema = new ParquetSchema({
-            name: fields.createBooleanField(),
-        });
+    const c = schema.fields.name;
+    assert.equal(c.name, 'name');
+    assert.equal(c.primitiveType, 'BYTE_ARRAY');
+    assert.equal(c.originalType, 'UTF8');
+    assert.deepEqual(c.path, ['name']);
+    assert.equal(c.repetitionType, 'OPTIONAL');
+    assert.equal(c.encoding, 'PLAIN');
+    assert.equal(c.compression, 'UNCOMPRESSED');
+    assert.equal(c.rLevelMax, 0);
+    assert.equal(c.dLevelMax, 1);
+    assert.equal(!!c.isNested, false);
+    assert.equal(c.fieldCount, undefined);
+  });
+
+  it('Can use primitive field types: Boolean', function () {
+    const schema = new ParquetSchema({
+      name: fields.createBooleanField(),
     });
-
-    it("Can use primitive field types: Int32", function () {
-        const schema = new ParquetSchema({
-            name: fields.createIntField(32),
-        });
-        const c = schema.fields.name;
-        assert.equal(c.name, 'name');
-        assert.equal(c.primitiveType, 'INT32');
-        assert.equal(c.originalType, undefined);
-        assert.deepEqual(c.path, ['name']);
-        assert.equal(c.repetitionType, 'OPTIONAL');
-        assert.equal(c.encoding, 'PLAIN');
-        assert.equal(c.compression, 'UNCOMPRESSED');
-        assert.equal(c.rLevelMax, 0);
-        assert.equal(c.dLevelMax, 1);
-        assert.equal(!!c.isNested,
false); - assert.equal(c.fieldCount, undefined); + const c = schema.fields.name; + assert.equal(c.name, 'name'); + assert.equal(c.primitiveType, 'BYTE_ARRAY'); + assert.equal(c.originalType, 'UTF8'); + assert.deepEqual(c.path, ['name']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 0); + assert.equal(c.dLevelMax, 1); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + }); + + it('Can use primitive field types: Boolean', function () { + const schema = new ParquetSchema({ + name: fields.createBooleanField(), }); - - it("Can use primitive field types: Int32", function () { - const schema = new ParquetSchema({ - name: fields.createIntField(32), - }); - const c = schema.fields.name; - assert.equal(c.name, 'name'); - assert.equal(c.primitiveType, 'INT32'); - assert.equal(c.originalType, undefined); - assert.deepEqual(c.path, ['name']); - assert.equal(c.repetitionType, 'OPTIONAL'); - assert.equal(c.encoding, 'PLAIN'); - assert.equal(c.compression, 'UNCOMPRESSED'); - assert.equal(c.rLevelMax, 0); - assert.equal(c.dLevelMax, 1); - assert.equal(!!c.isNested, false); - assert.equal(c.fieldCount, undefined); + const c = schema.fields.name; + assert.equal(c.name, 'name'); + assert.equal(c.primitiveType, 'BOOLEAN'); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['name']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 0); + assert.equal(c.dLevelMax, 1); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + }); + + it('Can use primitive field types: Int32', function () { + const schema = new ParquetSchema({ + name: fields.createIntField(32), }); - - it("Can use primitive field types: Int64", function () { - const schema = new ParquetSchema({ - name: fields.createIntField(64), - }); - const c = schema.fields.name; - assert.equal(c.name, 'name'); - assert.equal(c.primitiveType, 'INT64'); - assert.equal(c.originalType, undefined); - assert.deepEqual(c.path, ['name']); - assert.equal(c.repetitionType, 'OPTIONAL'); - assert.equal(c.encoding, 'PLAIN'); - assert.equal(c.compression, 'UNCOMPRESSED'); - assert.equal(c.rLevelMax, 0); - assert.equal(c.dLevelMax, 1); - assert.equal(!!c.isNested, false); - assert.equal(c.fieldCount, undefined); + const c = schema.fields.name; + assert.equal(c.name, 'name'); + assert.equal(c.primitiveType, 'INT32'); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['name']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 0); + assert.equal(c.dLevelMax, 1); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + }); + + it('Can use primitive field types: Int64', function () { + const schema = new ParquetSchema({ + name: fields.createIntField(64), }); - - it("Can use primitive field types: Float", function () { - const schema = new ParquetSchema({ - name: fields.createFloatField(), - }); - const c = schema.fields.name; - assert.equal(c.name, 'name'); - assert.equal(c.primitiveType, 'FLOAT'); - assert.equal(c.originalType, undefined); - assert.deepEqual(c.path, ['name']); - assert.equal(c.repetitionType, 'OPTIONAL'); - assert.equal(c.encoding, 'PLAIN'); - assert.equal(c.compression, 'UNCOMPRESSED'); - assert.equal(c.rLevelMax, 0); - 
assert.equal(c.dLevelMax, 1); - assert.equal(!!c.isNested, false); - assert.equal(c.fieldCount, undefined); + const c = schema.fields.name; + assert.equal(c.name, 'name'); + assert.equal(c.primitiveType, 'INT64'); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['name']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 0); + assert.equal(c.dLevelMax, 1); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + }); + + it('Can use primitive field types: Float', function () { + const schema = new ParquetSchema({ + name: fields.createFloatField(), }); - - it("Can use primitive field types: Double", function () { - const schema = new ParquetSchema({ - name: fields.createDoubleField(), - }); - const c = schema.fields.name; - assert.equal(c.name, 'name'); - assert.equal(c.primitiveType, 'DOUBLE'); - assert.equal(c.originalType, undefined); - assert.deepEqual(c.path, ['name']); - assert.equal(c.repetitionType, 'OPTIONAL'); - assert.equal(c.encoding, 'PLAIN'); - assert.equal(c.compression, 'UNCOMPRESSED'); - assert.equal(c.rLevelMax, 0); - assert.equal(c.dLevelMax, 1); - assert.equal(!!c.isNested, false); - assert.equal(c.fieldCount, undefined); + const c = schema.fields.name; + assert.equal(c.name, 'name'); + assert.equal(c.primitiveType, 'FLOAT'); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['name']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 0); + assert.equal(c.dLevelMax, 1); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + }); + + it('Can use primitive field types: Double', function () { + const schema = new ParquetSchema({ + name: fields.createDoubleField(), }); - - it("Can use primitive field types: Decimal", function () { - const schema = new ParquetSchema({ - name: fields.createDecimalField(3), - }); - const c = schema.fields.name; - assert.equal(c.name, 'name'); - assert.equal(c.primitiveType, 'FLOAT'); - assert.equal(c.originalType, undefined); - assert.deepEqual(c.path, ['name']); - assert.equal(c.repetitionType, 'OPTIONAL'); - assert.equal(c.encoding, 'PLAIN'); - assert.equal(c.compression, 'UNCOMPRESSED'); - assert.equal(c.rLevelMax, 0); - assert.equal(c.dLevelMax, 1); - assert.equal(c.precision, 3); - assert.equal(!!c.isNested, false); - assert.equal(c.fieldCount, undefined); + const c = schema.fields.name; + assert.equal(c.name, 'name'); + assert.equal(c.primitiveType, 'DOUBLE'); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['name']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 0); + assert.equal(c.dLevelMax, 1); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + }); + + it('Can use primitive field types: Decimal', function () { + const schema = new ParquetSchema({ + name: fields.createDecimalField(3), }); - - it("Can use primitive field types: Timestamp", function () { - const schema = new ParquetSchema({ - name: fields.createTimestampField(), - }); - const c = schema.fields.name; - assert.equal(c.name, 'name'); - assert.equal(c.primitiveType, 'INT64'); - assert.equal(c.originalType, 'TIMESTAMP_MILLIS'); - assert.deepEqual(c.path, ['name']); - assert.equal(c.repetitionType, 'OPTIONAL'); - 
assert.equal(c.encoding, 'PLAIN'); - assert.equal(c.compression, 'UNCOMPRESSED'); - assert.equal(c.rLevelMax, 0); - assert.equal(c.dLevelMax, 1); - assert.equal(!!c.isNested, false); - assert.equal(c.fieldCount, undefined); + const c = schema.fields.name; + assert.equal(c.name, 'name'); + assert.equal(c.primitiveType, 'FLOAT'); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['name']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 0); + assert.equal(c.dLevelMax, 1); + assert.equal(c.precision, 3); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + }); + + it('Can use primitive field types: Timestamp', function () { + const schema = new ParquetSchema({ + name: fields.createTimestampField(), }); + const c = schema.fields.name; + assert.equal(c.name, 'name'); + assert.equal(c.primitiveType, 'INT64'); + assert.equal(c.originalType, 'TIMESTAMP_MILLIS'); + assert.deepEqual(c.path, ['name']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, 'PLAIN'); + assert.equal(c.compression, 'UNCOMPRESSED'); + assert.equal(c.rLevelMax, 0); + assert.equal(c.dLevelMax, 1); + assert.equal(!!c.isNested, false); + assert.equal(c.fieldCount, undefined); + }); }); -describe("Field Builders: Primitive Type options", function () { - it("Can be required", function () { - const schema = new ParquetSchema({ - name: fields.createStringField(false), - }); - const c = schema.fields.name; - assert.equal(c.repetitionType, 'REQUIRED'); +describe('Field Builders: Primitive Type options', function () { + it('Can be required', function () { + const schema = new ParquetSchema({ + name: fields.createStringField(false), }); + const c = schema.fields.name; + assert.equal(c.repetitionType, 'REQUIRED'); + }); - it("Can be compressed", function () { - const schema = new ParquetSchema({ - name: fields.createStringField(true, { compression: "GZIP" }), - }); - const c = schema.fields.name; - assert.equal(c.compression, 'GZIP'); + it('Can be compressed', function () { + const schema = new ParquetSchema({ + name: fields.createStringField(true, { compression: 'GZIP' }), }); + const c = schema.fields.name; + assert.equal(c.compression, 'GZIP'); + }); }); -describe("Field Builders: Structs and Struct List", function () { - it("Struct Field", function () { - const schema = new ParquetSchema({ - name: fields.createStructField({ - foo: fields.createStringField(), - bar: fields.createStringField(), - }), - }); - const c = schema.fields.name; - assert.equal(c.name, 'name'); - assert.equal(c.primitiveType, undefined); - assert.equal(c.originalType, undefined); - assert.deepEqual(c.path, ['name']); - assert.equal(c.repetitionType, 'OPTIONAL'); - assert.equal(c.encoding, undefined); - assert.equal(c.compression, undefined); - assert.equal(c.rLevelMax, 0); - assert.equal(c.dLevelMax, 1); - assert.equal(!!c.isNested, true); - assert.equal(c.fieldCount, 2); +describe('Field Builders: Structs and Struct List', function () { + it('Struct Field', function () { + const schema = new ParquetSchema({ + name: fields.createStructField({ + foo: fields.createStringField(), + bar: fields.createStringField(), + }), }); - - it("Struct List Field", function () { - const schema = new ParquetSchema({ - name: fields.createStructListField({ - foo: fields.createStringField(), - bar: fields.createStringField(), - }), - }); - const c = schema.fields.name; - assert.equal(c.name, 'name'); - 
assert.equal(c.primitiveType, undefined); - assert.equal(c.originalType, 'LIST'); - assert.deepEqual(c.path, ['name']); - assert.equal(c.repetitionType, 'OPTIONAL'); - assert.equal(c.encoding, undefined); - assert.equal(c.compression, undefined); - assert.equal(c.rLevelMax, 0); - assert.equal(c.dLevelMax, 1); - assert.equal(!!c.isNested, true); - assert.equal(c.fieldCount, 1); + const c = schema.fields.name; + assert.equal(c.name, 'name'); + assert.equal(c.primitiveType, undefined); + assert.equal(c.originalType, undefined); + assert.deepEqual(c.path, ['name']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, undefined); + assert.equal(c.compression, undefined); + assert.equal(c.rLevelMax, 0); + assert.equal(c.dLevelMax, 1); + assert.equal(!!c.isNested, true); + assert.equal(c.fieldCount, 2); + }); + + it('Struct List Field', function () { + const schema = new ParquetSchema({ + name: fields.createStructListField({ + foo: fields.createStringField(), + bar: fields.createStringField(), + }), }); + const c = schema.fields.name; + assert.equal(c.name, 'name'); + assert.equal(c.primitiveType, undefined); + assert.equal(c.originalType, 'LIST'); + assert.deepEqual(c.path, ['name']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, undefined); + assert.equal(c.compression, undefined); + assert.equal(c.rLevelMax, 0); + assert.equal(c.dLevelMax, 1); + assert.equal(!!c.isNested, true); + assert.equal(c.fieldCount, 1); + }); }); -describe("Field Builders: Lists", function () { - it("List Field", function () { - const schema = new ParquetSchema({ - name: fields.createListField("UTF8"), - }); - const c = schema.fields.name; - - assert.equal(c.name, 'name'); - assert.equal(c.primitiveType, undefined); - assert.equal(c.originalType, 'LIST'); - assert.deepEqual(c.path, ['name']); - assert.equal(c.repetitionType, 'OPTIONAL'); - assert.equal(c.encoding, undefined); - assert.equal(c.compression, undefined); - assert.equal(c.rLevelMax, 0); - assert.equal(c.dLevelMax, 1); - assert.equal(!!c.isNested, true); - assert.equal(c.fieldCount, 1); +describe('Field Builders: Lists', function () { + it('List Field', function () { + const schema = new ParquetSchema({ + name: fields.createListField('UTF8'), }); - - it("list field and elements can be required", function () { - const schema = new ParquetSchema({ - group_name: fields.createListField("UTF8", false, { optional: false }), - }); - const groupNameMeta = schema.fields.group_name; - assert.equal(groupNameMeta.repetitionType, 'REQUIRED'); - assert.equal(groupNameMeta.name, "group_name") - - const groupNameListMeta = schema.fieldList[1] - assert.equal(groupNameListMeta.repetitionType, 'REPEATED'); - assert.equal(groupNameListMeta.name, "list"); - - const groupNameElementsMeta = schema.fieldList[2] - assert.equal(groupNameElementsMeta.name, "element") - assert.equal(groupNameElementsMeta.repetitionType, "REQUIRED"); - assert.equal(groupNameElementsMeta.primitiveType, "BYTE_ARRAY"); + const c = schema.fields.name; + + assert.equal(c.name, 'name'); + assert.equal(c.primitiveType, undefined); + assert.equal(c.originalType, 'LIST'); + assert.deepEqual(c.path, ['name']); + assert.equal(c.repetitionType, 'OPTIONAL'); + assert.equal(c.encoding, undefined); + assert.equal(c.compression, undefined); + assert.equal(c.rLevelMax, 0); + assert.equal(c.dLevelMax, 1); + assert.equal(!!c.isNested, true); + assert.equal(c.fieldCount, 1); + }); + + it('list field and elements can be required', function () { + const schema = new 
ParquetSchema({ + group_name: fields.createListField('UTF8', false, { optional: false }), }); - + const groupNameMeta = schema.fields.group_name; + assert.equal(groupNameMeta.repetitionType, 'REQUIRED'); + assert.equal(groupNameMeta.name, 'group_name'); + + const groupNameListMeta = schema.fieldList[1]; + assert.equal(groupNameListMeta.repetitionType, 'REPEATED'); + assert.equal(groupNameListMeta.name, 'list'); + + const groupNameElementsMeta = schema.fieldList[2]; + assert.equal(groupNameElementsMeta.name, 'element'); + assert.equal(groupNameElementsMeta.repetitionType, 'REQUIRED'); + assert.equal(groupNameElementsMeta.primitiveType, 'BYTE_ARRAY'); + }); }); diff --git a/test/integration.js b/test/integration.js index 6832d69d..1d0258f5 100644 --- a/test/integration.js +++ b/test/integration.js @@ -7,32 +7,32 @@ const parquet = require('../parquet'); const parquet_thrift = require('../gen-nodejs/parquet_types'); const parquet_util = require('../lib/util'); const objectStream = require('object-stream'); -const stream = require('stream') -const {expect} = require("chai"); +const stream = require('stream'); +const { expect } = require('chai'); const TEST_NUM_ROWS = 10000; -const TEST_VTIME = new Date(); +const TEST_VTIME = new Date(); function mkTestSchema(opts) { return new parquet.ParquetSchema({ - name: { type: 'UTF8', compression: opts.compression }, - quantity: { type: 'INT64', optional: true, compression: opts.compression }, - price: { type: 'DOUBLE', compression: opts.compression }, - date: { type: 'TIMESTAMP_MICROS', compression: opts.compression }, - day: { type: 'DATE', compression: opts.compression }, - finger: { type: 'FIXED_LEN_BYTE_ARRAY', compression: opts.compression, typeLength: 5 }, - inter: { type: 'INTERVAL', compression: opts.compression }, + name: { type: 'UTF8', compression: opts.compression }, + quantity: { type: 'INT64', optional: true, compression: opts.compression }, + price: { type: 'DOUBLE', compression: opts.compression }, + date: { type: 'TIMESTAMP_MICROS', compression: opts.compression }, + day: { type: 'DATE', compression: opts.compression }, + finger: { type: 'FIXED_LEN_BYTE_ARRAY', compression: opts.compression, typeLength: 5 }, + inter: { type: 'INTERVAL', compression: opts.compression }, stock: { repeated: true, fields: { quantity: { type: 'INT64', repeated: true }, warehouse: { type: 'UTF8', compression: opts.compression }, - } + }, }, - colour: { type: 'UTF8', repeated: true, compression: opts.compression }, - meta_json: { type: 'BSON', optional: true, compression: opts.compression }, + colour: { type: 'UTF8', repeated: true, compression: opts.compression }, + meta_json: { type: 'BSON', optional: true, compression: opts.compression }, }); -}; +} function mkTestRows(opts) { let rows = []; @@ -44,13 +44,13 @@ function mkTestRows(opts) { price: 2.6, day: new Date('2017-11-26'), date: new Date(TEST_VTIME + 1000 * i), - finger: "FNORD", + finger: 'FNORD', inter: { months: 42, days: 23, milliseconds: 777 }, stock: [ - { quantity: 10n, warehouse: "A" }, - { quantity: 20n, warehouse: "B" } + { quantity: 10n, warehouse: 'A' }, + { quantity: 20n, warehouse: 'B' }, ], - colour: [ 'green', 'red' ] + colour: ['green', 'red'], }); rows.push({ @@ -59,13 +59,13 @@ function mkTestRows(opts) { price: 2.7, day: new Date('2017-11-26'), date: new Date(TEST_VTIME + 2000 * i), - finger: "FNORD", + finger: 'FNORD', inter: { months: 42, days: 23, milliseconds: 777 }, stock: { quantity: [50n, 33n], - warehouse: "X" + warehouse: 'X', }, - colour: [ 'orange' ] + colour: 
['orange'], }); rows.push({ @@ -74,14 +74,14 @@ function mkTestRows(opts) { quantity: undefined, day: new Date('2017-11-26'), date: new Date(TEST_VTIME + 8000 * i), - finger: "FNORD", + finger: 'FNORD', inter: { months: 42, days: 23, milliseconds: 777 }, stock: [ - { quantity: 42n, warehouse: "f" }, - { quantity: 20n, warehouse: "x" } + { quantity: 42n, warehouse: 'f' }, + { quantity: 20n, warehouse: 'x' }, ], - colour: [ 'green', 'brown' ], - meta_json: { expected_ship_date: TEST_VTIME } + colour: ['green', 'brown'], + meta_json: { expected_ship_date: TEST_VTIME }, }); rows.push({ @@ -89,10 +89,10 @@ function mkTestRows(opts) { price: 3.2, day: new Date('2017-11-26'), date: new Date(TEST_VTIME + 6000 * i), - finger: "FNORD", + finger: 'FNORD', inter: { months: 42, days: 23, milliseconds: 777 }, - colour: [ 'yellow' ], - meta_json: { shape: 'curved' } + colour: ['yellow'], + meta_json: { shape: 'curved' }, }); } @@ -103,8 +103,8 @@ async function writeTestFile(opts) { let schema = mkTestSchema(opts); let writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet', opts); - writer.setMetadata("myuid", "420"); - writer.setMetadata("fnord", "dronf"); + writer.setMetadata('myuid', '420'); + writer.setMetadata('fnord', 'dronf'); let rows = mkTestRows(opts); @@ -118,15 +118,13 @@ async function writeTestFile(opts) { async function writeTestStream(opts) { let schema = mkTestSchema(opts); - var out = new stream.PassThrough() - let writer = await parquet.ParquetWriter.openStream(schema, out, opts) - out.on('data', function(d){ - }) - out.on('end', function(){ - }) + var out = new stream.PassThrough(); + let writer = await parquet.ParquetWriter.openStream(schema, out, opts); + out.on('data', function (d) {}); + out.on('end', function () {}); - writer.setMetadata("myuid", "420"); - writer.setMetadata("fnord", "dronf"); + writer.setMetadata('myuid', '420'); + writer.setMetadata('fnord', 'dronf'); let rows = mkTestRows(opts); @@ -140,12 +138,15 @@ async function writeTestStream(opts) { async function sampleColumnHeaders() { let reader = await parquet.ParquetReader.openFile('fruits.parquet'); let column = reader.metadata.row_groups[0].columns[0]; - let buffer = await reader.envelopeReader.read(+column.meta_data.data_page_offset, +column.meta_data.total_compressed_size); + let buffer = await reader.envelopeReader.read( + +column.meta_data.data_page_offset, + +column.meta_data.total_compressed_size + ); let cursor = { buffer: buffer, offset: 0, - size: buffer.length + size: buffer.length, }; const pages = []; @@ -157,12 +158,16 @@ async function sampleColumnHeaders() { cursor.offset += pageHeader.compressed_page_size; } - return {column, pages}; + return { column, pages }; } async function verifyBloomFilterOffset() { const headers = await sampleColumnHeaders(); - const { column: { meta_data: { bloom_filter_offset } } } = headers; + const { + column: { + meta_data: { bloom_filter_offset }, + }, + } = headers; assert.equal(parseInt(bloom_filter_offset), 4106725); } @@ -171,13 +176,13 @@ async function verifyPages() { let rowCount = 0; const column = await sampleColumnHeaders(); - column.pages.forEach(d => { + column.pages.forEach((d) => { let header = d.data_page_header || d.data_page_header_v2; - assert.isAbove(header.num_values,0); + assert.isAbove(header.num_values, 0); rowCount += header.num_values; }); - assert.isAbove(column.pages.length,1); + assert.isAbove(column.pages.length, 1); assert.equal(rowCount, column.column.meta_data.num_values); } @@ -190,10 +195,10 @@ async function 
verifyStatistics() { assert.equal(colStats.null_count, 0); assert.equal(colStats.distinct_count, 4); - column.pages.forEach( (d, i) => { + column.pages.forEach((d, i) => { let header = d.data_page_header || d.data_page_header_v2; let pageStats = header.statistics; - assert.equal(pageStats.null_count,0); + assert.equal(pageStats.null_count, 0); assert.equal(pageStats.distinct_count, 4); assert.equal(pageStats.max_value, 'oranges'); assert.equal(pageStats.min_value, 'apples'); @@ -203,7 +208,7 @@ async function verifyStatistics() { async function readTestFile() { let reader = await parquet.ParquetReader.openFile('fruits.parquet'); assert.equal(reader.getRowCount(), TEST_NUM_ROWS * 4); - assert.deepEqual(reader.getMetadata(), { "myuid": "420", "fnord": "dronf" }) + assert.deepEqual(reader.getMetadata(), { myuid: '420', fnord: 'dronf' }); let schema = reader.getSchema(); assert.equal(schema.fieldList.length, 12); @@ -297,13 +302,13 @@ async function readTestFile() { price: 2.6, day: new Date('2017-11-26'), date: new Date(TEST_VTIME + 1000 * i), - finger: Buffer.from("FNORD"), + finger: Buffer.from('FNORD'), inter: { months: 42, days: 23, milliseconds: 777 }, stock: [ - { quantity: [10n], warehouse: "A" }, - { quantity: [20n], warehouse: "B" } + { quantity: [10n], warehouse: 'A' }, + { quantity: [20n], warehouse: 'B' }, ], - colour: [ 'green', 'red' ], + colour: ['green', 'red'], meta_json: null, }); @@ -313,12 +318,10 @@ async function readTestFile() { price: 2.7, day: new Date('2017-11-26'), date: new Date(TEST_VTIME + 2000 * i), - finger: Buffer.from("FNORD"), + finger: Buffer.from('FNORD'), inter: { months: 42, days: 23, milliseconds: 777 }, - stock: [ - { quantity: [50n, 33n], warehouse: "X" } - ], - colour: [ 'orange' ], + stock: [{ quantity: [50n, 33n], warehouse: 'X' }], + colour: ['orange'], meta_json: null, }); @@ -328,14 +331,14 @@ async function readTestFile() { price: 4.2, day: new Date('2017-11-26'), date: new Date(TEST_VTIME + 8000 * i), - finger: Buffer.from("FNORD"), + finger: Buffer.from('FNORD'), inter: { months: 42, days: 23, milliseconds: 777 }, stock: [ - { quantity: [42n], warehouse: "f" }, - { quantity: [20n], warehouse: "x" } + { quantity: [42n], warehouse: 'f' }, + { quantity: [20n], warehouse: 'x' }, ], - colour: [ 'green', 'brown' ], - meta_json: { expected_ship_date: TEST_VTIME } + colour: ['green', 'brown'], + meta_json: { expected_ship_date: TEST_VTIME }, }); assert.deepEqual(await cursor.next(), { @@ -344,11 +347,11 @@ async function readTestFile() { price: 3.2, day: new Date('2017-11-26'), date: new Date(TEST_VTIME + 6000 * i), - finger: Buffer.from("FNORD"), + finger: Buffer.from('FNORD'), inter: { months: 42, days: 23, milliseconds: 777 }, stock: null, - colour: [ 'yellow' ], - meta_json: { shape: 'curved' } + colour: ['yellow'], + meta_json: { shape: 'curved' }, }); } @@ -381,154 +384,153 @@ async function readTestFile() { reader.close(); } -describe('Parquet', function() { +describe('Parquet', function () { this.timeout(60000); - - describe('with defaults', function() { - it('write a test stream', function() { + describe('with defaults', function () { + it('write a test stream', function () { return writeTestStream({}); }); - }) + }); - describe('with DataPageHeaderV1', function() { - it('write a test file', function() { + describe('with DataPageHeaderV1', function () { + it('write a test file', function () { const opts = { useDataPageV2: false, compression: 'UNCOMPRESSED' }; return writeTestFile(opts); }); - it('write a test file and then read it back', 
function() { + it('write a test file and then read it back', function () { const opts = { useDataPageV2: false, pageSize: 2000, compression: 'UNCOMPRESSED' }; return writeTestFile(opts).then(readTestFile); }); - it('verify that data is split into pages', function() { + it('verify that data is split into pages', function () { return verifyPages(); }); - it('verify statistics', function() { + it('verify statistics', function () { return verifyStatistics(); }); }); - describe('with BloomFilterHeader', function() { + describe('with BloomFilterHeader', function () { const bloomFilters = [ { column: 'name', numFilterBytes: 1024, - } + }, ]; - it('write a test file', function() { + it('write a test file', function () { const opts = { useDataPageV2: true, compression: 'UNCOMPRESSED', bloomFilters }; return writeTestFile(opts); }); - it('write a test file and then read it back', function() { + it('write a test file and then read it back', function () { const opts = { useDataPageV2: true, pageSize: 2000, compression: 'UNCOMPRESSED', bloomFilters }; return writeTestFile(opts).then(readTestFile); }); - it('verify that bloom filter offset is set', function() { + it('verify that bloom filter offset is set', function () { return verifyBloomFilterOffset(); }); }); - describe('with DataPageHeaderV2', function() { - it('write a test file', function() { + describe('with DataPageHeaderV2', function () { + it('write a test file', function () { const opts = { useDataPageV2: true, compression: 'UNCOMPRESSED' }; return writeTestFile(opts); }); - it('write a test file and then read it back', async function() { + it('write a test file and then read it back', async function () { const opts = { useDataPageV2: true, pageSize: 2000, compression: 'UNCOMPRESSED' }; return writeTestFile(opts).then(readTestFile); }); - it('verify that data is split into pages', function() { + it('verify that data is split into pages', function () { return verifyPages(); }); - it('verify statistics', function() { + it('verify statistics', function () { return verifyStatistics(); }); - it('write a test file with GZIP compression', function() { + it('write a test file with GZIP compression', function () { const opts = { useDataPageV2: true, compression: 'GZIP' }; return writeTestFile(opts); }); - it('write a test file with GZIP compression and then read it back', function() { + it('write a test file with GZIP compression and then read it back', function () { const opts = { useDataPageV2: true, compression: 'GZIP' }; return writeTestFile(opts).then(readTestFile); }); - it('write a test file with SNAPPY compression', function() { + it('write a test file with SNAPPY compression', function () { const opts = { useDataPageV2: true, compression: 'SNAPPY' }; return writeTestFile(opts); }); - it('write a test file with SNAPPY compression and then read it back', function() { + it('write a test file with SNAPPY compression and then read it back', function () { const opts = { useDataPageV2: true, compression: 'SNAPPY' }; return writeTestFile(opts).then(readTestFile); }); - it('write a test file with SNAPPY compression and then read it back V2 false', function() { + it('write a test file with SNAPPY compression and then read it back V2 false', function () { const opts = { useDataPageV2: false, compression: 'SNAPPY' }; return writeTestFile(opts).then(readTestFile); }); - it('write a test file with BROTLI compression', async function() { + it('write a test file with BROTLI compression', async function () { const opts = { useDataPageV2: true, compression: 
'BROTLI' }; return await writeTestFile(opts); }); - it('write a test file with BROTLI compression and then read it back', async function() { + it('write a test file with BROTLI compression and then read it back', async function () { const opts = { useDataPageV2: true, compression: 'BROTLI' }; return await writeTestFile(opts).then(readTestFile); }); - it('write a Uint8Array field and then read it back', async function() { + it('write a Uint8Array field and then read it back', async function () { const opts = { useDataPageV2: true, compression: 'UNCOMPRESSED' }; const schema = new parquet.ParquetSchema({ data: { type: 'BYTE_ARRAY', compression: opts.compression }, }); let writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet', opts); - writer.setMetadata("myuid", "420"); - writer.setMetadata("fnord", "dronf"); + writer.setMetadata('myuid', '420'); + writer.setMetadata('fnord', 'dronf'); - await writer.appendRow({ data: Uint8Array.from([12345,365]), }); + await writer.appendRow({ data: Uint8Array.from([12345, 365]) }); await writer.close(); let reader = await parquet.ParquetReader.openFile('fruits.parquet'); assert.equal(reader.getRowCount(), 1); - assert.deepEqual(reader.getMetadata(), { "myuid": "420", "fnord": "dronf" }) + assert.deepEqual(reader.getMetadata(), { myuid: '420', fnord: 'dronf' }); let readSchema = reader.getSchema(); assert.equal(readSchema.fieldList.length, 1); assert(schema.fields.data); let cursor = reader.getCursor(); - assert.deepEqual(await cursor.next(), { data: Uint8Array.from([12345,365]) }); + assert.deepEqual(await cursor.next(), { data: Uint8Array.from([12345, 365]) }); assert.equal(await cursor.next(), null); }); const non_exhaustive_unsupported_list = [ - { data: Uint16Array.from([12345,365]), }, - { data: Uint32Array.from([12345,365]), }, - { data: Float32Array.from([12345,365]), }, - { data: Float64Array.from([12345,365]), }, - ] + { data: Uint16Array.from([12345, 365]) }, + { data: Uint32Array.from([12345, 365]) }, + { data: Float32Array.from([12345, 365]) }, + { data: Float64Array.from([12345, 365]) }, + ]; non_exhaustive_unsupported_list.forEach((row) => { - it('unsupported typed array '+ row.data.constructor.name +' should throw error', async function() { + it('unsupported typed array ' + row.data.constructor.name + ' should throw error', async function () { const opts = { useDataPageV2: true, compression: 'UNCOMPRESSED' }; const schema = new parquet.ParquetSchema({ data: { type: 'BYTE_ARRAY', compression: opts.compression }, }); let writer = await parquet.ParquetWriter.openFile(schema, 'fruits.parquet', opts); - let gotError = false + let gotError = false; try { await writer.appendRow(row); await writer.close(); @@ -536,60 +538,55 @@ describe('Parquet', function() { gotError = true; expect(e).to.match(/is not supported/); } - expect(gotError).to.eq(true) + expect(gotError).to.eq(true); }); - }) - + }); }); - describe('using the Stream/Transform API', function() { - - it('write a test file', async function() { + describe('using the Stream/Transform API', function () { + it('write a test file', async function () { const opts = { useDataPageV2: true, compression: 'GZIP' }; let schema = mkTestSchema(opts); let transform = new parquet.ParquetTransformer(schema, opts); - transform.writer.setMetadata("myuid", "420"); - transform.writer.setMetadata("fnord", "dronf"); + transform.writer.setMetadata('myuid', '420'); + transform.writer.setMetadata('fnord', 'dronf'); var ostream = fs.createWriteStream('fruits_stream.parquet'); let istream = 
objectStream.fromArray(mkTestRows());
     istream.pipe(transform).pipe(ostream);
   });

-  it('an error in transform is emitted in stream', async function() {
+  it('an error in transform is emitted in stream', async function () {
     const opts = { useDataPageV2: true, compression: 'GZIP' };
     let schema = mkTestSchema(opts);
     let transform = new parquet.ParquetTransformer(schema, opts);
-    transform.writer.setMetadata("myuid", "420");
-    transform.writer.setMetadata("fnord", "dronf");
+    transform.writer.setMetadata('myuid', '420');
+    transform.writer.setMetadata('fnord', 'dronf');
     var ostream = fs.createWriteStream('fruits_stream.parquet');
     let testRows = mkTestRows();
     testRows[4].quantity = 'N/A';
     let istream = objectStream.fromArray(testRows);
-    return new Promise( (resolve, reject) => {
-      setTimeout(() => resolve('no_error'),1000);
-      istream
-        .pipe(transform)
-        .on('error', reject)
-        .pipe(ostream)
-        .on('finish',resolve);
-    })
-    .then(
-      () => { throw new Error('Should emit error'); },
-      () => undefined
+    return new Promise((resolve, reject) => {
+      setTimeout(() => resolve('no_error'), 1000);
+      istream.pipe(transform).on('error', reject).pipe(ostream).on('finish', resolve);
+    }).then(
+      () => {
+        throw new Error('Should emit error');
+      },
+      () => undefined
     );
   });
 });

-describe('Decimal schema', function() {
+describe('Decimal schema', function () {
   const schema = new parquet.ParquetSchema({
     zero_column: { type: 'DECIMAL', precision: 10, scale: 0 },
     no_scale_column: { type: 'DECIMAL', precision: 10 },
     scale_64_column: { type: 'DECIMAL', precision: 10, scale: 2 },
     scale_32_column: { type: 'DECIMAL', precision: 8, scale: 2 },
-    fixed_length_column: { type: 'DECIMAL', typeLength: 4, precision: 20, scale: 2},
-    non_fixed_length_column: { type: 'DECIMAL', precision: 20, scale: 2},
+    fixed_length_column: { type: 'DECIMAL', typeLength: 4, precision: 20, scale: 2 },
+    non_fixed_length_column: { type: 'DECIMAL', precision: 20, scale: 2 },
   });

   const rowData = {
@@ -601,8 +598,8 @@ describe('Parquet', function() {
     non_fixed_length_column: Buffer.from([[0x64]]), // 0x64 = 100 = 1 * 10 ** 2 = value * 10 ** scale
   };

-  it('write a test file with decimals in v1 data page and read it back', async function() {
-    const file = "decimal-test-v1.parquet";
+  it('write a test file with decimals in v1 data page and read it back', async function () {
+    const file = 'decimal-test-v1.parquet';
     const opts = { useDataPageV2: false };
     const writer = await parquet.ParquetWriter.openFile(schema, file, opts);
@@ -619,12 +616,12 @@ describe('Parquet', function() {
       scale_64_column: 3.34, // Scale 2
       scale_32_column: 3.3,
       fixed_length_column: Buffer.from([0x0, 0x0, 0x0, 0x64]),
-      non_fixed_length_column: Buffer.from([0x64])
-    })
+      non_fixed_length_column: Buffer.from([0x64]),
+    });
   });

-  it('write a test file with decimals in v2 data page and read it back', async function() {
-    const file = "decimal-test-v2.parquet";
+  it('write a test file with decimals in v2 data page and read it back', async function () {
+    const file = 'decimal-test-v2.parquet';
     const opts = { useDataPageV2: true };
     const writer = await parquet.ParquetWriter.openFile(schema, file, opts);
@@ -641,9 +638,8 @@ describe('Parquet', function() {
       scale_64_column: 3.34, // Scale 2
       scale_32_column: 3.3,
       fixed_length_column: Buffer.from([0x0, 0x0, 0x0, 0x64]),
-      non_fixed_length_column: Buffer.from([0x64])
-    })
+      non_fixed_length_column: Buffer.from([0x64]),
+    });
   });
 });
 });
-
diff --git a/test/jsonSchema.test.ts b/test/jsonSchema.test.ts
index b0c9de5f..1dc5360c 100644
--- a/test/jsonSchema.test.ts
+++ b/test/jsonSchema.test.ts
@@ -1,6 +1,6 @@
 import fs from 'fs';
 import path from 'path';
-import { assert, expect } from "chai";
+import { assert, expect } from 'chai';
 import { JSONSchema4 } from 'json-schema';
 import addressSchema from './test-files/address.schema.json';
 import arraySchema from './test-files/array.schema.json';
@@ -13,37 +13,40 @@ const update = false;

 // Super Simple snapshot testing
 const checkSnapshot = (actual: any, snapshot: string, update = false) => {
   if (update) {
-    fs.writeFileSync(path.resolve("test", snapshot), JSON.stringify(JSON.parse(JSON.stringify(actual)), null, 2) + "\n");
-    expect(`Updated the contents of "${snapshot}"`).to.equal("");
+    fs.writeFileSync(
+      path.resolve('test', snapshot),
+      JSON.stringify(JSON.parse(JSON.stringify(actual)), null, 2) + '\n'
+    );
+    expect(`Updated the contents of "${snapshot}"`).to.equal('');
   } else {
     const expected = require(snapshot);
     expect(JSON.parse(JSON.stringify(actual))).to.deep.equal(expected);
   }
-}
+};

-describe("Json Schema Conversion", function () {
-  it("Simple Schema", function () {
+describe('Json Schema Conversion', function () {
+  it('Simple Schema', function () {
     const js = addressSchema as JSONSchema4;
     const ps = ParquetSchema.fromJsonSchema(js);
     checkSnapshot(ps, './test-files/address.schema.result.json', update);
   });

-  it("Arrays", function () {
+  it('Arrays', function () {
     const js = arraySchema as JSONSchema4;
     const ps = ParquetSchema.fromJsonSchema(js);
     checkSnapshot(ps, './test-files/array.schema.result.json', update);
   });

-  it("Objects", function () {
+  it('Objects', function () {
     const js = objectSchema as JSONSchema4;
     const ps = ParquetSchema.fromJsonSchema(js);
     checkSnapshot(ps, './test-files/object.schema.result.json', update);
   });

-  it("Nested Objects", function () {
+  it('Nested Objects', function () {
     const js = objectNestedSchema as JSONSchema4;
     const ps = ParquetSchema.fromJsonSchema(js);
@@ -51,78 +54,77 @@ describe('Json Schema Conversion', function () {
   });
 });

-describe("Json Schema Conversion Test File", async function () {
-
+describe('Json Schema Conversion Test File', async function () {
   const parquetSchema = ParquetSchema.fromJsonSchema({
-    "type": "object",
-    "properties": {
-      "string_field": { "type": "string" },
-      "int_field": { "type": "integer" },
-      "number_field": { "type": "number" },
-      "array_field": {
-        "type": "array",
-        "items": { "type": "string" },
-        "additionalItems": false
+    type: 'object',
+    properties: {
+      string_field: { type: 'string' },
+      int_field: { type: 'integer' },
+      number_field: { type: 'number' },
+      array_field: {
+        type: 'array',
+        items: { type: 'string' },
+        additionalItems: false,
       },
-      "timestamp_array_field": {
-        "type": "array",
-        "items": {
-          "type": "string",
-          "format": "date-time"
+      timestamp_array_field: {
+        type: 'array',
+        items: {
+          type: 'string',
+          format: 'date-time',
         },
-        "additionalItems": false,
+        additionalItems: false,
       },
-      "timestamp_field": {
-        "type": "string",
-        "format": "date-time"
+      timestamp_field: {
+        type: 'string',
+        format: 'date-time',
       },
-      "obj_field": {
-        "type": "object",
-        "properties": {
-          "sub1": {
-            "type": "string"
+      obj_field: {
+        type: 'object',
+        properties: {
+          sub1: {
+            type: 'string',
+          },
+          sub2: {
+            type: 'string',
           },
-          "sub2": {
-            "type": "string"
-          }
         },
-        "additionalProperties": false
+        additionalProperties: false,
       },
-      "struct_field": {
-        "type": "array",
-        "items": {
-          "type": "object",
-          "properties": {
-            "sub3": { "type": "string" },
-            "sub4": { "type": "string" },
-            "sub5": {
-              "type": "object",
-              "properties": {
-                "sub6": { "type": "string" },
-                "sub7": { "type": "string" }
+      struct_field: {
+        type: 'array',
+        items: {
+          type: 'object',
+          properties: {
+            sub3: { type: 'string' },
+            sub4: { type: 'string' },
+            sub5: {
+              type: 'object',
+              properties: {
+                sub6: { type: 'string' },
+                sub7: { type: 'string' },
               },
-              "additionalProperties": false
+              additionalProperties: false,
+            },
+            sub8: {
+              type: 'array',
+              items: { type: 'string' },
             },
-            "sub8": {
-              "type": "array",
-              "items": { "type": "string" }
-            }
           },
-          "additionalProperties": false
+          additionalProperties: false,
         },
-        "additionalItems": false
-      }
+        additionalItems: false,
+      },
     },
-    "additionalProperties": false
+    additionalProperties: false,
   });

   const row1 = {
     string_field: 'string value',
     int_field: 10n,
     number_field: 2.5,
-    timestamp_array_field: { list: [{ element: new Date("2023-01-01 GMT") }] },
-    timestamp_field: new Date("2023-01-01 GMT"),
+    timestamp_array_field: { list: [{ element: new Date('2023-01-01 GMT') }] },
+    timestamp_field: new Date('2023-01-01 GMT'),

     array_field: {
       list: [{ element: 'array_field val1' }, { element: 'array_field val2' }],
diff --git a/test/lib/bufferReader.test.js b/test/lib/bufferReader.test.js
index 5682f09f..e0c4a084 100644
--- a/test/lib/bufferReader.test.js
+++ b/test/lib/bufferReader.test.js
@@ -1,169 +1,186 @@
-import chai, { expect } from "chai"
-import sinon from "sinon"
-import sinonChai from "sinon-chai";
+import chai, { expect } from 'chai';
+import sinon from 'sinon';
+import sinonChai from 'sinon-chai';
 import sinonChaiInOrder from 'sinon-chai-in-order';
-import BufferReader from "../../lib/bufferReader"
-import { ParquetEnvelopeReader } from "../../lib/reader";
+import BufferReader from '../../lib/bufferReader';
+import { ParquetEnvelopeReader } from '../../lib/reader';

 chai.use(sinonChai);
 chai.use(sinonChaiInOrder);

-describe("bufferReader", () => {
+describe('bufferReader', () => {
   let reader;
   beforeEach(() => {
     const mockEnvelopeReader = sinon.fake();
     reader = new BufferReader(mockEnvelopeReader, {});
-  })
-  describe("#read", async () => {
-    describe("given that reader is scheduled", () => {
-      it("adds an item to the queue", () => {
+  });
+  describe('#read', async () => {
+    describe('given that reader is scheduled', () => {
+      it('adds an item to the queue', () => {
         const offset = 1;
         const length = 2;
         reader.read(offset, length);
         expect(reader.queue.length).to.eql(1);
-      })
-    })
-  })
+      });
+    });
+  });

-  describe("#processQueue", () => {
-    it("only enqueues an item and reads on flushing the queue", async () => {
+  describe('#processQueue', () => {
+    it('only enqueues an item and reads on flushing the queue', async () => {
       const mockResolve = sinon.spy();
       const mockResolve2 = sinon.spy();
-      reader.envelopeReader = { readFn: sinon.fake.returns(Buffer.from("buffer", "utf8")) }
-
-      reader.queue = [{
-        offset: 1,
-        length: 1,
-        resolve: mockResolve,
-      }, {
-        offset: 2,
-        length: 4,
-        resolve: mockResolve2,
-      }];
+      reader.envelopeReader = { readFn: sinon.fake.returns(Buffer.from('buffer', 'utf8')) };
+
+      reader.queue = [
+        {
+          offset: 1,
+          length: 1,
+          resolve: mockResolve,
+        },
+        {
+          offset: 2,
+          length: 4,
+          resolve: mockResolve2,
+        },
+      ];

       await reader.processQueue();
+      sinon.assert.calledWith(mockResolve, Buffer.from('b', 'utf8'));
+      sinon.assert.calledWith(mockResolve2, Buffer.from('uffe', 'utf8'));
+    });

-      sinon.assert.calledWith(mockResolve, Buffer.from("b", "utf8"));
-      sinon.assert.calledWith(mockResolve2, Buffer.from("uffe", "utf8"));
-    })
-
-    it("enqueues items and then reads them", async () => {
+    it('enqueues items and then reads them', async () => {
       const mockResolve = sinon.spy();
       const mockResolve2 = sinon.spy();
       reader.maxLength = 1;
-      reader.envelopeReader = { readFn: sinon.fake.returns(Buffer.from("buffer", "utf8")) }
-
-      reader.queue = [{
-        offset: 1,
-        length: 1,
-        resolve: mockResolve,
-      }, {
-        offset: 2,
-        length: 4,
-        resolve: mockResolve2,
-      }];
+      reader.envelopeReader = { readFn: sinon.fake.returns(Buffer.from('buffer', 'utf8')) };
+
+      reader.queue = [
+        {
+          offset: 1,
+          length: 1,
+          resolve: mockResolve,
+        },
+        {
+          offset: 2,
+          length: 4,
+          resolve: mockResolve2,
+        },
+      ];

       await reader.processQueue();

-      sinon.assert.calledWith(mockResolve, Buffer.from("b", "utf8"));
-      sinon.assert.calledWith(mockResolve2, Buffer.from("uffe", "utf8"));
-    })
+      sinon.assert.calledWith(mockResolve, Buffer.from('b', 'utf8'));
+      sinon.assert.calledWith(mockResolve2, Buffer.from('uffe', 'utf8'));
+    });

-    it("enqueues items and reads them in order", async () => {
+    it('enqueues items and reads them in order', async () => {
       const mockResolve = sinon.spy();
-      reader.envelopeReader = { readFn: sinon.fake.returns(Buffer.from("thisisalargebuffer", "utf8")) }
-
-      reader.queue = [{
-        offset: 1,
-        length: 4,
-        resolve: mockResolve,
-      }, {
-        offset: 5,
-        length: 2,
-        resolve: mockResolve,
-      }, {
-        offset: 7,
-        length: 1,
-        resolve: mockResolve,
-      }, {
-        offset: 8,
-        length: 5,
-        resolve: mockResolve,
-      }, {
-        offset: 13,
-        length: 6,
-        resolve: mockResolve,
-      }
+      reader.envelopeReader = { readFn: sinon.fake.returns(Buffer.from('thisisalargebuffer', 'utf8')) };
+
+      reader.queue = [
+        {
+          offset: 1,
+          length: 4,
+          resolve: mockResolve,
+        },
+        {
+          offset: 5,
+          length: 2,
+          resolve: mockResolve,
+        },
+        {
+          offset: 7,
+          length: 1,
+          resolve: mockResolve,
+        },
+        {
+          offset: 8,
+          length: 5,
+          resolve: mockResolve,
+        },
+        {
+          offset: 13,
+          length: 6,
+          resolve: mockResolve,
+        },
       ];

       await reader.processQueue();

-      expect(mockResolve).inOrder.to.have.been.calledWith(Buffer.from("this", "utf8"))
-        .subsequently.calledWith(Buffer.from("is", "utf8"))
-        .subsequently.calledWith(Buffer.from("a", "utf8"))
-        .subsequently.calledWith(Buffer.from("large", "utf8"))
-        .subsequently.calledWith(Buffer.from("buffer", "utf8"));
-    })
+      expect(mockResolve)
+        .inOrder.to.have.been.calledWith(Buffer.from('this', 'utf8'))
+        .subsequently.calledWith(Buffer.from('is', 'utf8'))
+        .subsequently.calledWith(Buffer.from('a', 'utf8'))
+        .subsequently.calledWith(Buffer.from('large', 'utf8'))
+        .subsequently.calledWith(Buffer.from('buffer', 'utf8'));
+    });

-    it("should read even if the maxSpan has been exceeded", async () => {
+    it('should read even if the maxSpan has been exceeded', async () => {
       const mockResolve = sinon.spy();
       reader.maxSpan = 5;
-      reader.envelopeReader = { readFn: sinon.fake.returns(Buffer.from("willslicefrombeginning", "utf8")) }
-
-      reader.queue = [{
-        offset: 1,
-        length: 4,
-        resolve: mockResolve,
-      }, {
-        offset: 10,
-        length: 4,
-        resolve: mockResolve,
-      }, {
-        offset: 10,
-        length: 9,
-        resolve: mockResolve,
-      }, {
-        offset: 10,
-        length: 13,
-        resolve: mockResolve,
-      }, {
-        offset: 10,
-        length: 22,
-        resolve: mockResolve,
-      }
+      reader.envelopeReader = { readFn: sinon.fake.returns(Buffer.from('willslicefrombeginning', 'utf8')) };
+
+      reader.queue = [
+        {
+          offset: 1,
+          length: 4,
+          resolve: mockResolve,
+        },
+        {
+          offset: 10,
+          length: 4,
+          resolve: mockResolve,
+        },
+        {
+          offset: 10,
+          length: 9,
+          resolve: mockResolve,
+        },
+        {
+          offset: 10,
+          length: 13,
+          resolve: mockResolve,
+        },
+        {
+          offset: 10,
+          length: 22,
+          resolve: mockResolve,
+        },
       ];

       await reader.processQueue();

-      expect(mockResolve).inOrder.to.have.been.calledWith(Buffer.from("will", "utf8"))
-        .subsequently.calledWith(Buffer.from("will", "utf8"))
-        .subsequently.calledWith(Buffer.from("willslice", "utf8"))
-        .subsequently.calledWith(Buffer.from("willslicefrom", "utf8"))
-        .subsequently.calledWith(Buffer.from("willslicefrombeginning", "utf8"));
-    })
-  })
-})
-
-describe("bufferReader Integration Tests", () => {
+      expect(mockResolve)
+        .inOrder.to.have.been.calledWith(Buffer.from('will', 'utf8'))
+        .subsequently.calledWith(Buffer.from('will', 'utf8'))
+        .subsequently.calledWith(Buffer.from('willslice', 'utf8'))
+        .subsequently.calledWith(Buffer.from('willslicefrom', 'utf8'))
+        .subsequently.calledWith(Buffer.from('willslicefrombeginning', 'utf8'));
+    });
+  });
+});
+
+describe('bufferReader Integration Tests', () => {
   let reader;
   let envelopeReader;
-  describe("Reading a file", async () => {
+  describe('Reading a file', async () => {
     beforeEach(async () => {
-      envelopeReader = await ParquetEnvelopeReader.openFile("./test/lib/test.txt", {});
+      envelopeReader = await ParquetEnvelopeReader.openFile('./test/lib/test.txt', {});
       reader = new BufferReader(envelopeReader);
-    })
+    });

-    it("should properly read the file", async () => {
+    it('should properly read the file', async () => {
       const buffer = await reader.read(0, 5);
       const buffer2 = await reader.read(6, 5);
       const buffer3 = await reader.read(12, 5);

-      expect(buffer).to.eql(Buffer.from("Lorem"));
-      expect(buffer2).to.eql(Buffer.from("ipsum"));
-      expect(buffer3).to.eql(Buffer.from("dolor"));
-    })
-  })
-})
+      expect(buffer).to.eql(Buffer.from('Lorem'));
+      expect(buffer2).to.eql(Buffer.from('ipsum'));
+      expect(buffer3).to.eql(Buffer.from('dolor'));
+    });
+  });
+});
diff --git a/test/list.js b/test/list.js
index fb48c62f..e11d62f2 100644
--- a/test/list.js
+++ b/test/list.js
@@ -3,7 +3,6 @@ const chai = require('chai');
 const assert = chai.assert;
 const parquet = require('../parquet');

-
 /*
   This test creates a test file that has an annotated LIST wrapper that works with AWS Athena
   Currently the schema (and the input data) needs to follow the specification for an annotated list
@@ -48,23 +47,29 @@ describe('struct list', async function () {
           element: {
             fields: {
               a: { type: 'UTF8' },
-              b: { type: 'INT64' }
-            }
-          }
-        }
-      }
-    }
+              b: { type: 'INT64' },
+            },
+          },
+        },
+      },
+    },
   });

   const row1 = {
     id: 'Row1',
-    test: { list: [{ element: { a: 'test1', b: 1n } }, { element: { a: 'test2', b: 2n } }, { element: { a: 'test3', b: 3n } }] }
+    test: {
+      list: [
+        { element: { a: 'test1', b: 1n } },
+        { element: { a: 'test2', b: 2n } },
+        { element: { a: 'test3', b: 3n } },
+      ],
+    },
   };

   const row2 = {
     id: 'Row2',
-    test: { list: [{ element: { a: 'test4', b: 4n } }] }
+    test: { list: [{ element: { a: 'test4', b: 4n } }] },
   };

   before(async function () {
@@ -105,22 +110,22 @@ describe('array list', async function () {
           repeated: true,
           fields: {
             element: {
-              type: 'UTF8'
-            }
-          }
-        }
-      }
-    }
+              type: 'UTF8',
+            },
+          },
+        },
+      },
+    },
   });

   const row1 = {
     id: 'Row1',
-    test: { list: [{ element: "abcdef" }, { element: "fedcba" }] }
+    test: { list: [{ element: 'abcdef' }, { element: 'fedcba' }] },
   };

   const row2 = {
     id: 'Row2',
-    test: { list: [{ element: "ghijkl" }, { element: "lkjihg" }] }
+    test: { list: [{ element: 'ghijkl' }, { element: 'lkjihg' }] },
   };

   before(async function () {
@@ -148,4 +153,3 @@ describe('array list', async function () {
     assert.deepEqual(row, row2);
   });
 });
-
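A note on the annotated LIST layout the list tests above exercise: the wrapper levels (`list`, then `element`) appear both in the schema and in every row, as the annotated-list spec referenced in test/list.js requires. A minimal write-path sketch, using only the schema shape and writer calls already shown in these tests; the output file name is illustrative:

    // Sketch: writing an Athena-compatible annotated LIST with parquetjs.
    const parquet = require('../parquet');

    const schema = new parquet.ParquetSchema({
      id: { type: 'UTF8' },
      test: {
        type: 'LIST',
        fields: { list: { repeated: true, fields: { element: { type: 'UTF8' } } } },
      },
    });

    async function writeListFile() {
      const writer = await parquet.ParquetWriter.openFile(schema, 'list-sketch.parquet');
      // Rows mirror the wrapper nesting: test.list[].element
      await writer.appendRow({ id: 'Row1', test: { list: [{ element: 'abcdef' }] } });
      await writer.close();
    }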
assert.deepEqual(row, row2); }); }); - diff --git a/test/metadata-cache.js b/test/metadata-cache.js index 01b1b9cd..d5aed503 100644 --- a/test/metadata-cache.js +++ b/test/metadata-cache.js @@ -4,43 +4,46 @@ const assert = chai.assert; const path = require('path'); const parquet = require('../parquet'); -describe('metadata-cache', function() { +describe('metadata-cache', function () { let metadata; - before(async function() { - const reader = await parquet.ParquetReader.openFile(path.join(__dirname,'test-files','fruits.parquet')); + before(async function () { + const reader = await parquet.ParquetReader.openFile(path.join(__dirname, 'test-files', 'fruits.parquet')); for (let i = 0; i < reader.metadata.row_groups.length; i++) { const rowGroup = reader.metadata.row_groups[i]; for (let j = 0; j < rowGroup.columns.length; j++) { const column = rowGroup.columns[j]; try { - await reader.envelopeReader.readOffsetIndex(column.meta_data.path_in_schema.join(','), rowGroup, {cache: true}); - await reader.envelopeReader.readColumnIndex(column.meta_data.path_in_schema.join(','), rowGroup, {cache: true}); + await reader.envelopeReader.readOffsetIndex(column.meta_data.path_in_schema.join(','), rowGroup, { + cache: true, + }); + await reader.envelopeReader.readColumnIndex(column.meta_data.path_in_schema.join(','), rowGroup, { + cache: true, + }); column.offset_index_offset = undefined; column.offset_index_length = undefined; column.column_index_offset = undefined; column.column_index_length = undefined; - } catch(e) {} + } catch (e) {} } } const metaDataTxt = await reader.exportMetadata(); metadata = JSON.parse(metaDataTxt); }); - it('should work', async function() { - const reader = await parquet.ParquetReader.openFile(path.join(__dirname,'test-files','fruits.parquet'),{ - metadata: metadata + it('should work', async function () { + const reader = await parquet.ParquetReader.openFile(path.join(__dirname, 'test-files', 'fruits.parquet'), { + metadata: metadata, }); const column = reader.metadata.row_groups[0].columns[2]; // verify that the json metadata is loaded - assert.equal(reader.metadata.json,true); + assert.equal(reader.metadata.json, true); const data = await reader.envelopeReader.readPage(column, 1, []); - assert.equal(data.length,2000); - assert.deepEqual(data[0],{price: 2.6}); - assert.deepEqual(data[1],{price: 2.7}); - assert.deepEqual(data[2],{price: 4.2}); + assert.equal(data.length, 2000); + assert.deepEqual(data[0], { price: 2.6 }); + assert.deepEqual(data[1], { price: 2.7 }); + assert.deepEqual(data[2], { price: 4.2 }); }); - }); diff --git a/test/mocks/handlers.js b/test/mocks/handlers.js index 6729f347..aebaf5f2 100644 --- a/test/mocks/handlers.js +++ b/test/mocks/handlers.js @@ -1,58 +1,44 @@ -const { http } = require("msw"); -const fs = require("fs"); -const fsPromises = require("fs/promises"); -const util = require("util"); -const path = require("path"); +const { http } = require('msw'); +const fs = require('fs'); +const fsPromises = require('fs/promises'); +const util = require('util'); +const path = require('path'); const readPromsify = util.promisify(fs.read); -const rangeHandle = http.get( - "http://fruits-bloomfilter.parquet", - async ({ request }) => { - const fd = fs.openSync( - path.resolve(__dirname, "../../fruits-bloomfilter.parquet"), - "r" - ); - - const { size: fileSize } = await fsPromises.stat( - path.resolve(__dirname, "../../fruits-bloomfilter.parquet") - ); - - const rangeHeader = request.headers.get("range"); - if (!rangeHeader) { - return new Response("", { - 
headers: { - "Content-Length": fileSize - } - }); - } - - const [start, end] = rangeHeader - .replace(/bytes=/, "") - .split("-") - .map(Number); - const chunk = end - start + 1; - - const { bytesRead, buffer } = await readPromsify( - fd, - Buffer.alloc(chunk), - 0, - chunk, - start - ); - - const headers = { - 'Accept-Ranges': "bytes", - 'Content-Ranges': `bytes ${start}-${end}/${bytesRead}`, - 'Content-Type': "application/octet-stream", - 'Content-Length': fileSize, - }; - - return new Response(buffer, { - status: 206, - headers, - }) +const rangeHandle = http.get('http://fruits-bloomfilter.parquet', async ({ request }) => { + const fd = fs.openSync(path.resolve(__dirname, '../../fruits-bloomfilter.parquet'), 'r'); + + const { size: fileSize } = await fsPromises.stat(path.resolve(__dirname, '../../fruits-bloomfilter.parquet')); + + const rangeHeader = request.headers.get('range'); + if (!rangeHeader) { + return new Response('', { + headers: { + 'Content-Length': fileSize, + }, + }); } -); + + const [start, end] = rangeHeader + .replace(/bytes=/, '') + .split('-') + .map(Number); + const chunk = end - start + 1; + + const { bytesRead, buffer } = await readPromsify(fd, Buffer.alloc(chunk), 0, chunk, start); + + const headers = { + 'Accept-Ranges': 'bytes', + 'Content-Ranges': `bytes ${start}-${end}/${bytesRead}`, + 'Content-Type': 'application/octet-stream', + 'Content-Length': fileSize, + }; + + return new Response(buffer, { + status: 206, + headers, + }); +}); const handlers = [rangeHandle]; diff --git a/test/mocks/server.js b/test/mocks/server.js index ae85009e..e5f9a958 100644 --- a/test/mocks/server.js +++ b/test/mocks/server.js @@ -1,4 +1,4 @@ -const { setupServer } = require("msw/node"); -const handlers = require("./handlers"); +const { setupServer } = require('msw/node'); +const handlers = require('./handlers'); module.exports = setupServer(...handlers); diff --git a/test/reader.js b/test/reader.js index 28e6f75e..fdb69047 100644 --- a/test/reader.js +++ b/test/reader.js @@ -1,18 +1,18 @@ -"use strict"; -const chai = require("chai"); -const path = require("path"); +'use strict'; +const chai = require('chai'); +const path = require('path'); const assert = chai.assert; -const parquet = require("../parquet"); -const server = require("./mocks/server"); -const {mockClient} = require("aws-sdk-client-mock"); -const {S3Client, HeadObjectCommand, GetObjectCommand} = require("@aws-sdk/client-s3"); -const {Readable} = require("stream"); -const {sdkStreamMixin} = require("@smithy/util-stream"); -const {createReadStream} = require("fs"); -const {ParquetReader} = require("../parquet"); - -describe("ParquetReader", () => { - describe("#openUrl", () => { +const parquet = require('../parquet'); +const server = require('./mocks/server'); +const { mockClient } = require('aws-sdk-client-mock'); +const { S3Client, HeadObjectCommand, GetObjectCommand } = require('@aws-sdk/client-s3'); +const { Readable } = require('stream'); +const { sdkStreamMixin } = require('@smithy/util-stream'); +const { createReadStream } = require('fs'); +const { ParquetReader } = require('../parquet'); + +describe('ParquetReader', () => { + describe('#openUrl', () => { before(() => { server.listen(); }); @@ -25,105 +25,89 @@ describe("ParquetReader", () => { server.close(); }); - it("reads parquet files via http", async () => { - const reader = await parquet.ParquetReader.openUrl( - "http://fruits-bloomfilter.parquet" - ); + it('reads parquet files via http', async () => { + const reader = await 
parquet.ParquetReader.openUrl('http://fruits-bloomfilter.parquet'); const cursor = await reader.getCursor(); - assert.deepOwnInclude( - await cursor.next(), - { - name: "apples", - quantity: 10n, - price: 2.6, - day: new Date("2017-11-26"), - finger: Buffer.from("FNORD"), - inter: {months: 10, days: 5, milliseconds: 777}, - colour: ["green", "red"], - } - ); - - assert.deepOwnInclude( - await cursor.next(), - { - name: "oranges", - quantity: 20n, - price: 2.7, - day: new Date("2018-03-03"), - finger: Buffer.from("ABCDE"), - inter: {months: 42, days: 23, milliseconds: 777}, - colour: ["orange"], - } - ); + assert.deepOwnInclude(await cursor.next(), { + name: 'apples', + quantity: 10n, + price: 2.6, + day: new Date('2017-11-26'), + finger: Buffer.from('FNORD'), + inter: { months: 10, days: 5, milliseconds: 777 }, + colour: ['green', 'red'], + }); - assert.deepOwnInclude( - await cursor.next(), - { - name: "kiwi", - quantity: 15n, - price: 4.2, - day: new Date("2008-11-26"), - finger: Buffer.from("XCVBN"), - inter: {months: 60, days: 1, milliseconds: 99}, - colour: ["green", "brown", "yellow"], - stock: [ - { - quantity: [42n], - warehouse: "f", - }, - { - quantity: [21n], - warehouse: "x", - }, - ] - } - ); + assert.deepOwnInclude(await cursor.next(), { + name: 'oranges', + quantity: 20n, + price: 2.7, + day: new Date('2018-03-03'), + finger: Buffer.from('ABCDE'), + inter: { months: 42, days: 23, milliseconds: 777 }, + colour: ['orange'], + }); - assert.deepOwnInclude( - await cursor.next(), - { - name: "banana", - price: 3.2, - day: new Date("2017-11-26"), - finger: Buffer.from("FNORD"), - inter: {months: 1, days: 15, milliseconds: 888}, - colour: ["yellow"], - meta_json: { - shape: "curved", + assert.deepOwnInclude(await cursor.next(), { + name: 'kiwi', + quantity: 15n, + price: 4.2, + day: new Date('2008-11-26'), + finger: Buffer.from('XCVBN'), + inter: { months: 60, days: 1, milliseconds: 99 }, + colour: ['green', 'brown', 'yellow'], + stock: [ + { + quantity: [42n], + warehouse: 'f', }, - } - ); + { + quantity: [21n], + warehouse: 'x', + }, + ], + }); + + assert.deepOwnInclude(await cursor.next(), { + name: 'banana', + price: 3.2, + day: new Date('2017-11-26'), + finger: Buffer.from('FNORD'), + inter: { months: 1, days: 15, milliseconds: 888 }, + colour: ['yellow'], + meta_json: { + shape: 'curved', + }, + }); assert.deepEqual(null, await cursor.next()); }); }); - describe("#asyncIterator", () => { - it("responds to for await", async () => { - const reader = await parquet.ParquetReader.openFile( - path.join(__dirname, 'test-files', 'fruits.parquet') - ); + describe('#asyncIterator', () => { + it('responds to for await', async () => { + const reader = await parquet.ParquetReader.openFile(path.join(__dirname, 'test-files', 'fruits.parquet')); let counter = 0; - for await(const record of reader) { + for await (const record of reader) { counter++; } assert.equal(counter, 40000); - }) + }); }); - describe("#handleDecimal", () => { - it("loads parquet with columns configured as DECIMAL", async () => { + describe('#handleDecimal', () => { + it('loads parquet with columns configured as DECIMAL', async () => { const reader = await parquet.ParquetReader.openFile( path.join(__dirname, 'test-files', 'valid-decimal-columns.parquet') ); - const data = [] - for await(const record of reader) { - data.push(record) + const data = []; + for await (const record of reader) { + data.push(record); } assert.equal(data.length, 4); @@ -132,7 +116,7 @@ describe("ParquetReader", () => { // handling null values 
assert.equal(data[2].over_9_digits, undefined); assert.equal(data[2].under_9_digits, undefined); - }) + }); }); describe('ParquetReader with S3', () => { describe('V3', () => { @@ -144,13 +128,13 @@ describe("ParquetReader", () => { const headStream = new Readable(); headStream.push('PAR1'); headStream.push(null); - const headSdkStream = sdkStreamMixin(headStream) + const headSdkStream = sdkStreamMixin(headStream); - const footStream = createReadStream(srcFile, {start: 2842, end: 2849}) - const footSdkStream= sdkStreamMixin(footStream); + const footStream = createReadStream(srcFile, { start: 2842, end: 2849 }); + const footSdkStream = sdkStreamMixin(footStream); - const metadataStream = createReadStream(srcFile, {start: 2608, end: 2841}); - const metaDataSdkStream = sdkStreamMixin(metadataStream) + const metadataStream = createReadStream(srcFile, { start: 2608, end: 2841 }); + const metaDataSdkStream = sdkStreamMixin(metadataStream); const stream = createReadStream(srcFile); @@ -158,26 +142,26 @@ describe("ParquetReader", () => { const sdkStream = sdkStreamMixin(stream); // mock all the way down to where metadata is being read - s3Mock.on(HeadObjectCommand) - .resolves({ContentLength: 2849}); + s3Mock.on(HeadObjectCommand).resolves({ ContentLength: 2849 }); - s3Mock.on(GetObjectCommand,) - .resolves({Body: sdkStream}); + s3Mock.on(GetObjectCommand).resolves({ Body: sdkStream }); - s3Mock.on(GetObjectCommand, {Range: 'bytes=0-3', Key: 'foo', Bucket: 'bar'}) - .resolves({Body: headSdkStream}); + s3Mock + .on(GetObjectCommand, { Range: 'bytes=0-3', Key: 'foo', Bucket: 'bar' }) + .resolves({ Body: headSdkStream }); - s3Mock.on(GetObjectCommand, {Range: 'bytes=2841-2848', Key: 'foo', Bucket: 'bar'}) - .resolves({Body: footSdkStream}); + s3Mock + .on(GetObjectCommand, { Range: 'bytes=2841-2848', Key: 'foo', Bucket: 'bar' }) + .resolves({ Body: footSdkStream }); - s3Mock.on(GetObjectCommand, {Range: 'bytes=2607-2840', Key: 'foo', Bucket: 'bar'}) - .resolves({Body: metaDataSdkStream}); + s3Mock + .on(GetObjectCommand, { Range: 'bytes=2607-2840', Key: 'foo', Bucket: 'bar' }) + .resolves({ Body: metaDataSdkStream }); const s3 = new S3Client({}); - let res = await ParquetReader.openS3(s3, {Key: 'foo', Bucket: 'bar'}); + let res = await ParquetReader.openS3(s3, { Key: 'foo', Bucket: 'bar' }); assert(res.envelopeReader); }); - }) + }); }); - }); diff --git a/test/readme-examples.test.ts b/test/readme-examples.test.ts index ee87c48b..20b60c5e 100644 --- a/test/readme-examples.test.ts +++ b/test/readme-examples.test.ts @@ -1,21 +1,21 @@ -import { expect } from "chai"; +import { expect } from 'chai'; import { ParquetSchema } from '../parquet'; -describe("Readme Encoding Examples", function () { - it("PLAIN should work", function () { +describe('Readme Encoding Examples', function () { + it('PLAIN should work', function () { const ps = new ParquetSchema({ name: { type: 'UTF8', encoding: 'PLAIN' }, }); - expect(ps).to.be.a("object"); - expect(ps.schema.name.encoding).to.eq("PLAIN"); + expect(ps).to.be.a('object'); + expect(ps.schema.name.encoding).to.eq('PLAIN'); }); - it("RLE should work", function () { + it('RLE should work', function () { const ps = new ParquetSchema({ age: { type: 'UINT_32', encoding: 'RLE', typeLength: 7 }, }); - expect(ps).to.be.a("object"); + expect(ps).to.be.a('object'); expect(ps.schema.age.typeLength).to.eq(7); }); }); diff --git a/test/reference-test/README.md b/test/reference-test/README.md index 39a79953..c21c791e 100644 --- a/test/reference-test/README.md +++ 
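The S3 test in test/reader.js above stubs one stream per byte range so that ParquetReader can walk the file header, footer, and metadata without a network call. The same pattern in isolation, as a sketch only; the bucket/key names, byte offsets, and source file name are illustrative:

    // Sketch: serving ranged GetObject reads from a local file, as in test/reader.js.
    const { mockClient } = require('aws-sdk-client-mock');
    const { S3Client, HeadObjectCommand, GetObjectCommand } = require('@aws-sdk/client-s3');
    const { sdkStreamMixin } = require('@smithy/util-stream');
    const { createReadStream } = require('fs');
    const { ParquetReader } = require('../parquet');

    async function openMockedS3File() {
      const s3Mock = mockClient(S3Client);
      s3Mock.on(HeadObjectCommand).resolves({ ContentLength: 2849 });
      s3Mock
        .on(GetObjectCommand, { Range: 'bytes=0-3', Key: 'foo', Bucket: 'bar' })
        .resolves({ Body: sdkStreamMixin(createReadStream('fruits.parquet', { start: 0, end: 3 })) });
      // Each additional Range the reader requests needs its own stub, as above.
      return ParquetReader.openS3(new S3Client({}), { Key: 'foo', Bucket: 'bar' });
    }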
diff --git a/test/reference-test/README.md b/test/reference-test/README.md
index 39a79953..c21c791e 100644
--- a/test/reference-test/README.md
+++ b/test/reference-test/README.md
@@ -9,4 +9,3 @@ This assumes that parquetjs is in the same folder as the clone of parquet-testing.
 1. `git clone git@github.com:apache/parquet-testing.git`
 1. `cd ../parquetjs`
 1. `cp ../parquet-testing/data/*.parquet ./test/reference-test/files/`
-
diff --git a/test/reference-test/read-all.test.ts b/test/reference-test/read-all.test.ts
index 492e4055..045ad7ba 100644
--- a/test/reference-test/read-all.test.ts
+++ b/test/reference-test/read-all.test.ts
@@ -1,6 +1,6 @@
-import { expect } from "chai";
-import path from "node:path";
-import fs from "node:fs";
+import { expect } from 'chai';
+import path from 'node:path';
+import fs from 'node:fs';

 import parquet from '../../parquet';

@@ -27,10 +27,10 @@ const unsupported = [
   'large_string_map.brotli.parquet', // BUG?
 ];

-describe("Read Test for all files", function () {
-
-  const listOfFiles = fs.readdirSync(path.join(__dirname, 'files'))
-    .filter(x => x.endsWith(".parquet") && !unsupported.includes(x));
+describe('Read Test for all files', function () {
+  const listOfFiles = fs
+    .readdirSync(path.join(__dirname, 'files'))
+    .filter((x) => x.endsWith('.parquet') && !unsupported.includes(x));

   for (const filename of listOfFiles) {
     if (onlyTest && onlyTest !== filename) continue;
@@ -39,10 +39,10 @@ describe('Read Test for all files', function () {
       const schema = reader.getSchema();
       expect(schema.fieldList).to.have.length.greaterThan(0);
       const cursor = reader.getCursor();
-      const record = await cursor.next() as any;
+      const record = (await cursor.next()) as any;
       // Expect the same keys as top-level fields
-      const expectedRecordKeys = schema.fieldList.filter(x => x.path.length === 1).map(x => x.name);
+      const expectedRecordKeys = schema.fieldList.filter((x) => x.path.length === 1).map((x) => x.name);
       expect(Object.keys(record)).to.deep.equal(expectedRecordKeys);
-    })
+    });
   }
 });
diff --git a/test/sbbf.ts b/test/sbbf.ts
index 12c8cb1f..a8350e2d 100644
--- a/test/sbbf.ts
+++ b/test/sbbf.ts
@@ -1,297 +1,282 @@
 import Long from 'long';
-import {expect} from "chai"
-import * as sinon from "sinon"
-import {Done} from "mocha"
+import { expect } from 'chai';
+import * as sinon from 'sinon';
+import { Done } from 'mocha';

-import SplitBlockBloomFilter from "../lib/bloom/sbbf";
+import SplitBlockBloomFilter from '../lib/bloom/sbbf';

 const times = (n: number, fn: Function) => {
-    return Array(n).map(() => fn());
-}
+  return Array.from({ length: n }, () => fn()); // Array(n).map(...) maps over holes and never calls fn
+};
 const random = (min: number, max: number) => {
-    min = Math.ceil(min);
-    max = Math.floor(max);
-    return Math.floor(Math.random() * (max - min + 1) + min);
-}
+  min = Math.ceil(min);
+  max = Math.floor(max);
+  return Math.floor(Math.random() * (max - min + 1) + min);
+};

-describe("Split Block Bloom Filters", () => {
-    const expectedDefaultBytes = 29920
+describe('Split Block Bloom Filters', () => {
+  const expectedDefaultBytes = 29920;

-    it("Mask works", function () {
-        const testMaskX = Long.fromString("deadbeef", true, 16);
-        const testMaskRes = SplitBlockBloomFilter.mask(testMaskX)
+  it('Mask works', function () {
+    const testMaskX = Long.fromString('deadbeef', true, 16);
+    const testMaskRes = SplitBlockBloomFilter.mask(testMaskX);

-        // all mask values should have exactly one bit set
-        const expectedVals = [
-            1 << 29,
-            1 << 15,
-            1 << 12,
-            1 << 14,
-            1 << 13,
-            1 << 25,
-            1 << 24,
-            1 << 21
-        ]
-        for (let i = 0; i < expectedVals.length; i++) {
-            expect(testMaskRes[i]).to.eq(expectedVals[i])
-        }
-    })
-    it("block insert + check works", function () {
-        let blk = SplitBlockBloomFilter.initBlock()
-        let isInsertedX: Long = Long.fromString("6f6f6f6f6", true, 16)
-        let isInsertedY: Long = Long.fromString("deadbeef", true, 16)
-        let notInsertedZ: Long = Long.fromNumber(3)
+    // all mask values should have exactly one bit set
+    const expectedVals = [1 << 29, 1 << 15, 1 << 12, 1 << 14, 1 << 13, 1 << 25, 1 << 24, 1 << 21];
+    for (let i = 0; i < expectedVals.length; i++) {
+      expect(testMaskRes[i]).to.eq(expectedVals[i]);
+    }
+  });
+  it('block insert + check works', function () {
+    let blk = SplitBlockBloomFilter.initBlock();
+    let isInsertedX: Long = Long.fromString('6f6f6f6f6', true, 16);
+    let isInsertedY: Long = Long.fromString('deadbeef', true, 16);
+    let notInsertedZ: Long = Long.fromNumber(3);

-        SplitBlockBloomFilter.blockInsert(blk, isInsertedX)
+    SplitBlockBloomFilter.blockInsert(blk, isInsertedX);

-        expect(SplitBlockBloomFilter.blockCheck(blk, isInsertedX)).to.eq(true)
-        expect(SplitBlockBloomFilter.blockCheck(blk, isInsertedY)).to.eq(false)
-        expect(SplitBlockBloomFilter.blockCheck(blk, notInsertedZ)).to.eq(false)
+    expect(SplitBlockBloomFilter.blockCheck(blk, isInsertedX)).to.eq(true);
+    expect(SplitBlockBloomFilter.blockCheck(blk, isInsertedY)).to.eq(false);
+    expect(SplitBlockBloomFilter.blockCheck(blk, notInsertedZ)).to.eq(false);

-        SplitBlockBloomFilter.blockInsert(blk, isInsertedY)
-        expect(SplitBlockBloomFilter.blockCheck(blk, isInsertedY)).to.eq(true)
-        expect(SplitBlockBloomFilter.blockCheck(blk, notInsertedZ)).to.eq(false)
+    SplitBlockBloomFilter.blockInsert(blk, isInsertedY);
+    expect(SplitBlockBloomFilter.blockCheck(blk, isInsertedY)).to.eq(true);
+    expect(SplitBlockBloomFilter.blockCheck(blk, notInsertedZ)).to.eq(false);

-        times(50, () => {
-            SplitBlockBloomFilter.blockInsert(
-                blk,
-                new Long(random(5, 2 ** 30), random(0, 2 ** 30), true)
-            )
-        })
+    times(50, () => {
+      SplitBlockBloomFilter.blockInsert(blk, new Long(random(5, 2 ** 30), random(0, 2 ** 30), true));
+    });

-        expect(SplitBlockBloomFilter.blockCheck(blk, notInsertedZ)).to.eq(false)
-    })
+    expect(SplitBlockBloomFilter.blockCheck(blk, notInsertedZ)).to.eq(false);
+  });

-    const exes = [
-        new Long(0xFFFFFFFF, 0x7FFFFFFF, true),
-        new Long(0xABCDEF98, 0x70000000, true),
-        new Long(0xDEADBEEF, 0x7FFFFFFF, true),
-        new Long(0x0, 0x7FFFFFFF, true),
-        new Long(0xC0FFEE3, 0x0, true),
-        new Long(0x0, 0x1, true),
-        new Long(793516929, -2061372197, true) // regression test; this one was failing get blockIndex
-    ]
-    const badVal = Long.fromNumber(0xfafafafa, true)
+  const exes = [
+    new Long(0xffffffff, 0x7fffffff, true),
+    new Long(0xabcdef98, 0x70000000, true),
+    new Long(0xdeadbeef, 0x7fffffff, true),
+    new Long(0x0, 0x7fffffff, true),
+    new Long(0xc0ffee3, 0x0, true),
+    new Long(0x0, 0x1, true),
+    new Long(793516929, -2061372197, true), // regression test; this one was failing get blockIndex
+  ];
+  const badVal = Long.fromNumber(0xfafafafa, true);

-    it("filter insert + check works", function () {
-        const filter = new SplitBlockBloomFilter().setOptionNumFilterBytes(9999).init()
-        Promise.all(exes.map((x) => {
-            filter.insert(x)
-        })).then(_ => {
-            exes.forEach((x) => {
-                filter.check(x).then(isPresent => {
-                    expect(isPresent).to.eq(true)
-                })
-            })
-        })
-        filter.check(badVal).then(isPresent => expect(isPresent).to.eq(false))
-    })
-    it("number of filter bytes is set to defaults on init", async function () {
-        const filter = new SplitBlockBloomFilter().init()
-        expect(filter.getNumFilterBytes()).to.eq(expectedDefaultBytes)
-    })
+  it('filter insert + check works', function () {
+    const filter = new SplitBlockBloomFilter().setOptionNumFilterBytes(9999).init();
+    Promise.all(
+      exes.map((x) => {
+        filter.insert(x);
+      })
+    ).then((_) => {
+      exes.forEach((x) => {
+        filter.check(x).then((isPresent) => {
+          expect(isPresent).to.eq(true);
+        });
+      });
+    });
+    filter.check(badVal).then((isPresent) => expect(isPresent).to.eq(false));
+  });
+  it('number of filter bytes is set to defaults on init', async function () {
+    const filter = new SplitBlockBloomFilter().init();
+    expect(filter.getNumFilterBytes()).to.eq(expectedDefaultBytes);
+  });

-    describe("setOptionNumBytes", function () {
-        it("does not set invalid values", function () {
-            const filter = new SplitBlockBloomFilter().init()
-            const filterBytes = filter.getNumFilterBytes()
-            const badZees = [-1, 512, 1023]
+  describe('setOptionNumBytes', function () {
+    it('does not set invalid values', function () {
+      const filter = new SplitBlockBloomFilter().init();
+      const filterBytes = filter.getNumFilterBytes();
+      const badZees = [-1, 512, 1023];

-            badZees.forEach((bz) => {
-                const spy = sinon.spy(console, "error")
-                filter.setOptionNumFilterBytes(bz)
-                expect(filter.getNumFilterBytes()).to.eq(filterBytes)
-                expect(spy.calledOnce)
-                spy.restore()
-            })
-        })
-        it("sets filter bytes to next power of 2", function () {
-            let filter = new SplitBlockBloomFilter().init()
-            expect(filter.getNumFilterBytes()).to.eq(expectedDefaultBytes)
+      badZees.forEach((bz) => {
+        const spy = sinon.spy(console, 'error');
+        filter.setOptionNumFilterBytes(bz);
+        expect(filter.getNumFilterBytes()).to.eq(filterBytes);
+        expect(spy.calledOnce);
+        spy.restore();
+      });
+    });
+    it('sets filter bytes to next power of 2', function () {
+      let filter = new SplitBlockBloomFilter().init();
+      expect(filter.getNumFilterBytes()).to.eq(expectedDefaultBytes);

-            filter = new SplitBlockBloomFilter()
-                .setOptionNumFilterBytes(1024)
-                .init()
-            expect(filter.getNumFilterBytes()).to.eq(1024)
+      filter = new SplitBlockBloomFilter().setOptionNumFilterBytes(1024).init();
+      expect(filter.getNumFilterBytes()).to.eq(1024);

-            filter = new SplitBlockBloomFilter().setOptionNumFilterBytes(1025).init()
-            expect(filter.getNumFilterBytes()).to.eq(2048)
+      filter = new SplitBlockBloomFilter().setOptionNumFilterBytes(1025).init();
+      expect(filter.getNumFilterBytes()).to.eq(2048);

-            const below2 = 2 ** 12 - 1
-            filter = new SplitBlockBloomFilter().setOptionNumFilterBytes(below2).init()
-            expect(filter.getNumFilterBytes()).to.eq(2 ** 12)
-        })
-        it("can't be set twice after initializing", function () {
-            const spy = sinon.spy(console, "error")
-            const filter = new SplitBlockBloomFilter()
-                .setOptionNumFilterBytes(333333)
-                .setOptionNumFilterBytes(2 ** 20)
-                .init()
-            expect(spy.notCalled)
-            filter.setOptionNumFilterBytes(44444)
-            expect(spy.calledOnce)
-            expect(filter.getNumFilterBytes()).to.eq(2 ** 20)
-            spy.restore()
-        })
-    })
+      const below2 = 2 ** 12 - 1;
+      filter = new SplitBlockBloomFilter().setOptionNumFilterBytes(below2).init();
+      expect(filter.getNumFilterBytes()).to.eq(2 ** 12);
+    });
+    it("can't be set twice after initializing", function () {
+      const spy = sinon.spy(console, 'error');
+      const filter = new SplitBlockBloomFilter()
+        .setOptionNumFilterBytes(333333)
+        .setOptionNumFilterBytes(2 ** 20)
+        .init();
+      expect(spy.notCalled);
+      filter.setOptionNumFilterBytes(44444);
+      expect(spy.calledOnce);
+      expect(filter.getNumFilterBytes()).to.eq(2 ** 20);
+      spy.restore();
+    });
+  });

-    describe("setOptionFalsePositiveRate", function () {
-        it("can be set", function () {
-            const filter = new SplitBlockBloomFilter().setOptionFalsePositiveRate(.001010)
-            expect(filter.getFalsePositiveRate()).to.eq(.001010)
-        })
-        it("can't be set twice after initializing", function () {
-            const spy = sinon.spy(console, "error")
-            const filter = new SplitBlockBloomFilter()
-                .setOptionFalsePositiveRate(.001010)
-                .setOptionFalsePositiveRate(.002)
-                .init()
-            expect(spy.notCalled)
-            filter.setOptionFalsePositiveRate(.0099)
-            expect(spy.calledOnce)
-            expect(filter.getFalsePositiveRate()).to.eq(.002)
-            spy.restore()
-        })
-    })
+  describe('setOptionFalsePositiveRate', function () {
+    it('can be set', function () {
+      const filter = new SplitBlockBloomFilter().setOptionFalsePositiveRate(0.00101);
+      expect(filter.getFalsePositiveRate()).to.eq(0.00101);
+    });
+    it("can't be set twice after initializing", function () {
+      const spy = sinon.spy(console, 'error');
+      const filter = new SplitBlockBloomFilter()
+        .setOptionFalsePositiveRate(0.00101)
+        .setOptionFalsePositiveRate(0.002)
+        .init();
+      expect(spy.notCalled);
+      filter.setOptionFalsePositiveRate(0.0099);
+      expect(spy.calledOnce);
+      expect(filter.getFalsePositiveRate()).to.eq(0.002);
+      spy.restore();
+    });
+  });

-    describe("setOptionNumDistinct", function () {
-        it("can be set", function () {
-            const filter = new SplitBlockBloomFilter().setOptionNumDistinct(10000)
-            expect(filter.getNumDistinct()).to.eq(10000)
-        })
-        it("can't be set twice after initializing", function () {
-            const spy = sinon.spy(console, "error")
-            const filter = new SplitBlockBloomFilter()
-                .setOptionNumDistinct(10000)
-                .setOptionNumDistinct(9999)
-            expect(spy.notCalled)
-            filter.init().setOptionNumDistinct(38383)
-            expect(filter.getNumDistinct()).to.eq(9999)
-            expect(spy.calledOnce)
-            spy.restore()
-        })
-    })
+  describe('setOptionNumDistinct', function () {
+    it('can be set', function () {
+      const filter = new SplitBlockBloomFilter().setOptionNumDistinct(10000);
+      expect(filter.getNumDistinct()).to.eq(10000);
+    });
+    it("can't be set twice after initializing", function () {
+      const spy = sinon.spy(console, 'error');
+      const filter = new SplitBlockBloomFilter().setOptionNumDistinct(10000).setOptionNumDistinct(9999);
+      expect(spy.notCalled);
+      filter.init().setOptionNumDistinct(38383);
+      expect(filter.getNumDistinct()).to.eq(9999);
+      expect(spy.calledOnce);
+      spy.restore();
+    });
+  });

-    describe("init", function () {
-        it("does not allocate filter twice", function () {
-            const spy = sinon.spy(console, "error")
-            new SplitBlockBloomFilter().setOptionNumFilterBytes(1024).init().init()
-            expect(spy.calledOnce)
-            spy.restore()
-        })
-        it("allocates the filter", function () {
-            const filter = new SplitBlockBloomFilter().setOptionNumFilterBytes(1024).init()
-            expect(filter.getNumFilterBlocks()).to.eq(32)
-            expect(filter.getFilter().length).to.eq(32)
-        })
-    })
-    describe("optimal number of blocks", function () {
-        // Some general ideas of what size filters are needed for different parameters
-        // Note there is a small but non-negligible difference between this and what
-        // is stated in https://github.com/apache/parquet-format/blob/master/BloomFilter.md
-        it("can be called", function () {
-            expect(SplitBlockBloomFilter.optimalNumOfBlocks(13107, 0.0004)).to.eq(869)
-            expect(SplitBlockBloomFilter.optimalNumOfBlocks(26214, 0.0126)).to.eq(949)
-            expect(SplitBlockBloomFilter.optimalNumOfBlocks(52428, 0.18)).to.eq(997)
+  describe('init', function () {
+    it('does not allocate filter twice', function () {
+      const spy = sinon.spy(console, 'error');
+      new SplitBlockBloomFilter().setOptionNumFilterBytes(1024).init().init();
+      expect(spy.calledOnce);
+      spy.restore();
+    });
+    it('allocates the filter', function () {
+      const filter = new SplitBlockBloomFilter().setOptionNumFilterBytes(1024).init();
+      expect(filter.getNumFilterBlocks()).to.eq(32);
+      expect(filter.getFilter().length).to.eq(32);
+    });
+  });
+  describe('optimal number of blocks', function () {
+    // Some general ideas of what size filters are needed for different parameters
+    // Note there is a small but non-negligible difference between this and what
+    // is stated in https://github.com/apache/parquet-format/blob/master/BloomFilter.md
+    it('can be called', function () {
+      expect(SplitBlockBloomFilter.optimalNumOfBlocks(13107, 0.0004)).to.eq(869);
+      expect(SplitBlockBloomFilter.optimalNumOfBlocks(26214, 0.0126)).to.eq(949);
+      expect(SplitBlockBloomFilter.optimalNumOfBlocks(52428, 0.18)).to.eq(997);

-            expect(SplitBlockBloomFilter.optimalNumOfBlocks(25000, 0.001)).to.eq(1427)
-            expect(SplitBlockBloomFilter.optimalNumOfBlocks(50000, 0.0001)).to.eq(4111)
-            expect(SplitBlockBloomFilter.optimalNumOfBlocks(50000, 0.00001)).to.eq(5773)
-            expect(SplitBlockBloomFilter.optimalNumOfBlocks(100000, 0.000001)).to.eq(15961)
-        })
+      expect(SplitBlockBloomFilter.optimalNumOfBlocks(25000, 0.001)).to.eq(1427);
+      expect(SplitBlockBloomFilter.optimalNumOfBlocks(50000, 0.0001)).to.eq(4111);
+      expect(SplitBlockBloomFilter.optimalNumOfBlocks(50000, 0.00001)).to.eq(5773);
+      expect(SplitBlockBloomFilter.optimalNumOfBlocks(100000, 0.000001)).to.eq(15961);
+    });

-        it("sets good values", function (done: Done) {
-            const numDistinct = 100000
-            const fpr = 0.01
-            const filter = new SplitBlockBloomFilter()
-                .setOptionNumDistinct(numDistinct)
-                .setOptionFalsePositiveRate(fpr)
-                .init()
+    it('sets good values', function (done: Done) {
+      const numDistinct = 100000;
+      const fpr = 0.01;
+      const filter = new SplitBlockBloomFilter()
+        .setOptionNumDistinct(numDistinct)
+        .setOptionFalsePositiveRate(fpr)
+        .init();

-            times(numDistinct, () => {
-                const hashValue = new Long(random(0, 2 ** 30), random(0, 2 ** 30), true)
-                filter.insert(hashValue).then(_ => {
-                    filter.check(hashValue).then(r => {
-                        if (!r) {
-                            done(`expected ${hashValue} to be present, but it wasn't`)
-                        }
-                    })
-                })
-            })
+      times(numDistinct, () => {
+        const hashValue = new Long(random(0, 2 ** 30), random(0, 2 ** 30), true);
+        filter.insert(hashValue).then((_) => {
+          filter.check(hashValue).then((r) => {
+            if (!r) {
+              done(`expected ${hashValue} to be present, but it wasn't`);
+            }
+          });
+        });
+      });

-            let falsePositive = 0
-            times(numDistinct, function () {
-                const notInFilter = new Long(random(0, 2 ** 30), random(0, 2 ** 30), true)
-                filter.check(notInFilter).then(r => {
-                    if (r) falsePositive++
-                })
-            })
+      let falsePositive = 0;
+      times(numDistinct, function () {
+        const notInFilter = new Long(random(0, 2 ** 30), random(0, 2 ** 30), true);
+        filter.check(notInFilter).then((r) => {
+          if (r) falsePositive++;
+        });
+      });

-            if (falsePositive > 0) console.log("Found false positive: ", falsePositive)
-            expect(falsePositive < (numDistinct * fpr))
-            done()
-        }).timeout(10000)
-    })
+      if (falsePositive > 0) console.log('Found false positive: ', falsePositive);
+      expect(falsePositive < numDistinct * fpr);
+      done();
+    }).timeout(10000);
+  });

-    /**
-     * Some of these test cases may seem redundant or superfluous. They're put here to
-     * suggest how filter data might be inserted, or not.
-     */
+  /**
+   * Some of these test cases may seem redundant or superfluous. They're put here to
+   * suggest how filter data might be inserted, or not.
+   */

-    const pojo = {
-        name: "William Shakespeare",
-        preferredName: "Shakesey",
-        url: "http://placekitten.com/800/600"
-    }
+  const pojo = {
+    name: 'William Shakespeare',
+    preferredName: 'Shakesey',
+    url: 'http://placekitten.com/800/600',
+  };

-    describe("insert, check", function () {
-        type testCase = { name: string, val: any }
-        const testCases: Array<testCase> = [
-            {name: "boolean", val: true},
-            {name: "int number", val: 23423},
-            {name: "float number", val: 23334.23},
-            {name: "string", val: "hello hello hello"},
-            {name: "UInt8Array", val: Uint8Array.from([0x1, 0x4, 0xa, 0xb])},
-            {name: "Long", val: new Long(random(0, 2 ** 30), random(0, 2 ** 30), true)},
-            {name: "Buffer", val: Buffer.from("Hello Hello Hello")},
-            {name: "BigInt", val: BigInt(1234324434440)},
-            {name: "stringified object", val: JSON.stringify(pojo)},
-            {name: "stringified array", val: [383838, 222, 5898, 1, 0].toString()}
-        ]
-        const filter = new SplitBlockBloomFilter().setOptionNumDistinct(1000).init()
-        testCases.forEach(tc => {
-            it(`works for a ${tc.name} type`, async function () {
-                await filter.insert(tc.val)
-                const isPresent = await filter.check(tc.val)
-                expect(isPresent).to.eq(true)
-            })
-        })
-    })
+  describe('insert, check', function () {
+    type testCase = { name: string; val: any };
+    const testCases: Array<testCase> = [
+      { name: 'boolean', val: true },
+      { name: 'int number', val: 23423 },
+      { name: 'float number', val: 23334.23 },
+      { name: 'string', val: 'hello hello hello' },
+      { name: 'UInt8Array', val: Uint8Array.from([0x1, 0x4, 0xa, 0xb]) },
+      { name: 'Long', val: new Long(random(0, 2 ** 30), random(0, 2 ** 30), true) },
+      { name: 'Buffer', val: Buffer.from('Hello Hello Hello') },
+      { name: 'BigInt', val: BigInt(1234324434440) },
+      { name: 'stringified object', val: JSON.stringify(pojo) },
+      { name: 'stringified array', val: [383838, 222, 5898, 1, 0].toString() },
+    ];
+    const filter = new SplitBlockBloomFilter().setOptionNumDistinct(1000).init();
+    testCases.forEach((tc) => {
+      it(`works for a ${tc.name} type`, async function () {
+        await filter.insert(tc.val);
+        const isPresent = await filter.check(tc.val);
+        expect(isPresent).to.eq(true);
+      });
+    });
+  });

-    describe("insert throws on unsupported type", async function () {
-
-        const throwCases = [
-            {name: "POJO", val: pojo},
-            {name: "Array", val: [383838, 222, 5898, 1, 0]},
-            {name: "Uint32Array", val: new Uint32Array(8).fill(39383)},
-            {name: "Set", val: (new Set()).add("foo").add(5).add([1, 2, 3])},
-            {name: "Map", val: new Map()}
-        ]
-        const filter = new SplitBlockBloomFilter().setOptionNumDistinct(1000).init()
-
-        throwCases.forEach((tc) => {
-            it(`throws on type ${tc.name}`, async function () {
-                let gotError = false
-                try {
-                    await filter.insert(tc.val)
-                } catch (e) {
-                    if (e instanceof Error) {
-                        gotError = true
-                        expect(e.message).to.match(/unsupported type:/)
-                    }
-                }
-                expect(gotError).to.eq(true)
-            })
-        })
-    })
-})
+  describe('insert throws on unsupported type', async function () {
+    const throwCases = [
+      { name: 'POJO', val: pojo },
+      { name: 'Array', val: [383838, 222, 5898, 1, 0] },
+      { name: 'Uint32Array', val: new Uint32Array(8).fill(39383) },
+      { name: 'Set', val: new Set().add('foo').add(5).add([1, 2, 3]) },
+      { name: 'Map', val: new Map() },
+    ];
+    const filter = new SplitBlockBloomFilter().setOptionNumDistinct(1000).init();
+
+    throwCases.forEach((tc) => {
+      it(`throws on type ${tc.name}`, async function () {
+        let gotError = false;
+        try {
+          await filter.insert(tc.val);
+        } catch (e) {
+          if (e instanceof Error) {
+            gotError = true;
+            expect(e.message).to.match(/unsupported type:/);
+          }
+        }
+        expect(gotError).to.eq(true);
+      });
+    });
+  });
+});
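For reference alongside the sbbf tests, the builder flow they exercise end to end, as a sketch only; the rate and cardinality values are illustrative:

    // Sketch: typical SplitBlockBloomFilter usage from test/sbbf.ts.
    import SplitBlockBloomFilter from '../lib/bloom/sbbf';

    async function bloomRoundTrip(): Promise<boolean> {
      const filter = new SplitBlockBloomFilter()
        .setOptionFalsePositiveRate(0.001) // option setters log an error once init() has run
        .setOptionNumDistinct(100000)
        .init(); // allocates the filter blocks
      await filter.insert('hello hello hello'); // strings, Longs, Buffers, BigInts are accepted
      return filter.check('hello hello hello'); // true, modulo the false-positive rate
    }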
diff --git a/test/schema.js b/test/schema.js
index 4dc9b3b3..c696637f 100644
--- a/test/schema.js
+++ b/test/schema.js
@@ -3,9 +3,8 @@ const chai = require('chai');
 const assert = chai.assert;
 const parquet = require('../parquet');

-describe('ParquetSchema', function() {
-
-  it('should assign correct defaults in a simple flat schema', function() {
+describe('ParquetSchema', function () {
+  it('should assign correct defaults in a simple flat schema', function () {
     var schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       quantity: { type: 'INT64' },
@@ -61,10 +60,9 @@ describe('ParquetSchema', function () {
       assert.equal(!!c.isNested, false);
       assert.equal(c.fieldCount, undefined);
     }
-
   });

-  it('should assign correct defaults in a flat schema with optional fieldList', function() {
+  it('should assign correct defaults in a flat schema with optional fieldList', function () {
     var schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       quantity: { type: 'INT64', optional: true },
@@ -122,7 +120,7 @@ describe('ParquetSchema', function () {
     }
   });

-  it('should assign correct defaults in a flat schema with repeated fieldList', function() {
+  it('should assign correct defaults in a flat schema with repeated fieldList', function () {
     var schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       quantity: { type: 'INT64', repeated: true },
@@ -180,14 +178,14 @@ describe('ParquetSchema', function () {
     }
   });

-  it('should assign correct defaults in a nested schema without repetition modifiers', function() {
+  it('should assign correct defaults in a nested schema without repetition modifiers', function () {
     var schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       stock: {
         fields: {
           quantity: { type: 'INT64' },
           warehouse: { type: 'UTF8' },
-        }
+        },
       },
       price: { type: 'DOUBLE' },
     });
@@ -275,7 +273,7 @@ describe('ParquetSchema', function () {
     }
   });

-  it('should assign correct defaults in a nested schema with optional fields', function() {
+  it('should assign correct defaults in a nested schema with optional fields', function () {
     var schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       stock: {
@@ -283,7 +281,7 @@ describe('ParquetSchema', function () {
         fields: {
           quantity: { type: 'INT64', optional: true },
           warehouse: { type: 'UTF8' },
-        }
+        },
       },
       price: { type: 'DOUBLE' },
     });
@@ -371,7 +369,7 @@ describe('ParquetSchema', function () {
     }
   });

-  it('should assign correct defaults in a nested schema with repeated fields', function() {
+  it('should assign correct defaults in a nested schema with repeated fields', function () {
     var schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       stock: {
@@ -379,7 +377,7 @@ describe('ParquetSchema', function () {
         fields: {
           quantity: { type: 'INT64', optional: true },
           warehouse: { type: 'UTF8' },
-        }
+        },
       },
       price: { type: 'DOUBLE' },
     });
@@ -467,33 +465,33 @@ describe('ParquetSchema', function () {
     }
   });

-  it('should indicate which column had an invalid type in a simple flat schema', function() {
+  it('should indicate which column had an invalid type in a simple flat schema', function () {
     assert.throws(() => {
       new parquet.ParquetSchema({
-        quantity: {type: 'UNKNOWN'},
-      })
+        quantity: { type: 'UNKNOWN' },
+      });
     }, 'Invalid parquet type: UNKNOWN, for Column: quantity');
   });

-  it('should indicate each column which has an invalid type in a simple flat schema', function() {
+  it('should indicate each column which has an invalid type in a simple flat schema', function () {
     assert.throws(() => {
       new parquet.ParquetSchema({
-        quantity: {type: 'UNKNOWN'},
-        value: {type: 'UNKNOWN'},
-      })
+        quantity: { type: 'UNKNOWN' },
+        value: { type: 'UNKNOWN' },
+      });
     }, 'Invalid parquet type: UNKNOWN, for Column: quantity\nInvalid parquet type: UNKNOWN, for Column: value');
   });

-  it('should indicate each column which has an invalid type when one is correct in a simple flat schema', function() {
+  it('should indicate each column which has an invalid type when one is correct in a simple flat schema', function () {
     assert.throws(() => {
       new parquet.ParquetSchema({
-        quantity: {type: 'INT32'},
-        value: {type: 'UNKNOWN'},
-      })
+        quantity: { type: 'INT32' },
+        value: { type: 'UNKNOWN' },
+      });
     }, 'Invalid parquet type: UNKNOWN, for Column: value');
   });

-  it('should indicate each column which has an invalid type in a nested schema', function() {
+  it('should indicate each column which has an invalid type in a nested schema', function () {
     assert.throws(() => {
       new parquet.ParquetSchema({
         name: { type: 'UTF8' },
@@ -501,83 +499,82 @@ describe('ParquetSchema', function () {
           fields: {
             quantity: { type: 'UNKNOWN' },
             warehouse: { type: 'UNKNOWN' },
-          }
+          },
         },
         price: { type: 'UNKNOWN' },
-      })
+      });
     }, 'Invalid parquet type: UNKNOWN, for Column: stock.quantity\nInvalid parquet type: UNKNOWN, for Column: stock.warehouse');
   });

-  it('should indicate which column had an invalid encoding in a simple flat schema', function() {
+  it('should indicate which column had an invalid encoding in a simple flat schema', function () {
     assert.throws(() => {
       new parquet.ParquetSchema({
-        quantity: {type: 'INT32', encoding: 'UNKNOWN'},
-      })
+        quantity: { type: 'INT32', encoding: 'UNKNOWN' },
+      });
     }, 'Unsupported parquet encoding: UNKNOWN, for Column: quantity');
   });

-  it('should indicate which column had an invalid compression type in a simple flat schema', function() {
+  it('should indicate which column had an invalid compression type in a simple flat schema', function () {
     assert.throws(() => {
       new parquet.ParquetSchema({
-        quantity: {type: 'INT32', compression: 'UNKNOWN'},
-      })
+        quantity: { type: 'INT32', compression: 'UNKNOWN' },
+      });
     }, 'Unsupported compression method: UNKNOWN, for Column: quantity');
   });

-  it('should throw error given decimal with no precision', function() {
+  it('should throw error given decimal with no precision', function () {
     assert.throws(() => {
       new parquet.ParquetSchema({
-        test_decimal_col: {type: 'DECIMAL', scale: 4},
-      })
+        test_decimal_col: { type: 'DECIMAL', scale: 4 },
+      });
     }, 'invalid schema for type: DECIMAL, for Column: test_decimal_col, precision is required and must be be greater than 0');
   });

-  it('should NOT throw error given decimal with no scale', function() {
+  it('should NOT throw error given decimal with no scale', function () {
     assert.doesNotThrow(() => {
       new parquet.ParquetSchema({
-        test_decimal_col: {type: 'DECIMAL', precision: 4},
-      })
+        test_decimal_col: { type: 'DECIMAL', precision: 4 },
+      });
     });
   });

-  it('should throw error given decimal with negative precision', function() {
+  it('should throw error given decimal with negative precision', function () {
     assert.throws(() => {
       new parquet.ParquetSchema({
-        decimal_column: {type: 'DECIMAL', precision: -1, scale: 0},
-      })
+        decimal_column: { type: 'DECIMAL', precision: -1, scale: 0 },
+      });
     }, 'invalid schema for type: DECIMAL, for Column: decimal_column, precision is required and must be be greater than 0');
   });

-  it('should throw error given decimal with a non-integer precision', function() {
+  it('should throw error given decimal with a non-integer precision', function () {
     assert.throws(() => {
       new parquet.ParquetSchema({
-        decimal_column: {type: 'DECIMAL', precision: 6.1, scale: 5},
-      })
+        decimal_column: { type: 'DECIMAL', precision: 6.1, scale: 5 },
+      });
     }, 'invalid schema for type: DECIMAL, for Column: decimal_column, precision must be an integer');
   });

-  it('should throw error given decimal with a non-integer scale', function() {
+  it('should throw error given decimal with a non-integer scale', function () {
     assert.throws(() => {
       new parquet.ParquetSchema({
-        decimal_column: {type: 'DECIMAL', precision: 6, scale: 5.1},
-      })
+        decimal_column: { type: 'DECIMAL', precision: 6, scale: 5.1 },
+      });
     }, 'invalid schema for type: DECIMAL, for Column: decimal_column, scale must be an integer');
   });

-  it('should throw error given decimal with negative scale', function() {
+  it('should throw error given decimal with negative scale', function () {
     assert.throws(() => {
       new parquet.ParquetSchema({
-        decimal_column: {type: 'DECIMAL', precision: 6, scale: -1},
-      })
+        decimal_column: { type: 'DECIMAL', precision: 6, scale: -1 },
+      });
     }, 'invalid schema for type: DECIMAL, for Column: decimal_column, scale is required to be 0 or greater');
   });

-  it('should throw error given decimal with scale > precision', function() {
+  it('should throw error given decimal with scale > precision', function () {
     assert.throws(() => {
       new parquet.ParquetSchema({
-        decimal_column: {type: 'DECIMAL', precision: 5, scale: 6},
-      })
+        decimal_column: { type: 'DECIMAL', precision: 5, scale: 6 },
+      });
     }, 'invalid schema or precision for type: DECIMAL, for Column: decimal_column, precision must be greater than or equal to scale');
   });
-
 });
diff --git a/test/shred.js b/test/shred.js
index 593285c9..de23c93c 100644
--- a/test/shred.js
+++ b/test/shred.js
@@ -3,20 +3,18 @@ const chai = require('chai');
 const assert = chai.assert;
 const parquet = require('../parquet');

-describe('ParquetShredder', function() {
-
-  it('should shred a single simple record', function() {
+describe('ParquetShredder', function () {
+  it('should shred a single simple record', function () {
     var schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       quantity: { type: 'INT64' },
       price: { type: 'DOUBLE' },
     });

-
     let buf = {};

     {
-      let rec = { name: "apple", quantity: 10, price: 23.5 };
+      let rec = { name: 'apple', quantity: 10, price: 23.5 };
       parquet.ParquetShredder.shredRecord(schema, rec, buf);
     }

@@ -24,7 +22,10 @@ describe('ParquetShredder', function () {
     assert.equal(buf.rowCount, 1);
     assert.deepEqual(colData.name.dlevels, [0]);
     assert.deepEqual(colData.name.rlevels, [0]);
-    assert.deepEqual(colData.name.values.map((x) => x.toString()), ["apple"]);
+    assert.deepEqual(
+      colData.name.values.map((x) => x.toString()),
+      ['apple']
+    );
     assert.deepEqual(colData.quantity.dlevels, [0]);
     assert.deepEqual(colData.quantity.rlevels, [0]);
     assert.deepEqual(colData.quantity.values, [10]);
@@ -33,28 +34,27 @@ describe('ParquetShredder', function () {
     assert.deepEqual(colData.price.values, [23.5]);
   });

-  it('should shred a list of simple records', function() {
+  it('should shred a list of simple records', function () {
     var schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       quantity: { type: 'INT64' },
       price: { type: 'DOUBLE' },
     });

-
     let buf = {};

     {
-      let rec = { name: "apple", quantity: 10, price: 23.5 };
+      let rec = { name: 'apple', quantity: 10, price: 23.5 };
       parquet.ParquetShredder.shredRecord(schema, rec, buf);
     }

     {
-      let rec = { name: "orange", quantity: 20, price: 17.1 };
+      let rec = { name: 'orange', quantity: 20, price: 17.1 };
       parquet.ParquetShredder.shredRecord(schema, rec, buf);
     }

     {
-      let rec = { name: "banana", quantity: 15, price: 42 };
+      let rec = { name: 'banana', quantity: 15, price: 42 };
       parquet.ParquetShredder.shredRecord(schema, rec, buf);
     }

@@ -62,7 +62,10 @@ describe('ParquetShredder', function () {
     assert.equal(buf.rowCount, 3);
     assert.deepEqual(colData.name.dlevels, [0, 0, 0]);
     assert.deepEqual(colData.name.rlevels, [0, 0, 0]);
-    assert.deepEqual(colData.name.values.map((x) => x.toString()), ["apple", "orange", "banana"]);
+    assert.deepEqual(
+      colData.name.values.map((x) => x.toString()),
+      ['apple', 'orange', 'banana']
+    );
     assert.deepEqual(colData.quantity.dlevels, [0, 0, 0]);
     assert.deepEqual(colData.quantity.rlevels, [0, 0, 0]);
     assert.deepEqual(colData.quantity.values, [10, 20, 15]);
@@ -71,28 +74,27 @@ describe('ParquetShredder', function () {
     assert.deepEqual(colData.price.values, [23.5, 17.1, 42]);
   });

-  it('should shred a list of simple records with optional scalar fields', function() {
+  it('should shred a list of simple records with optional scalar fields', function () {
     var schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       quantity: { type: 'INT64', optional: true },
       price: { type: 'DOUBLE' },
     });

-
     let buf = {};

     {
-      let rec = { name: "apple", quantity: 10, price: 23.5 };
+      let rec = { name: 'apple', quantity: 10, price: 23.5 };
       parquet.ParquetShredder.shredRecord(schema, rec, buf);
     }

     {
-      let rec = { name: "orange", price: 17.1 };
+      let rec = { name: 'orange', price: 17.1 };
       parquet.ParquetShredder.shredRecord(schema, rec, buf);
     }

     {
-      let rec = { name: "banana", quantity: 15, price: 42 };
+      let rec = { name: 'banana', quantity: 15, price: 42 };
       parquet.ParquetShredder.shredRecord(schema, rec, buf);
     }

@@ -100,7 +102,10 @@ describe('ParquetShredder', function () {
     assert.equal(buf.rowCount, 3);
     assert.deepEqual(colData.name.dlevels, [0, 0, 0]);
     assert.deepEqual(colData.name.rlevels, [0, 0, 0]);
-    assert.deepEqual(colData.name.values.map((x) => x.toString()), ["apple", "orange", "banana"]);
+    assert.deepEqual(
+      colData.name.values.map((x) => x.toString()),
+      ['apple', 'orange', 'banana']
+    );
     assert.deepEqual(colData.quantity.dlevels, [1, 0, 1]);
     assert.deepEqual(colData.quantity.rlevels, [0, 0, 0]);
     assert.deepEqual(colData.quantity.values, [10, 15]);
@@ -109,28 +114,27 @@ describe('ParquetShredder', function () {
     assert.deepEqual(colData.price.values, [23.5, 17.1, 42]);
   });

-  it('should shred a list of simple records with repeated scalar fields', function() {
+  it('should shred a list of simple records with repeated scalar fields', function () {
     var schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       colours: { type: 'UTF8', repeated: true },
       price: { type: 'DOUBLE' },
     });

-
     let buf = {};

     {
-      let rec = { name: "apple", price: 23.5, colours: ["red", "green"] };
+      let rec = { name: 'apple', price: 23.5, colours: ['red', 'green'] };
       parquet.ParquetShredder.shredRecord(schema, rec, buf);
     }

     {
-      let rec = { name: "orange", price: 17.1, colours: ["orange"] };
+      let rec = { name: 'orange', price: 17.1, colours: ['orange'] };
       parquet.ParquetShredder.shredRecord(schema, rec, buf);
     }

     {
-      let rec = { name: "banana", price: 42, colours: ["yellow"] };
+      let rec = { name: 'banana', price: 42, colours: ['yellow'] };
       parquet.ParquetShredder.shredRecord(schema, rec, buf);
     }

@@ -138,11 +142,17 @@ describe('ParquetShredder', function () {
     assert.equal(buf.rowCount, 3);
     assert.deepEqual(colData.name.dlevels, [0, 0, 0]);
     assert.deepEqual(colData.name.rlevels, [0, 0, 0]);
-    assert.deepEqual(colData.name.values.map((x) => x.toString()), ["apple", "orange", "banana"]);
+    assert.deepEqual(
+      colData.name.values.map((x) => x.toString()),
+      ['apple', 'orange', 'banana']
+    );
     assert.deepEqual(colData.name.count, 3);
     assert.deepEqual(colData.colours.dlevels, [1, 1, 1, 1]);
     assert.deepEqual(colData.colours.rlevels, [0, 1, 0, 0]);
-    assert.deepEqual(colData.colours.values.map((x) => x.toString()), ["red", "green", "orange", "yellow"]);
+    assert.deepEqual(
+      colData.colours.values.map((x) => x.toString()),
+      ['red', 'green', 'orange', 'yellow']
+    );
     assert.deepEqual(colData.colours.count, 4);
     assert.deepEqual(colData.price.dlevels, [0, 0, 0]);
     assert.deepEqual(colData.price.rlevels, [0, 0, 0]);
@@ -150,28 +160,27 @@ describe('ParquetShredder', function () {
     assert.deepEqual(colData.price.count, 3);
   });

-  it('should shred a nested record without repetition modifiers', function() {
+  it('should shred a nested record without repetition modifiers', function () {
     var schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       stock: {
         fields: {
           quantity: { type: 'INT64' },
           warehouse: { type: 'UTF8' },
-        }
+        },
       },
       price: { type: 'DOUBLE' },
     });

-
     let buf = {};

     {
-      let rec = { name: "apple", stock: { quantity: 10, warehouse: "A" }, price: 23.5 };
+      let rec = { name: 'apple', stock: { quantity: 10, warehouse: 'A' }, price: 23.5 };
       parquet.ParquetShredder.shredRecord(schema, rec, buf);
     }

     {
-      let rec = { name: "banana", stock: { quantity: 20, warehouse: "B" }, price: 42.0 };
+      let rec = { name: 'banana', stock: { quantity: 20, warehouse: 'B' }, price: 42.0 };
       parquet.ParquetShredder.shredRecord(schema, rec, buf);
     }

@@ -179,40 +188,45 @@ describe('ParquetShredder', function () {
     assert.equal(buf.rowCount, 2);
     assert.deepEqual(colData[['name']].dlevels, [0, 0]);
     assert.deepEqual(colData[['name']].rlevels, [0, 0]);
-    assert.deepEqual(colData[['name']].values.map((x) => x.toString()), ["apple", "banana"]);
+    assert.deepEqual(
+      colData[['name']].values.map((x) => x.toString()),
+      ['apple', 'banana']
+    );
     assert.deepEqual(colData[['stock', 'quantity']].dlevels, [0, 0]);
     assert.deepEqual(colData[['stock', 'quantity']].rlevels, [0, 0]);
     assert.deepEqual(colData[['stock', 'quantity']].values, [10, 20]);
     assert.deepEqual(colData[['stock', 'warehouse']].dlevels, [0, 0]);
     assert.deepEqual(colData[['stock', 'warehouse']].rlevels, [0, 0]);
-    assert.deepEqual(colData[['stock', 'warehouse']].values.map((x) => x.toString()), ["A", "B"]);
+    assert.deepEqual(
+      colData[['stock', 'warehouse']].values.map((x) => x.toString()),
+      ['A', 'B']
+    );
     assert.deepEqual(colData[['price']].dlevels, [0, 0]);
     assert.deepEqual(colData[['price']].rlevels, [0, 0]);
     assert.deepEqual(colData[['price']].values, [23.5, 42.0]);
   });

-  it('should shred a nested record with optional fields', function() {
+  it('should shred a nested record with optional fields', function () {
     var schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       stock: {
         fields: {
           quantity: { type: 'INT64', optional: true },
           warehouse: { type: 'UTF8' },
-        }
+        },
       },
       price: { type: 'DOUBLE' },
     });

-
     let buf = {};

     {
-      let
rec = { name: "banana", stock: { warehouse: "B" }, price: 42.0 }; + let rec = { name: 'banana', stock: { warehouse: 'B' }, price: 42.0 }; parquet.ParquetShredder.shredRecord(schema, rec, buf); } @@ -220,19 +234,25 @@ describe('ParquetShredder', function() { assert.equal(buf.rowCount, 2); assert.deepEqual(colData[['name']].dlevels, [0, 0]); assert.deepEqual(colData[['name']].rlevels, [0, 0]); - assert.deepEqual(colData[['name']].values.map((x) => x.toString()), ["apple", "banana"]); + assert.deepEqual( + colData[['name']].values.map((x) => x.toString()), + ['apple', 'banana'] + ); assert.deepEqual(colData[['stock', 'quantity']].dlevels, [1, 0]); assert.deepEqual(colData[['stock', 'quantity']].rlevels, [0, 0]); assert.deepEqual(colData[['stock', 'quantity']].values, [10]); assert.deepEqual(colData[['stock', 'warehouse']].dlevels, [0, 0]); assert.deepEqual(colData[['stock', 'warehouse']].rlevels, [0, 0]); - assert.deepEqual(colData[['stock', 'warehouse']].values.map((x) => x.toString()), ["A", "B"]); + assert.deepEqual( + colData[['stock', 'warehouse']].values.map((x) => x.toString()), + ['A', 'B'] + ); assert.deepEqual(colData[['price']].dlevels, [0, 0]); assert.deepEqual(colData[['price']].rlevels, [0, 0]); assert.deepEqual(colData[['price']].values, [23.5, 42.0]); }); - it('should shred a nested record with nested optional fields', function() { + it('should shred a nested record with nested optional fields', function () { var schema = new parquet.ParquetSchema({ name: { type: 'UTF8' }, stock: { @@ -240,26 +260,25 @@ describe('ParquetShredder', function() { fields: { quantity: { type: 'INT64', optional: true }, warehouse: { type: 'UTF8' }, - } + }, }, price: { type: 'DOUBLE' }, }); - let buf = {}; { - let rec = { name: "apple", stock: { quantity: 10, warehouse: "A" }, price: 23.5 }; + let rec = { name: 'apple', stock: { quantity: 10, warehouse: 'A' }, price: 23.5 }; parquet.ParquetShredder.shredRecord(schema, rec, buf); } { - let rec = { name: "orange" , price: 17.0 }; + let rec = { name: 'orange', price: 17.0 }; parquet.ParquetShredder.shredRecord(schema, rec, buf); } { - let rec = { name: "banana", stock: { warehouse: "B" }, price: 42.0 }; + let rec = { name: 'banana', stock: { warehouse: 'B' }, price: 42.0 }; parquet.ParquetShredder.shredRecord(schema, rec, buf); } @@ -267,45 +286,50 @@ describe('ParquetShredder', function() { assert.equal(buf.rowCount, 3); assert.deepEqual(colData[['name']].dlevels, [0, 0, 0]); assert.deepEqual(colData[['name']].rlevels, [0, 0, 0]); - assert.deepEqual(colData[['name']].values.map((x) => x.toString()), ["apple", "orange", "banana"]); + assert.deepEqual( + colData[['name']].values.map((x) => x.toString()), + ['apple', 'orange', 'banana'] + ); assert.deepEqual(colData[['stock', 'quantity']].dlevels, [2, 0, 1]); assert.deepEqual(colData[['stock', 'quantity']].rlevels, [0, 0, 0]); assert.deepEqual(colData[['stock', 'quantity']].values, [10]); assert.deepEqual(colData[['stock', 'warehouse']].dlevels, [1, 0, 1]); assert.deepEqual(colData[['stock', 'warehouse']].rlevels, [0, 0, 0]); - assert.deepEqual(colData[['stock', 'warehouse']].values.map((x) => x.toString()), ["A", "B"]); + assert.deepEqual( + colData[['stock', 'warehouse']].values.map((x) => x.toString()), + ['A', 'B'] + ); assert.deepEqual(colData[['price']].dlevels, [0, 0, 0]); assert.deepEqual(colData[['price']].rlevels, [0, 0, 0]); assert.deepEqual(colData[['price']].values, [23.5, 17.0, 42.0]); }); - it('should shred a nested record with repeated fields', function() { + it('should shred a nested 
record with repeated fields', function () { var schema = new parquet.ParquetSchema({ name: { type: 'UTF8' }, stock: { fields: { quantity: { type: 'INT64', repeated: true }, warehouse: { type: 'UTF8' }, - } + }, }, price: { type: 'DOUBLE' }, }); - let buf = {}; { - let rec = { name: "apple", stock: { quantity: 10, warehouse: "A" }, price: 23.5 }; + let rec = { name: 'apple', stock: { quantity: 10, warehouse: 'A' }, price: 23.5 }; parquet.ParquetShredder.shredRecord(schema, rec, buf); } { - let rec = { name: "orange", stock: { quantity: [50, 75], warehouse: "B" }, price: 17.0 }; + let rec = { name: 'orange', stock: { quantity: [50, 75], warehouse: 'B' }, price: 17.0 }; parquet.ParquetShredder.shredRecord(schema, rec, buf); } { - let rec = { name: "banana", stock: { warehouse: "C" }, price: 42.0 }; + let rec = { name: 'banana', stock: { warehouse: 'C' }, price: 42.0 }; parquet.ParquetShredder.shredRecord(schema, rec, buf); } @@ -313,19 +337,25 @@ describe('ParquetShredder', function() { assert.equal(buf.rowCount, 3); assert.deepEqual(colData[['name']].dlevels, [0, 0, 0]); assert.deepEqual(colData[['name']].rlevels, [0, 0, 0]); - assert.deepEqual(colData[['name']].values.map((x) => x.toString()), ["apple", "orange", "banana"]); + assert.deepEqual( + colData[['name']].values.map((x) => x.toString()), + ['apple', 'orange', 'banana'] + ); assert.deepEqual(colData[['stock', 'quantity']].dlevels, [1, 1, 1, 0]); assert.deepEqual(colData[['stock', 'quantity']].rlevels, [0, 0, 1, 0]); assert.deepEqual(colData[['stock', 'quantity']].values, [10, 50, 75]); assert.deepEqual(colData[['stock', 'warehouse']].dlevels, [0, 0, 0]); assert.deepEqual(colData[['stock', 'warehouse']].rlevels, [0, 0, 0]); - assert.deepEqual(colData[['stock', 'warehouse']].values.map((x) => x.toString()), ["A", "B", "C"]); + assert.deepEqual( + colData[['stock', 'warehouse']].values.map((x) => x.toString()), + ['A', 'B', 'C'] + ); assert.deepEqual(colData[['price']].dlevels, [0, 0, 0]); assert.deepEqual(colData[['price']].rlevels, [0, 0, 0]); assert.deepEqual(colData[['price']].values, [23.5, 17.0, 42.0]); }); - it('should shred a nested record with nested repeated fields', function() { + it('should shred a nested record with nested repeated fields', function () { var schema = new parquet.ParquetSchema({ name: { type: 'UTF8' }, stock: { @@ -333,31 +363,37 @@ describe('ParquetShredder', function() { fields: { quantity: { type: 'INT64', repeated: true }, warehouse: { type: 'UTF8' }, - } + }, }, price: { type: 'DOUBLE' }, }); - let buf = {}; { - let rec = { name: "apple", stock: [{ quantity: 10, warehouse: "A" }, { quantity: 20, warehouse: "B" } ], price: 23.5 }; + let rec = { + name: 'apple', + stock: [ + { quantity: 10, warehouse: 'A' }, + { quantity: 20, warehouse: 'B' }, + ], + price: 23.5, + }; parquet.ParquetShredder.shredRecord(schema, rec, buf); } { - let rec = { name: "orange", stock: { quantity: [50, 75], warehouse: "X" }, price: 17.0 }; + let rec = { name: 'orange', stock: { quantity: [50, 75], warehouse: 'X' }, price: 17.0 }; parquet.ParquetShredder.shredRecord(schema, rec, buf); } { - let rec = { name: "kiwi", price: 99.0 }; + let rec = { name: 'kiwi', price: 99.0 }; parquet.ParquetShredder.shredRecord(schema, rec, buf); } { - let rec = { name: "banana", stock: { warehouse: "C" }, price: 42.0 }; + let rec = { name: 'banana', stock: { warehouse: 'C' }, price: 42.0 }; parquet.ParquetShredder.shredRecord(schema, rec, buf); } @@ -365,19 +401,25 @@ describe('ParquetShredder', function() { assert.equal(buf.rowCount, 4); 
     assert.deepEqual(colData[['name']].dlevels, [0, 0, 0, 0]);
     assert.deepEqual(colData[['name']].rlevels, [0, 0, 0, 0]);
-    assert.deepEqual(colData[['name']].values.map((x) => x.toString()), ["apple", "orange", "kiwi", "banana"]);
+    assert.deepEqual(
+      colData[['name']].values.map((x) => x.toString()),
+      ['apple', 'orange', 'kiwi', 'banana']
+    );
     assert.deepEqual(colData[['stock', 'quantity']].dlevels, [2, 2, 2, 2, 0, 1]);
     assert.deepEqual(colData[['stock', 'quantity']].rlevels, [0, 1, 0, 2, 0, 0]);
     assert.deepEqual(colData[['stock', 'quantity']].values, [10, 20, 50, 75]);
     assert.deepEqual(colData[['stock', 'warehouse']].dlevels, [1, 1, 1, 0, 1]);
     assert.deepEqual(colData[['stock', 'warehouse']].rlevels, [0, 1, 0, 0, 0]);
-    assert.deepEqual(colData[['stock', 'warehouse']].values.map((x) => x.toString()), ["A", "B", "X", "C"]);
+    assert.deepEqual(
+      colData[['stock', 'warehouse']].values.map((x) => x.toString()),
+      ['A', 'B', 'X', 'C']
+    );
     assert.deepEqual(colData[['price']].dlevels, [0, 0, 0, 0]);
     assert.deepEqual(colData[['price']].rlevels, [0, 0, 0, 0]);
     assert.deepEqual(colData[['price']].values, [23.5, 17.0, 99.0, 42.0]);
   });

-  it('should materialize a nested record with scalar repeated fields', function() {
+  it('should materialize a nested record with scalar repeated fields', function () {
     var schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       price: { type: 'DOUBLE', repeated: true },
@@ -385,50 +427,42 @@ describe('ParquetShredder', function() {

     let buffer = {
       rowCount: 4,
-      columnData: {}
+      columnData: {},
     };

     buffer.columnData['name'] = {
       dlevels: [0, 0, 0, 0],
       rlevels: [0, 0, 0, 0],
-      values:[
+      values: [
         Buffer.from([97, 112, 112, 108, 101]),
         Buffer.from([111, 114, 97, 110, 103, 101]),
         Buffer.from([107, 105, 119, 105]),
-        Buffer.from([98, 97, 110, 97, 110, 97])
+        Buffer.from([98, 97, 110, 97, 110, 97]),
       ],
-      count:4
+      count: 4,
     };

     buffer.columnData['price'] = {
       dlevels: [1, 1, 1, 1, 1, 1],
       rlevels: [0, 0, 1, 0, 1, 0],
       values: [23.5, 17, 23, 99, 100, 42],
-      count: 6
+      count: 6,
     };

     let records = parquet.ParquetShredder.materializeRecords(schema, buffer);
     assert.equal(records.length, 4);

-    assert.deepEqual(
-        records[0],
-        { name: "apple", price: [23.5] });
+    assert.deepEqual(records[0], { name: 'apple', price: [23.5] });

-    assert.deepEqual(
-        records[1],
-        { name: "orange", price: [17, 23] });
+    assert.deepEqual(records[1], { name: 'orange', price: [17, 23] });

-    assert.deepEqual(
-        records[2],
-        { name: "kiwi", price: [99, 100] });
+    assert.deepEqual(records[2], { name: 'kiwi', price: [99, 100] });

-    assert.deepEqual(
-        records[3],
-        { name: "banana", price: [42] });
+    assert.deepEqual(records[3], { name: 'banana', price: [42] });
   });

-  it('should materialize a nested record with nested repeated fields', function() {
+  it('should materialize a nested record with nested repeated fields', function () {
     var schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       stock: {
@@ -436,121 +470,109 @@ describe('ParquetShredder', function() {
         fields: {
           quantity: { type: 'INT64', repeated: true },
           warehouse: { type: 'UTF8' },
-        }
+        },
       },
       price: { type: 'DOUBLE' },
     });

     let buffer = {
       rowCount: 4,
-      columnData: {}
+      columnData: {},
     };

     buffer.columnData['name'] = {
       dlevels: [0, 0, 0, 0],
       rlevels: [0, 0, 0, 0],
-      values:[
+      values: [
        Buffer.from([97, 112, 112, 108, 101]),
        Buffer.from([111, 114, 97, 110, 103, 101]),
        Buffer.from([107, 105, 119, 105]),
-       Buffer.from([98, 97, 110, 97, 110, 97])
+       Buffer.from([98, 97, 110, 97, 110, 97]),
      ],
-      count:4
+      count: 4,
     };

-    buffer.columnData[['stock', 'quantity']] = {
+    buffer.columnData[['stock', 'quantity']] = {
       dlevels: [2, 2, 2, 2, 0, 1],
       rlevels: [0, 1, 0, 2, 0, 0],
       values: [10, 20, 50, 75],
-      count: 6
+      count: 6,
     };

-    buffer.columnData[['stock', 'warehouse']] = {
+    buffer.columnData[['stock', 'warehouse']] = {
       dlevels: [1, 1, 1, 0, 1],
       rlevels: [0, 1, 0, 0, 0],
-      values: [
-        Buffer.from([65]),
-        Buffer.from([66]),
-        Buffer.from([88]),
-        Buffer.from([67])
-      ],
-      count: 5
+      values: [Buffer.from([65]), Buffer.from([66]), Buffer.from([88]), Buffer.from([67])],
+      count: 5,
     };

     buffer.columnData['price'] = {
       dlevels: [0, 0, 0, 0],
       rlevels: [0, 0, 0, 0],
       values: [23.5, 17, 99, 42],
-      count: 4
+      count: 4,
     };

     let records = parquet.ParquetShredder.materializeRecords(schema, buffer);
     assert.equal(records.length, 4);

-    assert.deepEqual(
-        records[0],
-        { name: "apple", stock: [{ quantity: [10], warehouse: "A" }, { quantity: [20], warehouse: "B" } ], price: 23.5 });
+    assert.deepEqual(records[0], {
+      name: 'apple',
+      stock: [
+        { quantity: [10], warehouse: 'A' },
+        { quantity: [20], warehouse: 'B' },
+      ],
+      price: 23.5,
+    });

-    assert.deepEqual(
-        records[1],
-        { name: "orange", stock: [{ quantity: [50, 75], warehouse: "X" }], price: 17.0 });
+    assert.deepEqual(records[1], { name: 'orange', stock: [{ quantity: [50, 75], warehouse: 'X' }], price: 17.0 });

-    assert.deepEqual(
-        records[2],
-        { name: "kiwi", price: 99.0, stock: null });
+    assert.deepEqual(records[2], { name: 'kiwi', price: 99.0, stock: null });

-    assert.deepEqual(
-        records[3],
-        { name: "banana", stock: [{ quantity: null, warehouse: "C" }], price: 42.0 });
+    assert.deepEqual(records[3], { name: 'banana', stock: [{ quantity: null, warehouse: 'C' }], price: 42.0 });
   });

-  it('should materialize a static nested record with blank optional value', function() {
+  it('should materialize a static nested record with blank optional value', function () {
     var schema = new parquet.ParquetSchema({
       fruit: {
         fields: {
           name: { type: 'UTF8' },
-          colour: { type: 'UTF8', optional: true }
-        }
-      }
+          colour: { type: 'UTF8', optional: true },
+        },
+      },
     });

     let buffer = {
       rowCount: 1,
-      columnData: {}
+      columnData: {},
     };

     buffer.columnData['fruit'] = {
       dlevels: [],
       rlevels: [],
       values: [],
-      count: 0
+      count: 0,
     };

     buffer.columnData['fruit,name'] = {
       dlevels: [0],
       rlevels: [0],
-      values: [
-        Buffer.from([97, 112, 112, 108, 101])
-      ],
-      count: 1
+      values: [Buffer.from([97, 112, 112, 108, 101])],
+      count: 1,
     };

     buffer.columnData['fruit,colour'] = {
       dlevels: [0],
       rlevels: [0],
       values: [],
-      count: 1
+      count: 1,
     };

     let records = parquet.ParquetShredder.materializeRecords(schema, buffer);
     assert.equal(records.length, 1);

-    assert.deepEqual(
-        records[0],
-        { fruit: { name: "apple", colour: null } });
-
+    assert.deepEqual(records[0], { fruit: { name: 'apple', colour: null } });
   });
-
 });
diff --git a/test/statistics.js b/test/statistics.js
index a3239212..16e8f4a4 100644
--- a/test/statistics.js
+++ b/test/statistics.js
@@ -2,34 +2,33 @@ const chai = require('chai');
 const assert = chai.assert;
 const parquet = require('../parquet');

-const TEST_VTIME = new Date();
+const TEST_VTIME = new Date();

 const schema = new parquet.ParquetSchema({
-  name:      { type: 'UTF8' },
+  name: { type: 'UTF8' },
   //quantity: { type: 'INT64', encoding: 'RLE', typeLength: 6, optional: true }, // parquet-mr actually doesnt support this
-  quantity:  { type: 'INT64', optional: true },
-  price:     { type: 'DOUBLE' },
-  date:      { type: 'TIMESTAMP_MICROS' },
-  day:       { type: 'DATE' },
-  finger:    { type: 'FIXED_LEN_BYTE_ARRAY', typeLength: 5 },
-  inter:     { type: 'INTERVAL', statistics: false },
+  quantity: { type: 'INT64', optional: true },
+  price: { type: 'DOUBLE' },
+  date: { type: 'TIMESTAMP_MICROS' },
+  day: { type: 'DATE' },
+  finger: { type: 'FIXED_LEN_BYTE_ARRAY', typeLength: 5 },
+  inter: { type: 'INTERVAL', statistics: false },
   stock: {
     repeated: true,
     fields: {
       quantity: { type: 'INT64', repeated: true },
       warehouse: { type: 'UTF8' },
-    }
+    },
   },
-  colour:    { type: 'UTF8', repeated: true },
-  meta_json: { type: 'BSON', optional: true, statistics: false},
+  colour: { type: 'UTF8', repeated: true },
+  meta_json: { type: 'BSON', optional: true, statistics: false },
 });
-
-describe('statistics', async function() {
+describe('statistics', async function () {
   let row, reader;

-  before(async function(){
-    let writer = await parquet.ParquetWriter.openFile(schema, 'fruits-statistics.parquet', {pageSize: 3});
+  before(async function () {
+    let writer = await parquet.ParquetWriter.openFile(schema, 'fruits-statistics.parquet', { pageSize: 3 });

     await writer.appendRow({
       name: 'apples',
@@ -37,13 +36,13 @@
       price: 2.6,
       day: new Date('2017-11-26'),
       date: new Date(TEST_VTIME + 1000),
-      finger: "FNORD",
+      finger: 'FNORD',
       inter: { months: 10, days: 5, milliseconds: 777 },
       stock: [
-        { quantity: 10n, warehouse: "A" },
-        { quantity: 20n, warehouse: "B" }
+        { quantity: 10n, warehouse: 'A' },
+        { quantity: 20n, warehouse: 'B' },
       ],
-      colour: [ 'green', 'red' ]
+      colour: ['green', 'red'],
     });

     await writer.appendRow({
@@ -52,13 +51,13 @@
       price: 2.7,
       day: new Date('2018-03-03'),
       date: new Date(TEST_VTIME + 2000),
-      finger: "ABCDE",
+      finger: 'ABCDE',
       inter: { months: 42, days: 23, milliseconds: 777 },
       stock: {
         quantity: [50n, 33n, 34n, 35n, 36n],
-        warehouse: "X"
+        warehouse: 'X',
       },
-      colour: [ 'orange' ]
+      colour: ['orange'],
     });

     await writer.appendRow({
@@ -67,14 +66,14 @@
       quantity: 15n,
       day: new Date('2008-11-26'),
       date: new Date(TEST_VTIME + 8000),
-      finger: "XCVBN",
+      finger: 'XCVBN',
       inter: { months: 60, days: 1, milliseconds: 99 },
       stock: [
-        { quantity: 42n, warehouse: "f" },
-        { quantity: 21n, warehouse: "x" }
+        { quantity: 42n, warehouse: 'f' },
+        { quantity: 21n, warehouse: 'x' },
       ],
-      colour: [ 'green', 'brown', 'yellow' ],
-      meta_json: { expected_ship_date: TEST_VTIME }
+      colour: ['green', 'brown', 'yellow'],
+      meta_json: { expected_ship_date: TEST_VTIME },
     });

     await writer.appendRow({
@@ -82,10 +81,10 @@
       price: 3.2,
       day: new Date('2017-11-26'),
       date: new Date(TEST_VTIME + 6000),
-      finger: "FNORD",
+      finger: 'FNORD',
       inter: { months: 1, days: 15, milliseconds: 888 },
-      colour: [ 'yellow'],
-      meta_json: { shape: 'curved' }
+      colour: ['yellow'],
+      meta_json: { shape: 'curved' },
     });

     await writer.close();
@@ -93,21 +92,19 @@
     row = reader.metadata.row_groups[0];
   });

-  it('column statistics should match input', async function() {
+  it('column statistics should match input', async function () {
     const rowStats = (path) =>
-      row.columns.find(
-        d => d.meta_data.path_in_schema.join(',') == path
-      ).meta_data.statistics;
+      row.columns.find((d) => d.meta_data.path_in_schema.join(',') == path).meta_data.statistics;

-    assert.equal(rowStats('name').min_value,'apples');
-    assert.equal(rowStats('name').max_value,'oranges');
-    assert.equal(+rowStats('name').distinct_count,4);
-    assert.equal(+rowStats('name').null_count,0);
+    assert.equal(rowStats('name').min_value, 'apples');
+    assert.equal(rowStats('name').max_value, 'oranges');
+    assert.equal(+rowStats('name').distinct_count, 4);
+    assert.equal(+rowStats('name').null_count, 0);

-    assert.equal(rowStats('quantity').min_value,10);
-    assert.equal(rowStats('quantity').max_value,20);
-    assert.equal(+rowStats('quantity').distinct_count,3);
-    assert.equal(+rowStats('quantity').null_count,1);
+    assert.equal(rowStats('quantity').min_value, 10);
+    assert.equal(rowStats('quantity').max_value, 20);
+    assert.equal(+rowStats('quantity').distinct_count, 3);
+    assert.equal(+rowStats('quantity').null_count, 1);

     assert.equal(rowStats('price').min_value, 2.6);
     assert.equal(rowStats('price').max_value, 4.2);
@@ -143,13 +140,12 @@
     assert.equal(rowStats('meta_json'), null);
   });

-  it('columnIndex statistics should match input', async function() {
-
+  it('columnIndex statistics should match input', async function () {
     /* we split the data into pages by 3, so we should have page 1 with 3 recs and page 2 with 1 */
     const name = await reader.envelopeReader.readColumnIndex('name', row);
-    assert.deepEqual(name.min_values, ['apples','banana']);
-    assert.deepEqual(name.max_values, ['oranges','banana']);
+    assert.deepEqual(name.min_values, ['apples', 'banana']);
+    assert.deepEqual(name.max_values, ['oranges', 'banana']);
     assert.deepEqual(name.null_pages, [false, false]);
     assert.deepEqual(name.boundary_order, 0);

@@ -163,66 +159,69 @@
     assert.deepEqual(price.min_values, [2.6, 3.2]);
     assert.deepEqual(price.max_values, [4.2, 3.2]);
     assert.deepEqual(price.null_pages, [false, false]);
-    assert.deepEqual(price.boundary_order, 0)
+    assert.deepEqual(price.boundary_order, 0);

     const day = await reader.envelopeReader.readColumnIndex('day', row);
-    assert.deepEqual(day.min_values, [ new Date('2008-11-26'), new Date('2017-11-26') ]);
-    assert.deepEqual(day.max_values, [ new Date('2018-03-03'), new Date('2017-11-26') ]);
+    assert.deepEqual(day.min_values, [new Date('2008-11-26'), new Date('2017-11-26')]);
+    assert.deepEqual(day.max_values, [new Date('2018-03-03'), new Date('2017-11-26')]);
     assert.deepEqual(day.null_pages, [false, false]);
-    assert.deepEqual(day.boundary_order, 0)
+    assert.deepEqual(day.boundary_order, 0);

     const finger = await reader.envelopeReader.readColumnIndex('finger', row);
-    assert.deepEqual(finger.min_values, [ Buffer.from('ABCDE'), Buffer.from('FNORD') ]);
-    assert.deepEqual(finger.max_values, [ Buffer.from('XCVBN'), Buffer.from('FNORD')]);
+    assert.deepEqual(finger.min_values, [Buffer.from('ABCDE'), Buffer.from('FNORD')]);
+    assert.deepEqual(finger.max_values, [Buffer.from('XCVBN'), Buffer.from('FNORD')]);
     assert.deepEqual(finger.null_pages, [false, false]);
-    assert.deepEqual(finger.boundary_order, 0)
+    assert.deepEqual(finger.boundary_order, 0);

     const stockQuantity = await reader.envelopeReader.readColumnIndex('stock,quantity', row);
-    assert.deepEqual(stockQuantity.min_values, [ 10n, undefined ]);
-    assert.deepEqual(stockQuantity.max_values, [ 50n, undefined ]);
+    assert.deepEqual(stockQuantity.min_values, [10n, undefined]);
+    assert.deepEqual(stockQuantity.max_values, [50n, undefined]);
     assert.deepEqual(stockQuantity.null_pages, [false, false]);
-    assert.deepEqual(stockQuantity.boundary_order, 0)
+    assert.deepEqual(stockQuantity.boundary_order, 0);

     const stockWarehouse = await reader.envelopeReader.readColumnIndex('stock,warehouse', row);
-    assert.deepEqual(stockWarehouse.min_values, [ 'A', undefined ]);
-    assert.deepEqual(stockWarehouse.max_values, [ 'x', undefined ]);
+    assert.deepEqual(stockWarehouse.min_values, ['A', undefined]);
+    assert.deepEqual(stockWarehouse.max_values, ['x', undefined]);
     assert.deepEqual(stockWarehouse.null_pages, [false, false]);
-    assert.deepEqual(stockWarehouse.boundary_order, 0)
+    assert.deepEqual(stockWarehouse.boundary_order, 0);

     const colour = await reader.envelopeReader.readColumnIndex('colour', row);
-    assert.deepEqual(colour.min_values, [ 'brown', 'yellow' ]);
-    assert.deepEqual(colour.max_values, [ 'yellow', 'yellow' ]);
+    assert.deepEqual(colour.min_values, ['brown', 'yellow']);
+    assert.deepEqual(colour.max_values, ['yellow', 'yellow']);
     assert.deepEqual(colour.null_pages, [false, false]);
-    assert.deepEqual(colour.boundary_order, 0)
+    assert.deepEqual(colour.boundary_order, 0);

-    const inter = await reader.envelopeReader.readColumnIndex('inter', row).catch(e => e);
-    assert.equal(inter.message,'Column Index Missing');
+    const inter = await reader.envelopeReader.readColumnIndex('inter', row).catch((e) => e);
+    assert.equal(inter.message, 'Column Index Missing');

-    const meta_json = await reader.envelopeReader.readColumnIndex('meta_json', row).catch(e => e);
-    assert.equal(meta_json.message,'Column Index Missing');
+    const meta_json = await reader.envelopeReader.readColumnIndex('meta_json', row).catch((e) => e);
+    assert.equal(meta_json.message, 'Column Index Missing');
   });

-  it('Setting pageIndex: false results in no column_index and no offset_index', async function() {
-    let writer = await parquet.ParquetWriter.openFile(schema, 'fruits-no-index.parquet', {pageSize: 3, pageIndex: false});
+  it('Setting pageIndex: false results in no column_index and no offset_index', async function () {
+    let writer = await parquet.ParquetWriter.openFile(schema, 'fruits-no-index.parquet', {
+      pageSize: 3,
+      pageIndex: false,
+    });
     await writer.appendRow({
       name: 'apples',
       quantity: 10n,
       price: 2.6,
       day: new Date('2017-11-26'),
       date: new Date(TEST_VTIME + 1000),
-      finger: "FNORD",
+      finger: 'FNORD',
       inter: { months: 10, days: 5, milliseconds: 777 },
       stock: [
-        { quantity: 10n, warehouse: "A" },
-        { quantity: 20n, warehouse: "B" }
+        { quantity: 10n, warehouse: 'A' },
+        { quantity: 20n, warehouse: 'B' },
       ],
-      colour: [ 'green', 'red' ],
-      meta_json: { expected_ship_date: TEST_VTIME }
+      colour: ['green', 'red'],
+      meta_json: { expected_ship_date: TEST_VTIME },
     });
     await writer.close();

     let reader2 = await parquet.ParquetReader.openFile('fruits-no-index.parquet');
-    reader2.metadata.row_groups[0].columns.forEach(column => {
+    reader2.metadata.row_groups[0].columns.forEach((column) => {
       assert.equal(column.offset_index_offset, null);
       assert.equal(column.offset_index_length, null);
       assert.equal(column.column_index_offset, null);
diff --git a/test/test-files.js b/test/test-files.js
index 1fab5a34..1e1a5bd9 100644
--- a/test/test-files.js
+++ b/test/test-files.js
@@ -4,17 +4,18 @@ const fs = require('fs');
 const assert = chai.assert;
 const path = require('path');
 const parquet = require('../parquet');
-const {promisify} = require('util');
+const { promisify } = require('util');

-describe('test-files', function() {
+describe('test-files', function () {
   let csv;

   async function readData(file, count) {
-    let records = [],record;
+    let records = [],
+      record;
     let i = 0;
-    const reader = await parquet.ParquetReader.openFile(path.join(__dirname,'test-files',file));
+    const reader = await parquet.ParquetReader.openFile(path.join(__dirname, 'test-files', file));
     const cursor = reader.getCursor();
-    while ( (record = await cursor.next()) && (!count || i++ < count)) {
+    while ((record = await cursor.next()) && (!count || i++ < count)) {
       records.push(record);
     }
     return records;
@@ -28,8 +29,8 @@ describe('test-files', function() {
   function bufferToString(d) {
     if (d instanceof Buffer) {
       return d.toString();
-    } else if (typeof d === 'object'){
-      Object.keys(d).forEach(key => {
+    } else if (typeof d === 'object') {
+      Object.keys(d).forEach((key) => {
         d[key] = bufferToString(d[key], key);
       });
       return d;
@@ -45,105 +46,122 @@ describe('test-files', function() {

     records = bufferToString(records);

-    assert.deepEqual(records, csv.map(d => d.reduce( (p,d,i) => {
-      p[fields[i]] = isNaN(d) ? d : +d;
-      return p;
-    },{})));
+    assert.deepEqual(
+      records,
+      csv.map((d) =>
+        d.reduce((p, d, i) => {
+          p[fields[i]] = isNaN(d) ? d : +d;
+          return p;
+        }, {})
+      )
+    );
   }

-  before(async function() {
-    csv = await promisify(fs.readFile)(path.join(__dirname,'test-files','nation.csv'));
-    csv = csv.toString().split('\n').filter(d => d.length).map(d => d.split('|'));
+  before(async function () {
+    csv = await promisify(fs.readFile)(path.join(__dirname, 'test-files', 'nation.csv'));
+    csv = csv
+      .toString()
+      .split('\n')
+      .filter((d) => d.length)
+      .map((d) => d.split('|'));
   });

-  it('customer.impala.parquet loads', async function() {
+  it('customer.impala.parquet loads', async function () {
     this.timeout(5000);
-    let data = await readData('customer.impala.parquet',100);
+    let data = await readData('customer.impala.parquet', 100);
     bufferToString(data);
-    const expected = require(path.join(__dirname,'test-files','customer.impala.json')).map(el => { return { ...el, c_custkey: BigInt(el.c_custkey)}});
-    assert.deepEqual(data,expected);
+    const expected = require(path.join(__dirname, 'test-files', 'customer.impala.json')).map((el) => {
+      return { ...el, c_custkey: BigInt(el.c_custkey) };
+    });
+    assert.deepEqual(data, expected);
   });

-  it('gzip-nation.impala.parquet loads', async function() {
-    await check('gzip-nation.impala.parquet',['n_nationkey','n_name','n_regionkey','n_comment']);
+  it('gzip-nation.impala.parquet loads', async function () {
+    await check('gzip-nation.impala.parquet', ['n_nationkey', 'n_name', 'n_regionkey', 'n_comment']);
   });

   // repeated values
-  it.skip('nation.dict.parquet loads', async function() {
-    await check('nation.dict.parquet',['nation_key','name','region_key','comment_col']);
+  it.skip('nation.dict.parquet loads', async function () {
+    await check('nation.dict.parquet', ['nation_key', 'name', 'region_key', 'comment_col']);
   });

-  it('nation.impala.parquet loads', async function() {
-    await check('nation.impala.parquet', ['n_nationkey','n_name','n_regionkey','n_comment']);
+  it('nation.impala.parquet loads', async function () {
+    await check('nation.impala.parquet', ['n_nationkey', 'n_name', 'n_regionkey', 'n_comment']);
   });

-  it('nation.plain.parquet loads', async function() {
+  it('nation.plain.parquet loads', async function () {
     let records = await readData('nation.plain.parquet');
-    await check(records,['nation_key','name','region_key','comment_col']);
+    await check(records, ['nation_key', 'name', 'region_key', 'comment_col']);
   });

-  it('snappy-nation.impala.parquet loads', async function() {
-    await check('snappy-nation.impala.parquet', ['n_nationkey','n_name','n_regionkey','n_comment']);
+  it('snappy-nation.impala.parquet loads', async function () {
+    await check('snappy-nation.impala.parquet', ['n_nationkey', 'n_name', 'n_regionkey', 'n_comment']);
   });

-  it('mr_times.parq loads', async function() {
+  it('mr_times.parq loads', async function () {
     const data = await readData('mr_times.parq');

-    assert.deepEqual(data,[
-      {'id':'1','date_added':'83281000000000'},
-      {'id':'2','date_added':'83282000000000'},
-      {'id':'3','date_added':'83283000000000'},
-      {'id':'4','date_added':'83284000000000'},
-      {'id':'5','date_added':'83284000000000'},
-      {'id':'6','date_added':'83285000000000'},
-      {'id':'7','date_added':'83286000000000'},
-      {'id':'8','date_added':'83287000000000'},
-      {'id':'9','date_added':'83288000000000'},
-      {'id':'10','date_added':'83289000000000'}
+    assert.deepEqual(data, [
+      { id: '1', date_added: '83281000000000' },
+      { id: '2', date_added: '83282000000000' },
+      { id: '3', date_added: '83283000000000' },
+      { id: '4', date_added: '83284000000000' },
+      { id: '5', date_added: '83284000000000' },
+      { id: '6', date_added: '83285000000000' },
+      { id: '7', date_added: '83286000000000' },
+      { id: '8', date_added: '83287000000000' },
+      { id: '9', date_added: '83288000000000' },
+      { id: '10', date_added: '83289000000000' },
     ]);
   });

-  it('nested.parq loads', async function() {
+  it('nested.parq loads', async function () {
     const data = await readData('nested.parq');

-    assert.deepEqual(data,[...Array(10)].map( () => ({'nest':{'thing':{'list':[{'element':'hi'},{'element':'world'}]}}})))
+    assert.deepEqual(
+      data,
+      [...Array(10)].map(() => ({ nest: { thing: { list: [{ element: 'hi' }, { element: 'world' }] } } }))
+    );
   });

-  it('test-converted-type-null.parquet loads', async function() {
+  it('test-converted-type-null.parquet loads', async function () {
     const data = await readData('test-converted-type-null.parquet');
-    assert.deepEqual(data,[{foo: 'bar'},{foo: null}]);
+    assert.deepEqual(data, [{ foo: 'bar' }, { foo: null }]);
   });

-  it('test-enum-type.parquet loads', async function() {
+  it('test-enum-type.parquet loads', async function () {
     const data = await readData('test-enum-type.parquet');
-    assert.deepEqual(data,[{foo: 'bar'}]);
+    assert.deepEqual(data, [{ foo: 'bar' }]);
   });

-  it('test-null-dictionary.parquet loads', async function() {
+  it('test-null-dictionary.parquet loads', async function () {
     const data = await readData('test-null-dictionary.parquet');
-    assert.deepEqual(
-      data,
-      [
-        { foo: null },
-        { foo: 'bar' }, { foo: 'baz' },
-        { foo: 'bar' }, { foo: 'baz' },
-        { foo: 'bar' }, { foo: 'baz' }
-      ]
-    );
+    assert.deepEqual(data, [
+      { foo: null },
+      { foo: 'bar' },
+      { foo: 'baz' },
+      { foo: 'bar' },
+      { foo: 'baz' },
+      { foo: 'bar' },
+      { foo: 'baz' },
+    ]);
   });

-  it('test-null.parquet loads', async function() {
+  it('test-null.parquet loads', async function () {
     const data = await readData('test-null.parquet');
-    assert.deepEqual(data,[{foo: 1, bar: 2},{foo: 1, bar: null}]);
+    assert.deepEqual(data, [
+      { foo: 1, bar: 2 },
+      { foo: 1, bar: null },
+    ]);
   });

-  it('test.parquet loads', async function() {
+  it('test.parquet loads', async function () {
     const data = await readData('test.parquet');
     bufferToString(data);
-    assert.deepEqual(data.slice(0,5), [
-      {'bhello':'hello','f':0,'i32':0,'i64':0n,'hello':'hello'},
-      {'bhello':'people','f':1,'i32':1,'i64':1n,'hello':'people'},
-      {'bhello':'people','f':2,'i32':2,'i64':2n,'hello':'people'},
-      {'bhello':'people','f':3,'i32':3,'i64':3n,'hello':'people'},
-      {'bhello':'you','f':4,'i32':4,'i64':4n,'hello':'you'}
+    assert.deepEqual(data.slice(0, 5), [
+      { bhello: 'hello', f: 0, i32: 0, i64: 0n, hello: 'hello' },
+      { bhello: 'people', f: 1, i32: 1, i64: 1n, hello: 'people' },
+      { bhello: 'people', f: 2, i32: 2, i64: 2n, hello: 'people' },
+      { bhello: 'people', f: 3, i32: 3, i64: 3n, hello: 'people' },
+      { bhello: 'you', f: 4, i32: 4, i64: 4n, hello: 'you' },
     ]);
   });

@@ -151,7 +169,7 @@ describe('test-files', function() {
     const schema = await readSchema('fixed_length_decimal.parquet');
     const data = await readData('fixed_length_decimal.parquet');

-    const scale = schema.fields["value"].scale;
+    const scale = schema.fields['value'].scale;
     assert.equal(scale, 2);
     const divider = 10 ** scale;

@@ -164,7 +182,8 @@ describe('test-files', function() {
       // In reality, the user will need to use a more novel approach to parse the
       // buffer to an object that can handle large fractional numbers.
       const lastThreeBytes = data[i].value.slice(-3);
-      const numericalValue = (lastThreeBytes[0] * 0x10000 + lastThreeBytes[1] * 0x100 ** 1 + lastThreeBytes[2]) / divider;
+      const numericalValue =
+        (lastThreeBytes[0] * 0x10000 + lastThreeBytes[1] * 0x100 ** 1 + lastThreeBytes[2]) / divider;
       assert.equal(numericalValue, valueToMatch);
     }
   });

@@ -173,7 +192,7 @@ describe('test-files', function() {
     const schema = await readSchema('byte_array_decimal.parquet');
     const data = await readData('byte_array_decimal.parquet');

-    const scale = schema.fields["value"].scale;
+    const scale = schema.fields['value'].scale;
     assert.equal(scale, 2);
     const divider = 10 ** scale;

@@ -197,22 +216,22 @@ describe('test-files', function() {
     }
   });

-  describe("RLE", function () {
+  describe('RLE', function () {
     // Tracked in https://github.com/LibertyDSNP/parquetjs/issues/113
-    it.skip('rle_boolean_encoding.parquet loads', async function() {
+    it.skip('rle_boolean_encoding.parquet loads', async function () {
       const data = await readData('rle/rle_boolean_encoding.parquet');
-      assert.deepEqual(data[0],{ datatype_boolean: true });
-      assert.deepEqual(data[1],{ datatype_boolean: false });
+      assert.deepEqual(data[0], { datatype_boolean: true });
+      assert.deepEqual(data[1], { datatype_boolean: false });
     });

-    it('rle-dict-snappy-checksum.parquet loads', async function() {
+    it('rle-dict-snappy-checksum.parquet loads', async function () {
       const data = await readData('rle/rle-dict-snappy-checksum.parquet');
-      assert.deepEqual(data[0],{ binary_field: "c95e263a-f5d4-401f-8107-5ca7146a1f98", long_field: "0" });
+      assert.deepEqual(data[0], { binary_field: 'c95e263a-f5d4-401f-8107-5ca7146a1f98', long_field: '0' });
     });

-    it('rle-dict-uncompressed-corrupt-checksum.parquet loads', async function() {
+    it('rle-dict-uncompressed-corrupt-checksum.parquet loads', async function () {
       const data = await readData('rle/rle-dict-uncompressed-corrupt-checksum.parquet');
-      assert.deepEqual(data[0],{ binary_field: "6325c32b-f417-41aa-9e02-9b8601542aff", long_field: "0" });
+      assert.deepEqual(data[0], { binary_field: '6325c32b-f417-41aa-9e02-9b8601542aff', long_field: '0' });
     });
-  })
+  });
 });
diff --git a/test/test-files/address.schema.json b/test/test-files/address.schema.json
index d657c8a2..7a56d4c7 100644
--- a/test/test-files/address.schema.json
+++ b/test/test-files/address.schema.json
@@ -1,42 +1,34 @@
 {
-    "$id": "https://example.com/address.schema.json",
-    "$schema": "https://json-schema.org/draft/2020-12/schema",
-    "description": "An address similar to http://microformats.org/wiki/h-card",
-    "type": "object",
-    "properties": {
-        "post-office-box": {
-            "type": "string"
-        },
-        "extended-address": {
-            "type": "string"
-        },
-        "street-address": {
-            "type": "string"
-        },
-        "locality": {
-            "type": "string"
-        },
-        "region": {
-            "type": "string"
-        },
-        "postal-code": {
-            "type": "string"
-        },
-        "country-name": {
-            "type": "string"
-        }
+  "$id": "https://example.com/address.schema.json",
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "description": "An address similar to http://microformats.org/wiki/h-card",
+  "type": "object",
+  "properties": {
+    "post-office-box": {
+      "type": "string"
     },
-    "required": [
-        "locality",
-        "region",
-        "country-name"
-    ],
-    "dependentRequired": {
-        "post-office-box": [
-            "street-address"
-        ],
-        "extended-address": [
-            "street-address"
-        ]
+    "extended-address": {
+      "type": "string"
+    },
+    "street-address": {
+      "type": "string"
+    },
+    "locality": {
+      "type": "string"
+    },
+    "region": {
+      "type": "string"
+    },
+    "postal-code": {
+      "type": "string"
+    },
+    "country-name": {
+      "type": "string"
     }
+  },
+  "required": ["locality", "region", "country-name"],
+  "dependentRequired": {
+    "post-office-box": ["street-address"],
+    "extended-address": ["street-address"]
   }
+}
diff --git a/test/test-files/address.schema.result.json b/test/test-files/address.schema.result.json
index 035f9423..ced4a747 100644
--- a/test/test-files/address.schema.result.json
+++ b/test/test-files/address.schema.result.json
@@ -48,9 +48,7 @@
       "name": "post-office-box",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "post-office-box"
-      ],
+      "path": ["post-office-box"],
       "repetitionType": "OPTIONAL",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -61,9 +59,7 @@
       "name": "extended-address",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "extended-address"
-      ],
+      "path": ["extended-address"],
       "repetitionType": "OPTIONAL",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -74,9 +70,7 @@
       "name": "street-address",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "street-address"
-      ],
+      "path": ["street-address"],
       "repetitionType": "OPTIONAL",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -87,9 +81,7 @@
       "name": "locality",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "locality"
-      ],
+      "path": ["locality"],
       "repetitionType": "REQUIRED",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -100,9 +92,7 @@
       "name": "region",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "region"
-      ],
+      "path": ["region"],
       "repetitionType": "REQUIRED",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -113,9 +103,7 @@
       "name": "postal-code",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "postal-code"
-      ],
+      "path": ["postal-code"],
       "repetitionType": "OPTIONAL",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -126,9 +114,7 @@
       "name": "country-name",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "country-name"
-      ],
+      "path": ["country-name"],
       "repetitionType": "REQUIRED",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -141,9 +127,7 @@
       "name": "post-office-box",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "post-office-box"
-      ],
+      "path": ["post-office-box"],
       "repetitionType": "OPTIONAL",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -154,9 +138,7 @@
       "name": "extended-address",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "extended-address"
-      ],
+      "path": ["extended-address"],
       "repetitionType": "OPTIONAL",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -167,9 +149,7 @@
       "name": "street-address",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "street-address"
-      ],
+      "path": ["street-address"],
       "repetitionType": "OPTIONAL",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -180,9 +160,7 @@
       "name": "locality",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "locality"
-      ],
+      "path": ["locality"],
       "repetitionType": "REQUIRED",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -193,9 +171,7 @@
       "name": "region",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "region"
-      ],
+      "path": ["region"],
       "repetitionType": "REQUIRED",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -206,9 +182,7 @@
       "name": "postal-code",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "postal-code"
-      ],
+      "path": ["postal-code"],
       "repetitionType": "OPTIONAL",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -219,9 +193,7 @@
       "name": "country-name",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "country-name"
-      ],
+      "path": ["country-name"],
       "repetitionType": "REQUIRED",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
diff --git a/test/test-files/array.schema.json b/test/test-files/array.schema.json
index 9b5b12d2..0f8e0520 100644
--- a/test/test-files/array.schema.json
+++ b/test/test-files/array.schema.json
@@ -1,18 +1,18 @@
 {
-    "type": "object",
-    "properties": {
-      "numberArray": {
-        "type": "array",
-        "items": {
-          "type": "number"
-        }
-      },
-      "stringArray": {
-        "type": "array",
-        "items": {
-          "type": "string"
-        }
+  "type": "object",
+  "properties": {
+    "numberArray": {
+      "type": "array",
+      "items": {
+        "type": "number"
       }
     },
-    "required": ["numberArray", "stringArray"]
-  }
+    "stringArray": {
+      "type": "array",
+      "items": {
+        "type": "string"
+      }
+    }
+  },
+  "required": ["numberArray", "stringArray"]
+}
diff --git a/test/test-files/array.schema.result.json b/test/test-files/array.schema.result.json
index 0107a22f..c33a60ae 100644
--- a/test/test-files/array.schema.result.json
+++ b/test/test-files/array.schema.result.json
@@ -38,9 +38,7 @@
   "fields": {
     "numberArray": {
       "name": "numberArray",
-      "path": [
-        "numberArray"
-      ],
+      "path": ["numberArray"],
       "repetitionType": "REQUIRED",
       "rLevelMax": 0,
       "dLevelMax": 0,
@@ -49,10 +47,7 @@
       "fields": {
         "list": {
           "name": "list",
-          "path": [
-            "numberArray",
-            "list"
-          ],
+          "path": ["numberArray", "list"],
           "repetitionType": "REPEATED",
           "rLevelMax": 1,
           "dLevelMax": 1,
@@ -62,11 +57,7 @@
             "element": {
               "name": "element",
               "primitiveType": "DOUBLE",
-              "path": [
-                "numberArray",
-                "list",
-                "element"
-              ],
+              "path": ["numberArray", "list", "element"],
               "repetitionType": "OPTIONAL",
               "encoding": "PLAIN",
               "compression": "UNCOMPRESSED",
@@ -80,9 +71,7 @@
     },
     "stringArray": {
       "name": "stringArray",
-      "path": [
-        "stringArray"
-      ],
+      "path": ["stringArray"],
       "repetitionType": "REQUIRED",
       "rLevelMax": 0,
       "dLevelMax": 0,
@@ -91,10 +80,7 @@
       "fields": {
         "list": {
           "name": "list",
-          "path": [
-            "stringArray",
-            "list"
-          ],
+          "path": ["stringArray", "list"],
           "repetitionType": "REPEATED",
           "rLevelMax": 1,
           "dLevelMax": 1,
@@ -105,11 +91,7 @@
             "name": "element",
             "primitiveType": "BYTE_ARRAY",
             "originalType": "UTF8",
-            "path": [
-              "stringArray",
-              "list",
-              "element"
-            ],
+            "path": ["stringArray", "list", "element"],
             "repetitionType": "OPTIONAL",
             "encoding": "PLAIN",
             "compression": "UNCOMPRESSED",
@@ -125,9 +107,7 @@
   "fieldList": [
     {
       "name": "numberArray",
-      "path": [
-        "numberArray"
-      ],
+      "path": ["numberArray"],
      "repetitionType": "REQUIRED",
      "rLevelMax": 0,
      "dLevelMax": 0,
@@ -136,10 +116,7 @@
       "fields": {
         "list": {
           "name": "list",
-          "path": [
-            "numberArray",
-            "list"
-          ],
+          "path": ["numberArray", "list"],
           "repetitionType": "REPEATED",
           "rLevelMax": 1,
           "dLevelMax": 1,
@@ -149,11 +126,7 @@
             "element": {
               "name": "element",
               "primitiveType": "DOUBLE",
-              "path": [
-                "numberArray",
-                "list",
-                "element"
-              ],
+              "path": ["numberArray", "list", "element"],
               "repetitionType": "OPTIONAL",
               "encoding": "PLAIN",
               "compression": "UNCOMPRESSED",
@@ -167,10 +140,7 @@
     },
     {
       "name": "list",
-      "path": [
-        "numberArray",
-        "list"
-      ],
+      "path": ["numberArray", "list"],
       "repetitionType": "REPEATED",
       "rLevelMax": 1,
       "dLevelMax": 1,
@@ -180,11 +150,7 @@
         "element": {
           "name": "element",
           "primitiveType": "DOUBLE",
-          "path": [
-            "numberArray",
-            "list",
-            "element"
-          ],
+          "path": ["numberArray", "list", "element"],
           "repetitionType": "OPTIONAL",
           "encoding": "PLAIN",
           "compression": "UNCOMPRESSED",
@@ -196,11 +162,7 @@
     {
       "name": "element",
       "primitiveType": "DOUBLE",
-      "path": [
-        "numberArray",
-        "list",
-        "element"
-      ],
+      "path": ["numberArray", "list", "element"],
       "repetitionType": "OPTIONAL",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -209,9 +171,7 @@
     },
     {
       "name": "stringArray",
-      "path": [
-        "stringArray"
-      ],
+      "path": ["stringArray"],
       "repetitionType": "REQUIRED",
       "rLevelMax": 0,
       "dLevelMax": 0,
@@ -220,10 +180,7 @@
       "fields": {
         "list": {
           "name": "list",
-          "path": [
-            "stringArray",
-            "list"
-          ],
+          "path": ["stringArray", "list"],
           "repetitionType": "REPEATED",
           "rLevelMax": 1,
           "dLevelMax": 1,
@@ -234,11 +191,7 @@
             "name": "element",
             "primitiveType": "BYTE_ARRAY",
             "originalType": "UTF8",
-            "path": [
-              "stringArray",
-              "list",
-              "element"
-            ],
+            "path": ["stringArray", "list", "element"],
             "repetitionType": "OPTIONAL",
             "encoding": "PLAIN",
             "compression": "UNCOMPRESSED",
@@ -252,10 +205,7 @@
     },
     {
       "name": "list",
-      "path": [
-        "stringArray",
-        "list"
-      ],
+      "path": ["stringArray", "list"],
       "repetitionType": "REPEATED",
       "rLevelMax": 1,
       "dLevelMax": 1,
@@ -266,11 +216,7 @@
       "name": "element",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "stringArray",
-        "list",
-        "element"
-      ],
+      "path": ["stringArray", "list", "element"],
       "repetitionType": "OPTIONAL",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
@@ -283,11 +229,7 @@
       "name": "element",
       "primitiveType": "BYTE_ARRAY",
       "originalType": "UTF8",
-      "path": [
-        "stringArray",
-        "list",
-        "element"
-      ],
+      "path": ["stringArray", "list", "element"],
       "repetitionType": "OPTIONAL",
       "encoding": "PLAIN",
       "compression": "UNCOMPRESSED",
diff --git a/test/test-files/customer.impala.json b/test/test-files/customer.impala.json
index 587802a1..3e798ac3 100644
--- a/test/test-files/customer.impala.json
+++ b/test/test-files/customer.impala.json
@@ -1 +1,1002 @@
-[{"c_custkey":1,"c_name":"Customer#000000001","c_address":"IVhzIApeRb ot,c,E","c_nationkey":15,"c_phone":"25-989-741-2988","c_acctbal":711.56,"c_mktsegment":"BUILDING","c_comment":"to the even, regular platelets. regular, ironic epitaphs nag e"},{"c_custkey":2,"c_name":"Customer#000000002","c_address":"XSTf4,NCwDVaWNe6tEgvwfmRchLXak","c_nationkey":13,"c_phone":"23-768-687-3665","c_acctbal":121.65,"c_mktsegment":"AUTOMOBILE","c_comment":"l accounts. blithely ironic theodolites integrate boldly: caref"},{"c_custkey":3,"c_name":"Customer#000000003","c_address":"MG9kdTD2WBHm","c_nationkey":1,"c_phone":"11-719-748-3364","c_acctbal":7498.12,"c_mktsegment":"AUTOMOBILE","c_comment":" deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov
blithely even accounts abov"},{"c_custkey":4,"c_name":"Customer#000000004","c_address":"XxVSJsLAGtn","c_nationkey":4,"c_phone":"14-128-190-5944","c_acctbal":2866.83,"c_mktsegment":"MACHINERY","c_comment":" requests. final, regular ideas sleep final accou"},{"c_custkey":5,"c_name":"Customer#000000005","c_address":"KvpyuHCplrB84WgAiGV6sYpZq7Tj","c_nationkey":3,"c_phone":"13-750-942-6364","c_acctbal":794.47,"c_mktsegment":"HOUSEHOLD","c_comment":"n accounts will have to unwind. foxes cajole accor"},{"c_custkey":6,"c_name":"Customer#000000006","c_address":"sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn","c_nationkey":20,"c_phone":"30-114-968-4951","c_acctbal":7638.57,"c_mktsegment":"AUTOMOBILE","c_comment":"tions. even deposits boost according to the slyly bold packages. final accounts cajole requests. furious"},{"c_custkey":7,"c_name":"Customer#000000007","c_address":"TcGe5gaZNgVePxU5kRrvXBfkasDTea","c_nationkey":18,"c_phone":"28-190-982-9759","c_acctbal":9561.95,"c_mktsegment":"AUTOMOBILE","c_comment":"ainst the ironic, express theodolites. express, even pinto beans among the exp"},{"c_custkey":8,"c_name":"Customer#000000008","c_address":"I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5","c_nationkey":17,"c_phone":"27-147-574-9335","c_acctbal":6819.74,"c_mktsegment":"BUILDING","c_comment":"among the slyly regular theodolites kindle blithely courts. carefully even theodolites haggle slyly along the ide"},{"c_custkey":9,"c_name":"Customer#000000009","c_address":"xKiAFTjUsCuxfeleNqefumTrjS","c_nationkey":8,"c_phone":"18-338-906-3675","c_acctbal":8324.07,"c_mktsegment":"FURNITURE","c_comment":"r theodolites according to the requests wake thinly excuses: pending requests haggle furiousl"},{"c_custkey":10,"c_name":"Customer#000000010","c_address":"6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2","c_nationkey":5,"c_phone":"15-741-346-9870","c_acctbal":2753.54,"c_mktsegment":"HOUSEHOLD","c_comment":"es regular deposits haggle. fur"},{"c_custkey":11,"c_name":"Customer#000000011","c_address":"PkWS 3HlXqwTuzrKg633BEi","c_nationkey":23,"c_phone":"33-464-151-3439","c_acctbal":-272.6,"c_mktsegment":"BUILDING","c_comment":"ckages. requests sleep slyly. quickly even pinto beans promise above the slyly regular pinto beans. "},{"c_custkey":12,"c_name":"Customer#000000012","c_address":"9PWKuhzT4Zr1Q","c_nationkey":13,"c_phone":"23-791-276-1263","c_acctbal":3396.49,"c_mktsegment":"HOUSEHOLD","c_comment":" to the carefully final braids. blithely regular requests nag. ironic theodolites boost quickly along"},{"c_custkey":13,"c_name":"Customer#000000013","c_address":"nsXQu0oVjD7PM659uC3SRSp","c_nationkey":3,"c_phone":"13-761-547-5974","c_acctbal":3857.34,"c_mktsegment":"BUILDING","c_comment":"ounts sleep carefully after the close frays. carefully bold notornis use ironic requests. blithely"},{"c_custkey":14,"c_name":"Customer#000000014","c_address":"KXkletMlL2JQEA ","c_nationkey":1,"c_phone":"11-845-129-3851","c_acctbal":5266.3,"c_mktsegment":"FURNITURE","c_comment":", ironic packages across the unus"},{"c_custkey":15,"c_name":"Customer#000000015","c_address":"YtWggXoOLdwdo7b0y,BZaGUQMLJMX1Y,EC,6Dn","c_nationkey":23,"c_phone":"33-687-542-7601","c_acctbal":2788.52,"c_mktsegment":"HOUSEHOLD","c_comment":" platelets. regular deposits detect asymptotes. blithely unusual packages nag slyly at the fluf"},{"c_custkey":16,"c_name":"Customer#000000016","c_address":"cYiaeMLZSMAOQ2 d0W,","c_nationkey":10,"c_phone":"20-781-609-3107","c_acctbal":4681.03,"c_mktsegment":"FURNITURE","c_comment":"kly silent courts. 
thinly regular theodolites sleep fluffily after "},{"c_custkey":17,"c_name":"Customer#000000017","c_address":"izrh 6jdqtp2eqdtbkswDD8SG4SzXruMfIXyR7","c_nationkey":2,"c_phone":"12-970-682-3487","c_acctbal":6.34,"c_mktsegment":"AUTOMOBILE","c_comment":"packages wake! blithely even pint"},{"c_custkey":18,"c_name":"Customer#000000018","c_address":"3txGO AiuFux3zT0Z9NYaFRnZt","c_nationkey":6,"c_phone":"16-155-215-1315","c_acctbal":5494.43,"c_mktsegment":"BUILDING","c_comment":"s sleep. carefully even instructions nag furiously alongside of t"},{"c_custkey":19,"c_name":"Customer#000000019","c_address":"uc,3bHIx84H,wdrmLOjVsiqXCq2tr","c_nationkey":18,"c_phone":"28-396-526-5053","c_acctbal":8914.71,"c_mktsegment":"HOUSEHOLD","c_comment":" nag. furiously careful packages are slyly at the accounts. furiously regular in"},{"c_custkey":20,"c_name":"Customer#000000020","c_address":"JrPk8Pqplj4Ne","c_nationkey":22,"c_phone":"32-957-234-8742","c_acctbal":7603.4,"c_mktsegment":"FURNITURE","c_comment":"g alongside of the special excuses-- fluffily enticing packages wake "},{"c_custkey":21,"c_name":"Customer#000000021","c_address":"XYmVpr9yAHDEn","c_nationkey":8,"c_phone":"18-902-614-8344","c_acctbal":1428.25,"c_mktsegment":"MACHINERY","c_comment":" quickly final accounts integrate blithely furiously u"},{"c_custkey":22,"c_name":"Customer#000000022","c_address":"QI6p41,FNs5k7RZoCCVPUTkUdYpB","c_nationkey":3,"c_phone":"13-806-545-9701","c_acctbal":591.98,"c_mktsegment":"MACHINERY","c_comment":"s nod furiously above the furiously ironic ideas. "},{"c_custkey":23,"c_name":"Customer#000000023","c_address":"OdY W13N7Be3OC5MpgfmcYss0Wn6TKT","c_nationkey":3,"c_phone":"13-312-472-8245","c_acctbal":3332.02,"c_mktsegment":"HOUSEHOLD","c_comment":"deposits. special deposits cajole slyly. fluffily special deposits about the furiously "},{"c_custkey":24,"c_name":"Customer#000000024","c_address":"HXAFgIAyjxtdqwimt13Y3OZO 4xeLe7U8PqG","c_nationkey":13,"c_phone":"23-127-851-8031","c_acctbal":9255.67,"c_mktsegment":"MACHINERY","c_comment":"into beans. fluffily final ideas haggle fluffily"},{"c_custkey":25,"c_name":"Customer#000000025","c_address":"Hp8GyFQgGHFYSilH5tBfe","c_nationkey":12,"c_phone":"22-603-468-3533","c_acctbal":7133.7,"c_mktsegment":"FURNITURE","c_comment":"y. accounts sleep ruthlessly according to the regular theodolites. unusual instructions sleep. ironic, final"},{"c_custkey":26,"c_name":"Customer#000000026","c_address":"8ljrc5ZeMl7UciP","c_nationkey":22,"c_phone":"32-363-455-4837","c_acctbal":5182.05,"c_mktsegment":"AUTOMOBILE","c_comment":"c requests use furiously ironic requests. slyly ironic dependencies us"},{"c_custkey":27,"c_name":"Customer#000000027","c_address":"IS8GIyxpBrLpMT0u7","c_nationkey":3,"c_phone":"13-137-193-2709","c_acctbal":5679.84,"c_mktsegment":"BUILDING","c_comment":" about the carefully ironic pinto beans. accoun"},{"c_custkey":28,"c_name":"Customer#000000028","c_address":"iVyg0daQ,Tha8x2WPWA9m2529m","c_nationkey":8,"c_phone":"18-774-241-1462","c_acctbal":1007.18,"c_mktsegment":"FURNITURE","c_comment":" along the regular deposits. 
furiously final pac"},{"c_custkey":29,"c_name":"Customer#000000029","c_address":"sJ5adtfyAkCK63df2,vF25zyQMVYE34uh","c_nationkey":0,"c_phone":"10-773-203-7342","c_acctbal":7618.27,"c_mktsegment":"FURNITURE","c_comment":"its after the carefully final platelets x-ray against "},{"c_custkey":30,"c_name":"Customer#000000030","c_address":"nJDsELGAavU63Jl0c5NKsKfL8rIJQQkQnYL2QJY","c_nationkey":1,"c_phone":"11-764-165-5076","c_acctbal":9321.01,"c_mktsegment":"BUILDING","c_comment":"lithely final requests. furiously unusual account"},{"c_custkey":31,"c_name":"Customer#000000031","c_address":"LUACbO0viaAv6eXOAebryDB xjVst","c_nationkey":23,"c_phone":"33-197-837-7094","c_acctbal":5236.89,"c_mktsegment":"HOUSEHOLD","c_comment":"s use among the blithely pending depo"},{"c_custkey":32,"c_name":"Customer#000000032","c_address":"jD2xZzi UmId,DCtNBLXKj9q0Tlp2iQ6ZcO3J","c_nationkey":15,"c_phone":"25-430-914-2194","c_acctbal":3471.53,"c_mktsegment":"BUILDING","c_comment":"cial ideas. final, furious requests across the e"},{"c_custkey":33,"c_name":"Customer#000000033","c_address":"qFSlMuLucBmx9xnn5ib2csWUweg D","c_nationkey":17,"c_phone":"27-375-391-1280","c_acctbal":-78.56,"c_mktsegment":"AUTOMOBILE","c_comment":"s. slyly regular accounts are furiously. carefully pending requests"},{"c_custkey":34,"c_name":"Customer#000000034","c_address":"Q6G9wZ6dnczmtOx509xgE,M2KV","c_nationkey":15,"c_phone":"25-344-968-5422","c_acctbal":8589.7,"c_mktsegment":"HOUSEHOLD","c_comment":"nder against the even, pending accounts. even"},{"c_custkey":35,"c_name":"Customer#000000035","c_address":"TEjWGE4nBzJL2","c_nationkey":17,"c_phone":"27-566-888-7431","c_acctbal":1228.24,"c_mktsegment":"HOUSEHOLD","c_comment":"requests. special, express requests nag slyly furiousl"},{"c_custkey":36,"c_name":"Customer#000000036","c_address":"3TvCzjuPzpJ0,DdJ8kW5U","c_nationkey":21,"c_phone":"31-704-669-5769","c_acctbal":4987.27,"c_mktsegment":"BUILDING","c_comment":"haggle. enticing, quiet platelets grow quickly bold sheaves. carefully regular acc"},{"c_custkey":37,"c_name":"Customer#000000037","c_address":"7EV4Pwh,3SboctTWt","c_nationkey":8,"c_phone":"18-385-235-7162","c_acctbal":-917.75,"c_mktsegment":"FURNITURE","c_comment":"ilent packages are carefully among the deposits. furiousl"},{"c_custkey":38,"c_name":"Customer#000000038","c_address":"a5Ee5e9568R8RLP 2ap7","c_nationkey":12,"c_phone":"22-306-880-7212","c_acctbal":6345.11,"c_mktsegment":"HOUSEHOLD","c_comment":"lar excuses. closely even asymptotes cajole blithely excuses. carefully silent pinto beans sleep carefully fin"},{"c_custkey":39,"c_name":"Customer#000000039","c_address":"nnbRg,Pvy33dfkorYE FdeZ60","c_nationkey":2,"c_phone":"12-387-467-6509","c_acctbal":6264.31,"c_mktsegment":"AUTOMOBILE","c_comment":"tions. slyly silent excuses slee"},{"c_custkey":40,"c_name":"Customer#000000040","c_address":"gOnGWAyhSV1ofv","c_nationkey":3,"c_phone":"13-652-915-8939","c_acctbal":1335.3,"c_mktsegment":"BUILDING","c_comment":"rges impress after the slyly ironic courts. foxes are. blithely "},{"c_custkey":41,"c_name":"Customer#000000041","c_address":"IM9mzmyoxeBmvNw8lA7G3Ydska2nkZF","c_nationkey":10,"c_phone":"20-917-711-4011","c_acctbal":270.95,"c_mktsegment":"HOUSEHOLD","c_comment":"ly regular accounts hang bold, silent packages. 
unusual foxes haggle slyly above the special, final depo"},{"c_custkey":42,"c_name":"Customer#000000042","c_address":"ziSrvyyBke","c_nationkey":5,"c_phone":"15-416-330-4175","c_acctbal":8727.01,"c_mktsegment":"BUILDING","c_comment":"ssly according to the pinto beans: carefully special requests across the even, pending accounts wake special"},{"c_custkey":43,"c_name":"Customer#000000043","c_address":"ouSbjHk8lh5fKX3zGso3ZSIj9Aa3PoaFd","c_nationkey":19,"c_phone":"29-316-665-2897","c_acctbal":9904.28,"c_mktsegment":"MACHINERY","c_comment":"ial requests: carefully pending foxes detect quickly. carefully final courts cajole quickly. carefully"},{"c_custkey":44,"c_name":"Customer#000000044","c_address":"Oi,dOSPwDu4jo4x,,P85E0dmhZGvNtBwi","c_nationkey":16,"c_phone":"26-190-260-5375","c_acctbal":7315.94,"c_mktsegment":"AUTOMOBILE","c_comment":"r requests around the unusual, bold a"},{"c_custkey":45,"c_name":"Customer#000000045","c_address":"4v3OcpFgoOmMG,CbnF,4mdC","c_nationkey":9,"c_phone":"19-715-298-9917","c_acctbal":9983.38,"c_mktsegment":"AUTOMOBILE","c_comment":"nto beans haggle slyly alongside of t"},{"c_custkey":46,"c_name":"Customer#000000046","c_address":"eaTXWWm10L9","c_nationkey":6,"c_phone":"16-357-681-2007","c_acctbal":5744.59,"c_mktsegment":"AUTOMOBILE","c_comment":"ctions. accounts sleep furiously even requests. regular, regular accounts cajole blithely around the final pa"},{"c_custkey":47,"c_name":"Customer#000000047","c_address":"b0UgocSqEW5 gdVbhNT","c_nationkey":2,"c_phone":"12-427-271-9466","c_acctbal":274.58,"c_mktsegment":"BUILDING","c_comment":"ions. express, ironic instructions sleep furiously ironic ideas. furi"},{"c_custkey":48,"c_name":"Customer#000000048","c_address":"0UU iPhBupFvemNB","c_nationkey":0,"c_phone":"10-508-348-5882","c_acctbal":3792.5,"c_mktsegment":"BUILDING","c_comment":"re fluffily pending foxes. pending, bold platelets sleep slyly. even platelets cajo"},{"c_custkey":49,"c_name":"Customer#000000049","c_address":"cNgAeX7Fqrdf7HQN9EwjUa4nxT,68L FKAxzl","c_nationkey":10,"c_phone":"20-908-631-4424","c_acctbal":4573.94,"c_mktsegment":"FURNITURE","c_comment":"nusual foxes! fluffily pending packages maintain to the regular "},{"c_custkey":50,"c_name":"Customer#000000050","c_address":"9SzDYlkzxByyJ1QeTI o","c_nationkey":6,"c_phone":"16-658-112-3221","c_acctbal":4266.13,"c_mktsegment":"MACHINERY","c_comment":"ts. furiously ironic accounts cajole furiously slyly ironic dinos."},{"c_custkey":51,"c_name":"Customer#000000051","c_address":"uR,wEaiTvo4","c_nationkey":12,"c_phone":"22-344-885-4251","c_acctbal":855.87,"c_mktsegment":"FURNITURE","c_comment":"eposits. furiously regular requests integrate carefully packages. furious"},{"c_custkey":52,"c_name":"Customer#000000052","c_address":"7 QOqGqqSy9jfV51BC71jcHJSD0","c_nationkey":11,"c_phone":"21-186-284-5998","c_acctbal":5630.28,"c_mktsegment":"HOUSEHOLD","c_comment":"ic platelets use evenly even accounts. stealthy theodolites cajole furiou"},{"c_custkey":53,"c_name":"Customer#000000053","c_address":"HnaxHzTfFTZs8MuCpJyTbZ47Cm4wFOOgib","c_nationkey":15,"c_phone":"25-168-852-5363","c_acctbal":4113.64,"c_mktsegment":"HOUSEHOLD","c_comment":"ar accounts are. even foxes are blithely. fluffily pending deposits boost"},{"c_custkey":54,"c_name":"Customer#000000054","c_address":",k4vf 5vECGWFy,hosTE,","c_nationkey":4,"c_phone":"14-776-370-4745","c_acctbal":868.9,"c_mktsegment":"AUTOMOBILE","c_comment":"sual, silent accounts. furiously express accounts cajole special deposits. 
final, final accounts use furi"},{"c_custkey":55,"c_name":"Customer#000000055","c_address":"zIRBR4KNEl HzaiV3a i9n6elrxzDEh8r8pDom","c_nationkey":10,"c_phone":"20-180-440-8525","c_acctbal":4572.11,"c_mktsegment":"MACHINERY","c_comment":"ully unusual packages wake bravely bold packages. unusual requests boost deposits! blithely ironic packages ab"},{"c_custkey":56,"c_name":"Customer#000000056","c_address":"BJYZYJQk4yD5B","c_nationkey":10,"c_phone":"20-895-685-6920","c_acctbal":6530.86,"c_mktsegment":"FURNITURE","c_comment":". notornis wake carefully. carefully fluffy requests are furiously even accounts. slyly expre"},{"c_custkey":57,"c_name":"Customer#000000057","c_address":"97XYbsuOPRXPWU","c_nationkey":21,"c_phone":"31-835-306-1650","c_acctbal":4151.93,"c_mktsegment":"AUTOMOBILE","c_comment":"ove the carefully special packages. even, unusual deposits sleep slyly pend"},{"c_custkey":58,"c_name":"Customer#000000058","c_address":"g9ap7Dk1Sv9fcXEWjpMYpBZIRUohi T","c_nationkey":13,"c_phone":"23-244-493-2508","c_acctbal":6478.46,"c_mktsegment":"HOUSEHOLD","c_comment":"ideas. ironic ideas affix furiously express, final instructions. regular excuses use quickly e"},{"c_custkey":59,"c_name":"Customer#000000059","c_address":"zLOCP0wh92OtBihgspOGl4","c_nationkey":1,"c_phone":"11-355-584-3112","c_acctbal":3458.6,"c_mktsegment":"MACHINERY","c_comment":"ously final packages haggle blithely after the express deposits. furiou"},{"c_custkey":60,"c_name":"Customer#000000060","c_address":"FyodhjwMChsZmUz7Jz0H","c_nationkey":12,"c_phone":"22-480-575-5866","c_acctbal":2741.87,"c_mktsegment":"MACHINERY","c_comment":"latelets. blithely unusual courts boost furiously about the packages. blithely final instruct"},{"c_custkey":61,"c_name":"Customer#000000061","c_address":"9kndve4EAJxhg3veF BfXr7AqOsT39o gtqjaYE","c_nationkey":17,"c_phone":"27-626-559-8599","c_acctbal":1536.24,"c_mktsegment":"FURNITURE","c_comment":"egular packages shall have to impress along the "},{"c_custkey":62,"c_name":"Customer#000000062","c_address":"upJK2Dnw13,","c_nationkey":7,"c_phone":"17-361-978-7059","c_acctbal":595.61,"c_mktsegment":"MACHINERY","c_comment":"kly special dolphins. pinto beans are slyly. quickly regular accounts are furiously a"},{"c_custkey":63,"c_name":"Customer#000000063","c_address":"IXRSpVWWZraKII","c_nationkey":21,"c_phone":"31-952-552-9584","c_acctbal":9331.13,"c_mktsegment":"AUTOMOBILE","c_comment":"ithely even accounts detect slyly above the fluffily ir"},{"c_custkey":64,"c_name":"Customer#000000064","c_address":"MbCeGY20kaKK3oalJD,OT","c_nationkey":3,"c_phone":"13-558-731-7204","c_acctbal":-646.64,"c_mktsegment":"BUILDING","c_comment":"structions after the quietly ironic theodolites cajole be"},{"c_custkey":65,"c_name":"Customer#000000065","c_address":"RGT yzQ0y4l0H90P783LG4U95bXQFDRXbWa1sl,X","c_nationkey":23,"c_phone":"33-733-623-5267","c_acctbal":8795.16,"c_mktsegment":"AUTOMOBILE","c_comment":"y final foxes serve carefully. theodolites are carefully. pending i"},{"c_custkey":66,"c_name":"Customer#000000066","c_address":"XbsEqXH1ETbJYYtA1A","c_nationkey":22,"c_phone":"32-213-373-5094","c_acctbal":242.77,"c_mktsegment":"HOUSEHOLD","c_comment":"le slyly accounts. 
carefully silent packages benea"},{"c_custkey":67,"c_name":"Customer#000000067","c_address":"rfG0cOgtr5W8 xILkwp9fpCS8","c_nationkey":9,"c_phone":"19-403-114-4356","c_acctbal":8166.59,"c_mktsegment":"MACHINERY","c_comment":"indle furiously final, even theodo"},{"c_custkey":68,"c_name":"Customer#000000068","c_address":"o8AibcCRkXvQFh8hF,7o","c_nationkey":12,"c_phone":"22-918-832-2411","c_acctbal":6853.37,"c_mktsegment":"HOUSEHOLD","c_comment":" pending pinto beans impress realms. final dependencies "},{"c_custkey":69,"c_name":"Customer#000000069","c_address":"Ltx17nO9Wwhtdbe9QZVxNgP98V7xW97uvSH1prEw","c_nationkey":9,"c_phone":"19-225-978-5670","c_acctbal":1709.28,"c_mktsegment":"HOUSEHOLD","c_comment":"thely final ideas around the quickly final dependencies affix carefully quickly final theodolites. final accounts c"},{"c_custkey":70,"c_name":"Customer#000000070","c_address":"mFowIuhnHjp2GjCiYYavkW kUwOjIaTCQ","c_nationkey":22,"c_phone":"32-828-107-2832","c_acctbal":4867.52,"c_mktsegment":"FURNITURE","c_comment":"fter the special asymptotes. ideas after the unusual frets cajole quickly regular pinto be"},{"c_custkey":71,"c_name":"Customer#000000071","c_address":"TlGalgdXWBmMV,6agLyWYDyIz9MKzcY8gl,w6t1B","c_nationkey":7,"c_phone":"17-710-812-5403","c_acctbal":-611.19,"c_mktsegment":"HOUSEHOLD","c_comment":"g courts across the regular, final pinto beans are blithely pending ac"},{"c_custkey":72,"c_name":"Customer#000000072","c_address":"putjlmskxE,zs,HqeIA9Wqu7dhgH5BVCwDwHHcf","c_nationkey":2,"c_phone":"12-759-144-9689","c_acctbal":-362.86,"c_mktsegment":"FURNITURE","c_comment":"ithely final foxes sleep always quickly bold accounts. final wat"},{"c_custkey":73,"c_name":"Customer#000000073","c_address":"8IhIxreu4Ug6tt5mog4","c_nationkey":0,"c_phone":"10-473-439-3214","c_acctbal":4288.5,"c_mktsegment":"BUILDING","c_comment":"usual, unusual packages sleep busily along the furiou"},{"c_custkey":74,"c_name":"Customer#000000074","c_address":"IkJHCA3ZThF7qL7VKcrU nRLl,kylf ","c_nationkey":4,"c_phone":"14-199-862-7209","c_acctbal":2764.43,"c_mktsegment":"MACHINERY","c_comment":"onic accounts. blithely slow packages would haggle carefully. qui"},{"c_custkey":75,"c_name":"Customer#000000075","c_address":"Dh 6jZ,cwxWLKQfRKkiGrzv6pm","c_nationkey":18,"c_phone":"28-247-803-9025","c_acctbal":6684.1,"c_mktsegment":"AUTOMOBILE","c_comment":" instructions cajole even, even deposits. finally bold deposits use above the even pains. slyl"},{"c_custkey":76,"c_name":"Customer#000000076","c_address":"m3sbCvjMOHyaOofH,e UkGPtqc4","c_nationkey":0,"c_phone":"10-349-718-3044","c_acctbal":5745.33,"c_mktsegment":"FURNITURE","c_comment":"pecial deposits. ironic ideas boost blithely according to the closely ironic theodolites! furiously final deposits n"},{"c_custkey":77,"c_name":"Customer#000000077","c_address":"4tAE5KdMFGD4byHtXF92vx","c_nationkey":17,"c_phone":"27-269-357-4674","c_acctbal":1738.87,"c_mktsegment":"BUILDING","c_comment":"uffily silent requests. carefully ironic asymptotes among the ironic hockey players are carefully bli"},{"c_custkey":78,"c_name":"Customer#000000078","c_address":"HBOta,ZNqpg3U2cSL0kbrftkPwzX","c_nationkey":9,"c_phone":"19-960-700-9191","c_acctbal":7136.97,"c_mktsegment":"FURNITURE","c_comment":"ests. blithely bold pinto beans h"},{"c_custkey":79,"c_name":"Customer#000000079","c_address":"n5hH2ftkVRwW8idtD,BmM2","c_nationkey":15,"c_phone":"25-147-850-4166","c_acctbal":5121.28,"c_mktsegment":"MACHINERY","c_comment":"es. packages haggle furiously. 
regular, special requests poach after the quickly express ideas. blithely pending re"},{"c_custkey":80,"c_name":"Customer#000000080","c_address":"K,vtXp8qYB ","c_nationkey":0,"c_phone":"10-267-172-7101","c_acctbal":7383.53,"c_mktsegment":"FURNITURE","c_comment":"tect among the dependencies. bold accounts engage closely even pinto beans. ca"},{"c_custkey":81,"c_name":"Customer#000000081","c_address":"SH6lPA7JiiNC6dNTrR","c_nationkey":20,"c_phone":"30-165-277-3269","c_acctbal":2023.71,"c_mktsegment":"BUILDING","c_comment":"r packages. fluffily ironic requests cajole fluffily. ironically regular theodolit"},{"c_custkey":82,"c_name":"Customer#000000082","c_address":"zhG3EZbap4c992Gj3bK,3Ne,Xn","c_nationkey":18,"c_phone":"28-159-442-5305","c_acctbal":9468.34,"c_mktsegment":"AUTOMOBILE","c_comment":"s wake. bravely regular accounts are furiously. regula"},{"c_custkey":83,"c_name":"Customer#000000083","c_address":"HnhTNB5xpnSF20JBH4Ycs6psVnkC3RDf","c_nationkey":22,"c_phone":"32-817-154-4122","c_acctbal":6463.51,"c_mktsegment":"BUILDING","c_comment":"ccording to the quickly bold warhorses. final, regular foxes integrate carefully. bold packages nag blithely ev"},{"c_custkey":84,"c_name":"Customer#000000084","c_address":"lpXz6Fwr9945rnbtMc8PlueilS1WmASr CB","c_nationkey":11,"c_phone":"21-546-818-3802","c_acctbal":5174.71,"c_mktsegment":"FURNITURE","c_comment":"ly blithe foxes. special asymptotes haggle blithely against the furiously regular depo"},{"c_custkey":85,"c_name":"Customer#000000085","c_address":"siRerlDwiolhYR 8FgksoezycLj","c_nationkey":5,"c_phone":"15-745-585-8219","c_acctbal":3386.64,"c_mktsegment":"FURNITURE","c_comment":"ronic ideas use above the slowly pendin"},{"c_custkey":86,"c_name":"Customer#000000086","c_address":"US6EGGHXbTTXPL9SBsxQJsuvy","c_nationkey":0,"c_phone":"10-677-951-2353","c_acctbal":3306.32,"c_mktsegment":"HOUSEHOLD","c_comment":"quests. pending dugouts are carefully aroun"},{"c_custkey":87,"c_name":"Customer#000000087","c_address":"hgGhHVSWQl 6jZ6Ev","c_nationkey":23,"c_phone":"33-869-884-7053","c_acctbal":6327.54,"c_mktsegment":"FURNITURE","c_comment":"hely ironic requests integrate according to the ironic accounts. slyly regular pla"},{"c_custkey":88,"c_name":"Customer#000000088","c_address":"wtkjBN9eyrFuENSMmMFlJ3e7jE5KXcg","c_nationkey":16,"c_phone":"26-516-273-2566","c_acctbal":8031.44,"c_mktsegment":"AUTOMOBILE","c_comment":"s are quickly above the quickly ironic instructions; even requests about the carefully final deposi"},{"c_custkey":89,"c_name":"Customer#000000089","c_address":"dtR, y9JQWUO6FoJExyp8whOU","c_nationkey":14,"c_phone":"24-394-451-5404","c_acctbal":1530.76,"c_mktsegment":"FURNITURE","c_comment":"counts are slyly beyond the slyly final accounts. quickly final ideas wake. r"},{"c_custkey":90,"c_name":"Customer#000000090","c_address":"QxCzH7VxxYUWwfL7","c_nationkey":16,"c_phone":"26-603-491-1238","c_acctbal":7354.23,"c_mktsegment":"BUILDING","c_comment":"sly across the furiously even "},{"c_custkey":91,"c_name":"Customer#000000091","c_address":"S8OMYFrpHwoNHaGBeuS6E 6zhHGZiprw1b7 q","c_nationkey":8,"c_phone":"18-239-400-3677","c_acctbal":4643.14,"c_mktsegment":"AUTOMOBILE","c_comment":"onic accounts. fluffily silent pinto beans boost blithely according to the fluffily exp"},{"c_custkey":92,"c_name":"Customer#000000092","c_address":"obP PULk2LH LqNF,K9hcbNqnLAkJVsl5xqSrY,","c_nationkey":2,"c_phone":"12-446-416-8471","c_acctbal":1182.91,"c_mktsegment":"MACHINERY","c_comment":". pinto beans hang slyly final deposits. 
ac"},{"c_custkey":93,"c_name":"Customer#000000093","c_address":"EHXBr2QGdh","c_nationkey":7,"c_phone":"17-359-388-5266","c_acctbal":2182.52,"c_mktsegment":"MACHINERY","c_comment":"press deposits. carefully regular platelets r"},{"c_custkey":94,"c_name":"Customer#000000094","c_address":"IfVNIN9KtkScJ9dUjK3Pg5gY1aFeaXewwf","c_nationkey":9,"c_phone":"19-953-499-8833","c_acctbal":5500.11,"c_mktsegment":"HOUSEHOLD","c_comment":"latelets across the bold, final requests sleep according to the fluffily bold accounts. unusual deposits amon"},{"c_custkey":95,"c_name":"Customer#000000095","c_address":"EU0xvmWvOmUUn5J,2z85DQyG7QCJ9Xq7","c_nationkey":15,"c_phone":"25-923-255-2929","c_acctbal":5327.38,"c_mktsegment":"MACHINERY","c_comment":"ithely. ruthlessly final requests wake slyly alongside of the furiously silent pinto beans. even the"},{"c_custkey":96,"c_name":"Customer#000000096","c_address":"vWLOrmXhRR","c_nationkey":8,"c_phone":"18-422-845-1202","c_acctbal":6323.92,"c_mktsegment":"AUTOMOBILE","c_comment":"press requests believe furiously. carefully final instructions snooze carefully. "},{"c_custkey":97,"c_name":"Customer#000000097","c_address":"OApyejbhJG,0Iw3j rd1M","c_nationkey":17,"c_phone":"27-588-919-5638","c_acctbal":2164.48,"c_mktsegment":"AUTOMOBILE","c_comment":"haggle slyly. bold, special ideas are blithely above the thinly bold theo"},{"c_custkey":98,"c_name":"Customer#000000098","c_address":"7yiheXNSpuEAwbswDW","c_nationkey":12,"c_phone":"22-885-845-6889","c_acctbal":-551.37,"c_mktsegment":"BUILDING","c_comment":"ages. furiously pending accounts are quickly carefully final foxes: busily pe"},{"c_custkey":99,"c_name":"Customer#000000099","c_address":"szsrOiPtCHVS97Lt","c_nationkey":15,"c_phone":"25-515-237-9232","c_acctbal":4088.65,"c_mktsegment":"HOUSEHOLD","c_comment":"cajole slyly about the regular theodolites! furiously bold requests nag along the pending, regular packages. somas"},{"c_custkey":100,"c_name":"Customer#000000100","c_address":"fptUABXcmkC5Wx","c_nationkey":20,"c_phone":"30-749-445-4907","c_acctbal":9889.89,"c_mktsegment":"FURNITURE","c_comment":"was furiously fluffily quiet deposits. silent, pending requests boost against "}] \ No newline at end of file +[ + { + "c_custkey": 1, + "c_name": "Customer#000000001", + "c_address": "IVhzIApeRb ot,c,E", + "c_nationkey": 15, + "c_phone": "25-989-741-2988", + "c_acctbal": 711.56, + "c_mktsegment": "BUILDING", + "c_comment": "to the even, regular platelets. regular, ironic epitaphs nag e" + }, + { + "c_custkey": 2, + "c_name": "Customer#000000002", + "c_address": "XSTf4,NCwDVaWNe6tEgvwfmRchLXak", + "c_nationkey": 13, + "c_phone": "23-768-687-3665", + "c_acctbal": 121.65, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "l accounts. blithely ironic theodolites integrate boldly: caref" + }, + { + "c_custkey": 3, + "c_name": "Customer#000000003", + "c_address": "MG9kdTD2WBHm", + "c_nationkey": 1, + "c_phone": "11-719-748-3364", + "c_acctbal": 7498.12, + "c_mktsegment": "AUTOMOBILE", + "c_comment": " deposits eat slyly ironic, even instructions. express foxes detect slyly. blithely even accounts abov" + }, + { + "c_custkey": 4, + "c_name": "Customer#000000004", + "c_address": "XxVSJsLAGtn", + "c_nationkey": 4, + "c_phone": "14-128-190-5944", + "c_acctbal": 2866.83, + "c_mktsegment": "MACHINERY", + "c_comment": " requests. 
final, regular ideas sleep final accou" + }, + { + "c_custkey": 5, + "c_name": "Customer#000000005", + "c_address": "KvpyuHCplrB84WgAiGV6sYpZq7Tj", + "c_nationkey": 3, + "c_phone": "13-750-942-6364", + "c_acctbal": 794.47, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "n accounts will have to unwind. foxes cajole accor" + }, + { + "c_custkey": 6, + "c_name": "Customer#000000006", + "c_address": "sKZz0CsnMD7mp4Xd0YrBvx,LREYKUWAh yVn", + "c_nationkey": 20, + "c_phone": "30-114-968-4951", + "c_acctbal": 7638.57, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "tions. even deposits boost according to the slyly bold packages. final accounts cajole requests. furious" + }, + { + "c_custkey": 7, + "c_name": "Customer#000000007", + "c_address": "TcGe5gaZNgVePxU5kRrvXBfkasDTea", + "c_nationkey": 18, + "c_phone": "28-190-982-9759", + "c_acctbal": 9561.95, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "ainst the ironic, express theodolites. express, even pinto beans among the exp" + }, + { + "c_custkey": 8, + "c_name": "Customer#000000008", + "c_address": "I0B10bB0AymmC, 0PrRYBCP1yGJ8xcBPmWhl5", + "c_nationkey": 17, + "c_phone": "27-147-574-9335", + "c_acctbal": 6819.74, + "c_mktsegment": "BUILDING", + "c_comment": "among the slyly regular theodolites kindle blithely courts. carefully even theodolites haggle slyly along the ide" + }, + { + "c_custkey": 9, + "c_name": "Customer#000000009", + "c_address": "xKiAFTjUsCuxfeleNqefumTrjS", + "c_nationkey": 8, + "c_phone": "18-338-906-3675", + "c_acctbal": 8324.07, + "c_mktsegment": "FURNITURE", + "c_comment": "r theodolites according to the requests wake thinly excuses: pending requests haggle furiousl" + }, + { + "c_custkey": 10, + "c_name": "Customer#000000010", + "c_address": "6LrEaV6KR6PLVcgl2ArL Q3rqzLzcT1 v2", + "c_nationkey": 5, + "c_phone": "15-741-346-9870", + "c_acctbal": 2753.54, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "es regular deposits haggle. fur" + }, + { + "c_custkey": 11, + "c_name": "Customer#000000011", + "c_address": "PkWS 3HlXqwTuzrKg633BEi", + "c_nationkey": 23, + "c_phone": "33-464-151-3439", + "c_acctbal": -272.6, + "c_mktsegment": "BUILDING", + "c_comment": "ckages. requests sleep slyly. quickly even pinto beans promise above the slyly regular pinto beans. " + }, + { + "c_custkey": 12, + "c_name": "Customer#000000012", + "c_address": "9PWKuhzT4Zr1Q", + "c_nationkey": 13, + "c_phone": "23-791-276-1263", + "c_acctbal": 3396.49, + "c_mktsegment": "HOUSEHOLD", + "c_comment": " to the carefully final braids. blithely regular requests nag. ironic theodolites boost quickly along" + }, + { + "c_custkey": 13, + "c_name": "Customer#000000013", + "c_address": "nsXQu0oVjD7PM659uC3SRSp", + "c_nationkey": 3, + "c_phone": "13-761-547-5974", + "c_acctbal": 3857.34, + "c_mktsegment": "BUILDING", + "c_comment": "ounts sleep carefully after the close frays. carefully bold notornis use ironic requests. blithely" + }, + { + "c_custkey": 14, + "c_name": "Customer#000000014", + "c_address": "KXkletMlL2JQEA ", + "c_nationkey": 1, + "c_phone": "11-845-129-3851", + "c_acctbal": 5266.3, + "c_mktsegment": "FURNITURE", + "c_comment": ", ironic packages across the unus" + }, + { + "c_custkey": 15, + "c_name": "Customer#000000015", + "c_address": "YtWggXoOLdwdo7b0y,BZaGUQMLJMX1Y,EC,6Dn", + "c_nationkey": 23, + "c_phone": "33-687-542-7601", + "c_acctbal": 2788.52, + "c_mktsegment": "HOUSEHOLD", + "c_comment": " platelets. regular deposits detect asymptotes. 
blithely unusual packages nag slyly at the fluf" + }, + { + "c_custkey": 16, + "c_name": "Customer#000000016", + "c_address": "cYiaeMLZSMAOQ2 d0W,", + "c_nationkey": 10, + "c_phone": "20-781-609-3107", + "c_acctbal": 4681.03, + "c_mktsegment": "FURNITURE", + "c_comment": "kly silent courts. thinly regular theodolites sleep fluffily after " + }, + { + "c_custkey": 17, + "c_name": "Customer#000000017", + "c_address": "izrh 6jdqtp2eqdtbkswDD8SG4SzXruMfIXyR7", + "c_nationkey": 2, + "c_phone": "12-970-682-3487", + "c_acctbal": 6.34, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "packages wake! blithely even pint" + }, + { + "c_custkey": 18, + "c_name": "Customer#000000018", + "c_address": "3txGO AiuFux3zT0Z9NYaFRnZt", + "c_nationkey": 6, + "c_phone": "16-155-215-1315", + "c_acctbal": 5494.43, + "c_mktsegment": "BUILDING", + "c_comment": "s sleep. carefully even instructions nag furiously alongside of t" + }, + { + "c_custkey": 19, + "c_name": "Customer#000000019", + "c_address": "uc,3bHIx84H,wdrmLOjVsiqXCq2tr", + "c_nationkey": 18, + "c_phone": "28-396-526-5053", + "c_acctbal": 8914.71, + "c_mktsegment": "HOUSEHOLD", + "c_comment": " nag. furiously careful packages are slyly at the accounts. furiously regular in" + }, + { + "c_custkey": 20, + "c_name": "Customer#000000020", + "c_address": "JrPk8Pqplj4Ne", + "c_nationkey": 22, + "c_phone": "32-957-234-8742", + "c_acctbal": 7603.4, + "c_mktsegment": "FURNITURE", + "c_comment": "g alongside of the special excuses-- fluffily enticing packages wake " + }, + { + "c_custkey": 21, + "c_name": "Customer#000000021", + "c_address": "XYmVpr9yAHDEn", + "c_nationkey": 8, + "c_phone": "18-902-614-8344", + "c_acctbal": 1428.25, + "c_mktsegment": "MACHINERY", + "c_comment": " quickly final accounts integrate blithely furiously u" + }, + { + "c_custkey": 22, + "c_name": "Customer#000000022", + "c_address": "QI6p41,FNs5k7RZoCCVPUTkUdYpB", + "c_nationkey": 3, + "c_phone": "13-806-545-9701", + "c_acctbal": 591.98, + "c_mktsegment": "MACHINERY", + "c_comment": "s nod furiously above the furiously ironic ideas. " + }, + { + "c_custkey": 23, + "c_name": "Customer#000000023", + "c_address": "OdY W13N7Be3OC5MpgfmcYss0Wn6TKT", + "c_nationkey": 3, + "c_phone": "13-312-472-8245", + "c_acctbal": 3332.02, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "deposits. special deposits cajole slyly. fluffily special deposits about the furiously " + }, + { + "c_custkey": 24, + "c_name": "Customer#000000024", + "c_address": "HXAFgIAyjxtdqwimt13Y3OZO 4xeLe7U8PqG", + "c_nationkey": 13, + "c_phone": "23-127-851-8031", + "c_acctbal": 9255.67, + "c_mktsegment": "MACHINERY", + "c_comment": "into beans. fluffily final ideas haggle fluffily" + }, + { + "c_custkey": 25, + "c_name": "Customer#000000025", + "c_address": "Hp8GyFQgGHFYSilH5tBfe", + "c_nationkey": 12, + "c_phone": "22-603-468-3533", + "c_acctbal": 7133.7, + "c_mktsegment": "FURNITURE", + "c_comment": "y. accounts sleep ruthlessly according to the regular theodolites. unusual instructions sleep. ironic, final" + }, + { + "c_custkey": 26, + "c_name": "Customer#000000026", + "c_address": "8ljrc5ZeMl7UciP", + "c_nationkey": 22, + "c_phone": "32-363-455-4837", + "c_acctbal": 5182.05, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "c requests use furiously ironic requests. 
slyly ironic dependencies us" + }, + { + "c_custkey": 27, + "c_name": "Customer#000000027", + "c_address": "IS8GIyxpBrLpMT0u7", + "c_nationkey": 3, + "c_phone": "13-137-193-2709", + "c_acctbal": 5679.84, + "c_mktsegment": "BUILDING", + "c_comment": " about the carefully ironic pinto beans. accoun" + }, + { + "c_custkey": 28, + "c_name": "Customer#000000028", + "c_address": "iVyg0daQ,Tha8x2WPWA9m2529m", + "c_nationkey": 8, + "c_phone": "18-774-241-1462", + "c_acctbal": 1007.18, + "c_mktsegment": "FURNITURE", + "c_comment": " along the regular deposits. furiously final pac" + }, + { + "c_custkey": 29, + "c_name": "Customer#000000029", + "c_address": "sJ5adtfyAkCK63df2,vF25zyQMVYE34uh", + "c_nationkey": 0, + "c_phone": "10-773-203-7342", + "c_acctbal": 7618.27, + "c_mktsegment": "FURNITURE", + "c_comment": "its after the carefully final platelets x-ray against " + }, + { + "c_custkey": 30, + "c_name": "Customer#000000030", + "c_address": "nJDsELGAavU63Jl0c5NKsKfL8rIJQQkQnYL2QJY", + "c_nationkey": 1, + "c_phone": "11-764-165-5076", + "c_acctbal": 9321.01, + "c_mktsegment": "BUILDING", + "c_comment": "lithely final requests. furiously unusual account" + }, + { + "c_custkey": 31, + "c_name": "Customer#000000031", + "c_address": "LUACbO0viaAv6eXOAebryDB xjVst", + "c_nationkey": 23, + "c_phone": "33-197-837-7094", + "c_acctbal": 5236.89, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "s use among the blithely pending depo" + }, + { + "c_custkey": 32, + "c_name": "Customer#000000032", + "c_address": "jD2xZzi UmId,DCtNBLXKj9q0Tlp2iQ6ZcO3J", + "c_nationkey": 15, + "c_phone": "25-430-914-2194", + "c_acctbal": 3471.53, + "c_mktsegment": "BUILDING", + "c_comment": "cial ideas. final, furious requests across the e" + }, + { + "c_custkey": 33, + "c_name": "Customer#000000033", + "c_address": "qFSlMuLucBmx9xnn5ib2csWUweg D", + "c_nationkey": 17, + "c_phone": "27-375-391-1280", + "c_acctbal": -78.56, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "s. slyly regular accounts are furiously. carefully pending requests" + }, + { + "c_custkey": 34, + "c_name": "Customer#000000034", + "c_address": "Q6G9wZ6dnczmtOx509xgE,M2KV", + "c_nationkey": 15, + "c_phone": "25-344-968-5422", + "c_acctbal": 8589.7, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "nder against the even, pending accounts. even" + }, + { + "c_custkey": 35, + "c_name": "Customer#000000035", + "c_address": "TEjWGE4nBzJL2", + "c_nationkey": 17, + "c_phone": "27-566-888-7431", + "c_acctbal": 1228.24, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "requests. special, express requests nag slyly furiousl" + }, + { + "c_custkey": 36, + "c_name": "Customer#000000036", + "c_address": "3TvCzjuPzpJ0,DdJ8kW5U", + "c_nationkey": 21, + "c_phone": "31-704-669-5769", + "c_acctbal": 4987.27, + "c_mktsegment": "BUILDING", + "c_comment": "haggle. enticing, quiet platelets grow quickly bold sheaves. carefully regular acc" + }, + { + "c_custkey": 37, + "c_name": "Customer#000000037", + "c_address": "7EV4Pwh,3SboctTWt", + "c_nationkey": 8, + "c_phone": "18-385-235-7162", + "c_acctbal": -917.75, + "c_mktsegment": "FURNITURE", + "c_comment": "ilent packages are carefully among the deposits. furiousl" + }, + { + "c_custkey": 38, + "c_name": "Customer#000000038", + "c_address": "a5Ee5e9568R8RLP 2ap7", + "c_nationkey": 12, + "c_phone": "22-306-880-7212", + "c_acctbal": 6345.11, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "lar excuses. closely even asymptotes cajole blithely excuses. 
carefully silent pinto beans sleep carefully fin" + }, + { + "c_custkey": 39, + "c_name": "Customer#000000039", + "c_address": "nnbRg,Pvy33dfkorYE FdeZ60", + "c_nationkey": 2, + "c_phone": "12-387-467-6509", + "c_acctbal": 6264.31, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "tions. slyly silent excuses slee" + }, + { + "c_custkey": 40, + "c_name": "Customer#000000040", + "c_address": "gOnGWAyhSV1ofv", + "c_nationkey": 3, + "c_phone": "13-652-915-8939", + "c_acctbal": 1335.3, + "c_mktsegment": "BUILDING", + "c_comment": "rges impress after the slyly ironic courts. foxes are. blithely " + }, + { + "c_custkey": 41, + "c_name": "Customer#000000041", + "c_address": "IM9mzmyoxeBmvNw8lA7G3Ydska2nkZF", + "c_nationkey": 10, + "c_phone": "20-917-711-4011", + "c_acctbal": 270.95, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "ly regular accounts hang bold, silent packages. unusual foxes haggle slyly above the special, final depo" + }, + { + "c_custkey": 42, + "c_name": "Customer#000000042", + "c_address": "ziSrvyyBke", + "c_nationkey": 5, + "c_phone": "15-416-330-4175", + "c_acctbal": 8727.01, + "c_mktsegment": "BUILDING", + "c_comment": "ssly according to the pinto beans: carefully special requests across the even, pending accounts wake special" + }, + { + "c_custkey": 43, + "c_name": "Customer#000000043", + "c_address": "ouSbjHk8lh5fKX3zGso3ZSIj9Aa3PoaFd", + "c_nationkey": 19, + "c_phone": "29-316-665-2897", + "c_acctbal": 9904.28, + "c_mktsegment": "MACHINERY", + "c_comment": "ial requests: carefully pending foxes detect quickly. carefully final courts cajole quickly. carefully" + }, + { + "c_custkey": 44, + "c_name": "Customer#000000044", + "c_address": "Oi,dOSPwDu4jo4x,,P85E0dmhZGvNtBwi", + "c_nationkey": 16, + "c_phone": "26-190-260-5375", + "c_acctbal": 7315.94, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "r requests around the unusual, bold a" + }, + { + "c_custkey": 45, + "c_name": "Customer#000000045", + "c_address": "4v3OcpFgoOmMG,CbnF,4mdC", + "c_nationkey": 9, + "c_phone": "19-715-298-9917", + "c_acctbal": 9983.38, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "nto beans haggle slyly alongside of t" + }, + { + "c_custkey": 46, + "c_name": "Customer#000000046", + "c_address": "eaTXWWm10L9", + "c_nationkey": 6, + "c_phone": "16-357-681-2007", + "c_acctbal": 5744.59, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "ctions. accounts sleep furiously even requests. regular, regular accounts cajole blithely around the final pa" + }, + { + "c_custkey": 47, + "c_name": "Customer#000000047", + "c_address": "b0UgocSqEW5 gdVbhNT", + "c_nationkey": 2, + "c_phone": "12-427-271-9466", + "c_acctbal": 274.58, + "c_mktsegment": "BUILDING", + "c_comment": "ions. express, ironic instructions sleep furiously ironic ideas. furi" + }, + { + "c_custkey": 48, + "c_name": "Customer#000000048", + "c_address": "0UU iPhBupFvemNB", + "c_nationkey": 0, + "c_phone": "10-508-348-5882", + "c_acctbal": 3792.5, + "c_mktsegment": "BUILDING", + "c_comment": "re fluffily pending foxes. pending, bold platelets sleep slyly. even platelets cajo" + }, + { + "c_custkey": 49, + "c_name": "Customer#000000049", + "c_address": "cNgAeX7Fqrdf7HQN9EwjUa4nxT,68L FKAxzl", + "c_nationkey": 10, + "c_phone": "20-908-631-4424", + "c_acctbal": 4573.94, + "c_mktsegment": "FURNITURE", + "c_comment": "nusual foxes! 
fluffily pending packages maintain to the regular " + }, + { + "c_custkey": 50, + "c_name": "Customer#000000050", + "c_address": "9SzDYlkzxByyJ1QeTI o", + "c_nationkey": 6, + "c_phone": "16-658-112-3221", + "c_acctbal": 4266.13, + "c_mktsegment": "MACHINERY", + "c_comment": "ts. furiously ironic accounts cajole furiously slyly ironic dinos." + }, + { + "c_custkey": 51, + "c_name": "Customer#000000051", + "c_address": "uR,wEaiTvo4", + "c_nationkey": 12, + "c_phone": "22-344-885-4251", + "c_acctbal": 855.87, + "c_mktsegment": "FURNITURE", + "c_comment": "eposits. furiously regular requests integrate carefully packages. furious" + }, + { + "c_custkey": 52, + "c_name": "Customer#000000052", + "c_address": "7 QOqGqqSy9jfV51BC71jcHJSD0", + "c_nationkey": 11, + "c_phone": "21-186-284-5998", + "c_acctbal": 5630.28, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "ic platelets use evenly even accounts. stealthy theodolites cajole furiou" + }, + { + "c_custkey": 53, + "c_name": "Customer#000000053", + "c_address": "HnaxHzTfFTZs8MuCpJyTbZ47Cm4wFOOgib", + "c_nationkey": 15, + "c_phone": "25-168-852-5363", + "c_acctbal": 4113.64, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "ar accounts are. even foxes are blithely. fluffily pending deposits boost" + }, + { + "c_custkey": 54, + "c_name": "Customer#000000054", + "c_address": ",k4vf 5vECGWFy,hosTE,", + "c_nationkey": 4, + "c_phone": "14-776-370-4745", + "c_acctbal": 868.9, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "sual, silent accounts. furiously express accounts cajole special deposits. final, final accounts use furi" + }, + { + "c_custkey": 55, + "c_name": "Customer#000000055", + "c_address": "zIRBR4KNEl HzaiV3a i9n6elrxzDEh8r8pDom", + "c_nationkey": 10, + "c_phone": "20-180-440-8525", + "c_acctbal": 4572.11, + "c_mktsegment": "MACHINERY", + "c_comment": "ully unusual packages wake bravely bold packages. unusual requests boost deposits! blithely ironic packages ab" + }, + { + "c_custkey": 56, + "c_name": "Customer#000000056", + "c_address": "BJYZYJQk4yD5B", + "c_nationkey": 10, + "c_phone": "20-895-685-6920", + "c_acctbal": 6530.86, + "c_mktsegment": "FURNITURE", + "c_comment": ". notornis wake carefully. carefully fluffy requests are furiously even accounts. slyly expre" + }, + { + "c_custkey": 57, + "c_name": "Customer#000000057", + "c_address": "97XYbsuOPRXPWU", + "c_nationkey": 21, + "c_phone": "31-835-306-1650", + "c_acctbal": 4151.93, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "ove the carefully special packages. even, unusual deposits sleep slyly pend" + }, + { + "c_custkey": 58, + "c_name": "Customer#000000058", + "c_address": "g9ap7Dk1Sv9fcXEWjpMYpBZIRUohi T", + "c_nationkey": 13, + "c_phone": "23-244-493-2508", + "c_acctbal": 6478.46, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "ideas. ironic ideas affix furiously express, final instructions. regular excuses use quickly e" + }, + { + "c_custkey": 59, + "c_name": "Customer#000000059", + "c_address": "zLOCP0wh92OtBihgspOGl4", + "c_nationkey": 1, + "c_phone": "11-355-584-3112", + "c_acctbal": 3458.6, + "c_mktsegment": "MACHINERY", + "c_comment": "ously final packages haggle blithely after the express deposits. furiou" + }, + { + "c_custkey": 60, + "c_name": "Customer#000000060", + "c_address": "FyodhjwMChsZmUz7Jz0H", + "c_nationkey": 12, + "c_phone": "22-480-575-5866", + "c_acctbal": 2741.87, + "c_mktsegment": "MACHINERY", + "c_comment": "latelets. blithely unusual courts boost furiously about the packages. 
blithely final instruct" + }, + { + "c_custkey": 61, + "c_name": "Customer#000000061", + "c_address": "9kndve4EAJxhg3veF BfXr7AqOsT39o gtqjaYE", + "c_nationkey": 17, + "c_phone": "27-626-559-8599", + "c_acctbal": 1536.24, + "c_mktsegment": "FURNITURE", + "c_comment": "egular packages shall have to impress along the " + }, + { + "c_custkey": 62, + "c_name": "Customer#000000062", + "c_address": "upJK2Dnw13,", + "c_nationkey": 7, + "c_phone": "17-361-978-7059", + "c_acctbal": 595.61, + "c_mktsegment": "MACHINERY", + "c_comment": "kly special dolphins. pinto beans are slyly. quickly regular accounts are furiously a" + }, + { + "c_custkey": 63, + "c_name": "Customer#000000063", + "c_address": "IXRSpVWWZraKII", + "c_nationkey": 21, + "c_phone": "31-952-552-9584", + "c_acctbal": 9331.13, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "ithely even accounts detect slyly above the fluffily ir" + }, + { + "c_custkey": 64, + "c_name": "Customer#000000064", + "c_address": "MbCeGY20kaKK3oalJD,OT", + "c_nationkey": 3, + "c_phone": "13-558-731-7204", + "c_acctbal": -646.64, + "c_mktsegment": "BUILDING", + "c_comment": "structions after the quietly ironic theodolites cajole be" + }, + { + "c_custkey": 65, + "c_name": "Customer#000000065", + "c_address": "RGT yzQ0y4l0H90P783LG4U95bXQFDRXbWa1sl,X", + "c_nationkey": 23, + "c_phone": "33-733-623-5267", + "c_acctbal": 8795.16, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "y final foxes serve carefully. theodolites are carefully. pending i" + }, + { + "c_custkey": 66, + "c_name": "Customer#000000066", + "c_address": "XbsEqXH1ETbJYYtA1A", + "c_nationkey": 22, + "c_phone": "32-213-373-5094", + "c_acctbal": 242.77, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "le slyly accounts. carefully silent packages benea" + }, + { + "c_custkey": 67, + "c_name": "Customer#000000067", + "c_address": "rfG0cOgtr5W8 xILkwp9fpCS8", + "c_nationkey": 9, + "c_phone": "19-403-114-4356", + "c_acctbal": 8166.59, + "c_mktsegment": "MACHINERY", + "c_comment": "indle furiously final, even theodo" + }, + { + "c_custkey": 68, + "c_name": "Customer#000000068", + "c_address": "o8AibcCRkXvQFh8hF,7o", + "c_nationkey": 12, + "c_phone": "22-918-832-2411", + "c_acctbal": 6853.37, + "c_mktsegment": "HOUSEHOLD", + "c_comment": " pending pinto beans impress realms. final dependencies " + }, + { + "c_custkey": 69, + "c_name": "Customer#000000069", + "c_address": "Ltx17nO9Wwhtdbe9QZVxNgP98V7xW97uvSH1prEw", + "c_nationkey": 9, + "c_phone": "19-225-978-5670", + "c_acctbal": 1709.28, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "thely final ideas around the quickly final dependencies affix carefully quickly final theodolites. final accounts c" + }, + { + "c_custkey": 70, + "c_name": "Customer#000000070", + "c_address": "mFowIuhnHjp2GjCiYYavkW kUwOjIaTCQ", + "c_nationkey": 22, + "c_phone": "32-828-107-2832", + "c_acctbal": 4867.52, + "c_mktsegment": "FURNITURE", + "c_comment": "fter the special asymptotes. 
ideas after the unusual frets cajole quickly regular pinto be" + }, + { + "c_custkey": 71, + "c_name": "Customer#000000071", + "c_address": "TlGalgdXWBmMV,6agLyWYDyIz9MKzcY8gl,w6t1B", + "c_nationkey": 7, + "c_phone": "17-710-812-5403", + "c_acctbal": -611.19, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "g courts across the regular, final pinto beans are blithely pending ac" + }, + { + "c_custkey": 72, + "c_name": "Customer#000000072", + "c_address": "putjlmskxE,zs,HqeIA9Wqu7dhgH5BVCwDwHHcf", + "c_nationkey": 2, + "c_phone": "12-759-144-9689", + "c_acctbal": -362.86, + "c_mktsegment": "FURNITURE", + "c_comment": "ithely final foxes sleep always quickly bold accounts. final wat" + }, + { + "c_custkey": 73, + "c_name": "Customer#000000073", + "c_address": "8IhIxreu4Ug6tt5mog4", + "c_nationkey": 0, + "c_phone": "10-473-439-3214", + "c_acctbal": 4288.5, + "c_mktsegment": "BUILDING", + "c_comment": "usual, unusual packages sleep busily along the furiou" + }, + { + "c_custkey": 74, + "c_name": "Customer#000000074", + "c_address": "IkJHCA3ZThF7qL7VKcrU nRLl,kylf ", + "c_nationkey": 4, + "c_phone": "14-199-862-7209", + "c_acctbal": 2764.43, + "c_mktsegment": "MACHINERY", + "c_comment": "onic accounts. blithely slow packages would haggle carefully. qui" + }, + { + "c_custkey": 75, + "c_name": "Customer#000000075", + "c_address": "Dh 6jZ,cwxWLKQfRKkiGrzv6pm", + "c_nationkey": 18, + "c_phone": "28-247-803-9025", + "c_acctbal": 6684.1, + "c_mktsegment": "AUTOMOBILE", + "c_comment": " instructions cajole even, even deposits. finally bold deposits use above the even pains. slyl" + }, + { + "c_custkey": 76, + "c_name": "Customer#000000076", + "c_address": "m3sbCvjMOHyaOofH,e UkGPtqc4", + "c_nationkey": 0, + "c_phone": "10-349-718-3044", + "c_acctbal": 5745.33, + "c_mktsegment": "FURNITURE", + "c_comment": "pecial deposits. ironic ideas boost blithely according to the closely ironic theodolites! furiously final deposits n" + }, + { + "c_custkey": 77, + "c_name": "Customer#000000077", + "c_address": "4tAE5KdMFGD4byHtXF92vx", + "c_nationkey": 17, + "c_phone": "27-269-357-4674", + "c_acctbal": 1738.87, + "c_mktsegment": "BUILDING", + "c_comment": "uffily silent requests. carefully ironic asymptotes among the ironic hockey players are carefully bli" + }, + { + "c_custkey": 78, + "c_name": "Customer#000000078", + "c_address": "HBOta,ZNqpg3U2cSL0kbrftkPwzX", + "c_nationkey": 9, + "c_phone": "19-960-700-9191", + "c_acctbal": 7136.97, + "c_mktsegment": "FURNITURE", + "c_comment": "ests. blithely bold pinto beans h" + }, + { + "c_custkey": 79, + "c_name": "Customer#000000079", + "c_address": "n5hH2ftkVRwW8idtD,BmM2", + "c_nationkey": 15, + "c_phone": "25-147-850-4166", + "c_acctbal": 5121.28, + "c_mktsegment": "MACHINERY", + "c_comment": "es. packages haggle furiously. regular, special requests poach after the quickly express ideas. blithely pending re" + }, + { + "c_custkey": 80, + "c_name": "Customer#000000080", + "c_address": "K,vtXp8qYB ", + "c_nationkey": 0, + "c_phone": "10-267-172-7101", + "c_acctbal": 7383.53, + "c_mktsegment": "FURNITURE", + "c_comment": "tect among the dependencies. bold accounts engage closely even pinto beans. ca" + }, + { + "c_custkey": 81, + "c_name": "Customer#000000081", + "c_address": "SH6lPA7JiiNC6dNTrR", + "c_nationkey": 20, + "c_phone": "30-165-277-3269", + "c_acctbal": 2023.71, + "c_mktsegment": "BUILDING", + "c_comment": "r packages. fluffily ironic requests cajole fluffily. 
ironically regular theodolit" + }, + { + "c_custkey": 82, + "c_name": "Customer#000000082", + "c_address": "zhG3EZbap4c992Gj3bK,3Ne,Xn", + "c_nationkey": 18, + "c_phone": "28-159-442-5305", + "c_acctbal": 9468.34, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "s wake. bravely regular accounts are furiously. regula" + }, + { + "c_custkey": 83, + "c_name": "Customer#000000083", + "c_address": "HnhTNB5xpnSF20JBH4Ycs6psVnkC3RDf", + "c_nationkey": 22, + "c_phone": "32-817-154-4122", + "c_acctbal": 6463.51, + "c_mktsegment": "BUILDING", + "c_comment": "ccording to the quickly bold warhorses. final, regular foxes integrate carefully. bold packages nag blithely ev" + }, + { + "c_custkey": 84, + "c_name": "Customer#000000084", + "c_address": "lpXz6Fwr9945rnbtMc8PlueilS1WmASr CB", + "c_nationkey": 11, + "c_phone": "21-546-818-3802", + "c_acctbal": 5174.71, + "c_mktsegment": "FURNITURE", + "c_comment": "ly blithe foxes. special asymptotes haggle blithely against the furiously regular depo" + }, + { + "c_custkey": 85, + "c_name": "Customer#000000085", + "c_address": "siRerlDwiolhYR 8FgksoezycLj", + "c_nationkey": 5, + "c_phone": "15-745-585-8219", + "c_acctbal": 3386.64, + "c_mktsegment": "FURNITURE", + "c_comment": "ronic ideas use above the slowly pendin" + }, + { + "c_custkey": 86, + "c_name": "Customer#000000086", + "c_address": "US6EGGHXbTTXPL9SBsxQJsuvy", + "c_nationkey": 0, + "c_phone": "10-677-951-2353", + "c_acctbal": 3306.32, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "quests. pending dugouts are carefully aroun" + }, + { + "c_custkey": 87, + "c_name": "Customer#000000087", + "c_address": "hgGhHVSWQl 6jZ6Ev", + "c_nationkey": 23, + "c_phone": "33-869-884-7053", + "c_acctbal": 6327.54, + "c_mktsegment": "FURNITURE", + "c_comment": "hely ironic requests integrate according to the ironic accounts. slyly regular pla" + }, + { + "c_custkey": 88, + "c_name": "Customer#000000088", + "c_address": "wtkjBN9eyrFuENSMmMFlJ3e7jE5KXcg", + "c_nationkey": 16, + "c_phone": "26-516-273-2566", + "c_acctbal": 8031.44, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "s are quickly above the quickly ironic instructions; even requests about the carefully final deposi" + }, + { + "c_custkey": 89, + "c_name": "Customer#000000089", + "c_address": "dtR, y9JQWUO6FoJExyp8whOU", + "c_nationkey": 14, + "c_phone": "24-394-451-5404", + "c_acctbal": 1530.76, + "c_mktsegment": "FURNITURE", + "c_comment": "counts are slyly beyond the slyly final accounts. quickly final ideas wake. r" + }, + { + "c_custkey": 90, + "c_name": "Customer#000000090", + "c_address": "QxCzH7VxxYUWwfL7", + "c_nationkey": 16, + "c_phone": "26-603-491-1238", + "c_acctbal": 7354.23, + "c_mktsegment": "BUILDING", + "c_comment": "sly across the furiously even " + }, + { + "c_custkey": 91, + "c_name": "Customer#000000091", + "c_address": "S8OMYFrpHwoNHaGBeuS6E 6zhHGZiprw1b7 q", + "c_nationkey": 8, + "c_phone": "18-239-400-3677", + "c_acctbal": 4643.14, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "onic accounts. fluffily silent pinto beans boost blithely according to the fluffily exp" + }, + { + "c_custkey": 92, + "c_name": "Customer#000000092", + "c_address": "obP PULk2LH LqNF,K9hcbNqnLAkJVsl5xqSrY,", + "c_nationkey": 2, + "c_phone": "12-446-416-8471", + "c_acctbal": 1182.91, + "c_mktsegment": "MACHINERY", + "c_comment": ". pinto beans hang slyly final deposits. 
ac" + }, + { + "c_custkey": 93, + "c_name": "Customer#000000093", + "c_address": "EHXBr2QGdh", + "c_nationkey": 7, + "c_phone": "17-359-388-5266", + "c_acctbal": 2182.52, + "c_mktsegment": "MACHINERY", + "c_comment": "press deposits. carefully regular platelets r" + }, + { + "c_custkey": 94, + "c_name": "Customer#000000094", + "c_address": "IfVNIN9KtkScJ9dUjK3Pg5gY1aFeaXewwf", + "c_nationkey": 9, + "c_phone": "19-953-499-8833", + "c_acctbal": 5500.11, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "latelets across the bold, final requests sleep according to the fluffily bold accounts. unusual deposits amon" + }, + { + "c_custkey": 95, + "c_name": "Customer#000000095", + "c_address": "EU0xvmWvOmUUn5J,2z85DQyG7QCJ9Xq7", + "c_nationkey": 15, + "c_phone": "25-923-255-2929", + "c_acctbal": 5327.38, + "c_mktsegment": "MACHINERY", + "c_comment": "ithely. ruthlessly final requests wake slyly alongside of the furiously silent pinto beans. even the" + }, + { + "c_custkey": 96, + "c_name": "Customer#000000096", + "c_address": "vWLOrmXhRR", + "c_nationkey": 8, + "c_phone": "18-422-845-1202", + "c_acctbal": 6323.92, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "press requests believe furiously. carefully final instructions snooze carefully. " + }, + { + "c_custkey": 97, + "c_name": "Customer#000000097", + "c_address": "OApyejbhJG,0Iw3j rd1M", + "c_nationkey": 17, + "c_phone": "27-588-919-5638", + "c_acctbal": 2164.48, + "c_mktsegment": "AUTOMOBILE", + "c_comment": "haggle slyly. bold, special ideas are blithely above the thinly bold theo" + }, + { + "c_custkey": 98, + "c_name": "Customer#000000098", + "c_address": "7yiheXNSpuEAwbswDW", + "c_nationkey": 12, + "c_phone": "22-885-845-6889", + "c_acctbal": -551.37, + "c_mktsegment": "BUILDING", + "c_comment": "ages. furiously pending accounts are quickly carefully final foxes: busily pe" + }, + { + "c_custkey": 99, + "c_name": "Customer#000000099", + "c_address": "szsrOiPtCHVS97Lt", + "c_nationkey": 15, + "c_phone": "25-515-237-9232", + "c_acctbal": 4088.65, + "c_mktsegment": "HOUSEHOLD", + "c_comment": "cajole slyly about the regular theodolites! furiously bold requests nag along the pending, regular packages. somas" + }, + { + "c_custkey": 100, + "c_name": "Customer#000000100", + "c_address": "fptUABXcmkC5Wx", + "c_nationkey": 20, + "c_phone": "30-749-445-4907", + "c_acctbal": 9889.89, + "c_mktsegment": "FURNITURE", + "c_comment": "was furiously fluffily quiet deposits. 
silent, pending requests boost against " + } +] diff --git a/test/test-files/json-schema-test-file.schema.result.json b/test/test-files/json-schema-test-file.schema.result.json index b4ba8cd1..f0fe1883 100644 --- a/test/test-files/json-schema-test-file.schema.result.json +++ b/test/test-files/json-schema-test-file.schema.result.json @@ -142,9 +142,7 @@ "name": "string_field", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "string_field" - ], + "path": ["string_field"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -154,9 +152,7 @@ "int_field": { "name": "int_field", "primitiveType": "INT64", - "path": [ - "int_field" - ], + "path": ["int_field"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -166,9 +162,7 @@ "number_field": { "name": "number_field", "primitiveType": "DOUBLE", - "path": [ - "number_field" - ], + "path": ["number_field"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -177,9 +171,7 @@ }, "array_field": { "name": "array_field", - "path": [ - "array_field" - ], + "path": ["array_field"], "repetitionType": "OPTIONAL", "rLevelMax": 0, "dLevelMax": 1, @@ -188,10 +180,7 @@ "fields": { "list": { "name": "list", - "path": [ - "array_field", - "list" - ], + "path": ["array_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -202,11 +191,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "array_field", - "list", - "element" - ], + "path": ["array_field", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -220,9 +205,7 @@ }, "timestamp_array_field": { "name": "timestamp_array_field", - "path": [ - "timestamp_array_field" - ], + "path": ["timestamp_array_field"], "repetitionType": "OPTIONAL", "rLevelMax": 0, "dLevelMax": 1, @@ -231,10 +214,7 @@ "fields": { "list": { "name": "list", - "path": [ - "timestamp_array_field", - "list" - ], + "path": ["timestamp_array_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -245,11 +225,7 @@ "name": "element", "primitiveType": "INT64", "originalType": "TIMESTAMP_MILLIS", - "path": [ - "timestamp_array_field", - "list", - "element" - ], + "path": ["timestamp_array_field", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -265,9 +241,7 @@ "name": "timestamp_field", "primitiveType": "INT64", "originalType": "TIMESTAMP_MILLIS", - "path": [ - "timestamp_field" - ], + "path": ["timestamp_field"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -276,9 +250,7 @@ }, "obj_field": { "name": "obj_field", - "path": [ - "obj_field" - ], + "path": ["obj_field"], "repetitionType": "OPTIONAL", "rLevelMax": 0, "dLevelMax": 1, @@ -289,10 +261,7 @@ "name": "sub1", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "obj_field", - "sub1" - ], + "path": ["obj_field", "sub1"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -303,10 +272,7 @@ "name": "sub2", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "obj_field", - "sub2" - ], + "path": ["obj_field", "sub2"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -317,9 +283,7 @@ }, "struct_field": { "name": "struct_field", - "path": [ - "struct_field" - ], + "path": ["struct_field"], "repetitionType": "OPTIONAL", "rLevelMax": 0, 
"dLevelMax": 1, @@ -328,10 +292,7 @@ "fields": { "list": { "name": "list", - "path": [ - "struct_field", - "list" - ], + "path": ["struct_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -340,11 +301,7 @@ "fields": { "element": { "name": "element", - "path": [ - "struct_field", - "list", - "element" - ], + "path": ["struct_field", "list", "element"], "repetitionType": "REQUIRED", "rLevelMax": 1, "dLevelMax": 2, @@ -355,12 +312,7 @@ "name": "sub3", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub3" - ], + "path": ["struct_field", "list", "element", "sub3"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -371,12 +323,7 @@ "name": "sub4", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub4" - ], + "path": ["struct_field", "list", "element", "sub4"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -385,12 +332,7 @@ }, "sub5": { "name": "sub5", - "path": [ - "struct_field", - "list", - "element", - "sub5" - ], + "path": ["struct_field", "list", "element", "sub5"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -401,13 +343,7 @@ "name": "sub6", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub6" - ], + "path": ["struct_field", "list", "element", "sub5", "sub6"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -418,13 +354,7 @@ "name": "sub7", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub7" - ], + "path": ["struct_field", "list", "element", "sub5", "sub7"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -435,12 +365,7 @@ }, "sub8": { "name": "sub8", - "path": [ - "struct_field", - "list", - "element", - "sub8" - ], + "path": ["struct_field", "list", "element", "sub8"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -449,13 +374,7 @@ "fields": { "list": { "name": "list", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list" - ], + "path": ["struct_field", "list", "element", "sub8", "list"], "repetitionType": "REPEATED", "rLevelMax": 2, "dLevelMax": 4, @@ -466,14 +385,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list", - "element" - ], + "path": ["struct_field", "list", "element", "sub8", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -498,9 +410,7 @@ "name": "string_field", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "string_field" - ], + "path": ["string_field"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -510,9 +420,7 @@ { "name": "int_field", "primitiveType": "INT64", - "path": [ - "int_field" - ], + "path": ["int_field"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -522,9 +430,7 @@ { "name": "number_field", "primitiveType": "DOUBLE", - "path": [ - "number_field" - ], + "path": ["number_field"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -533,9 +439,7 @@ }, { "name": "array_field", - "path": [ - "array_field" - ], + "path": ["array_field"], "repetitionType": "OPTIONAL", 
"rLevelMax": 0, "dLevelMax": 1, @@ -544,10 +448,7 @@ "fields": { "list": { "name": "list", - "path": [ - "array_field", - "list" - ], + "path": ["array_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -558,11 +459,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "array_field", - "list", - "element" - ], + "path": ["array_field", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -576,10 +473,7 @@ }, { "name": "list", - "path": [ - "array_field", - "list" - ], + "path": ["array_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -590,11 +484,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "array_field", - "list", - "element" - ], + "path": ["array_field", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -607,11 +497,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "array_field", - "list", - "element" - ], + "path": ["array_field", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -620,9 +506,7 @@ }, { "name": "timestamp_array_field", - "path": [ - "timestamp_array_field" - ], + "path": ["timestamp_array_field"], "repetitionType": "OPTIONAL", "rLevelMax": 0, "dLevelMax": 1, @@ -631,10 +515,7 @@ "fields": { "list": { "name": "list", - "path": [ - "timestamp_array_field", - "list" - ], + "path": ["timestamp_array_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -645,11 +526,7 @@ "name": "element", "primitiveType": "INT64", "originalType": "TIMESTAMP_MILLIS", - "path": [ - "timestamp_array_field", - "list", - "element" - ], + "path": ["timestamp_array_field", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -663,10 +540,7 @@ }, { "name": "list", - "path": [ - "timestamp_array_field", - "list" - ], + "path": ["timestamp_array_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -677,11 +551,7 @@ "name": "element", "primitiveType": "INT64", "originalType": "TIMESTAMP_MILLIS", - "path": [ - "timestamp_array_field", - "list", - "element" - ], + "path": ["timestamp_array_field", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -694,11 +564,7 @@ "name": "element", "primitiveType": "INT64", "originalType": "TIMESTAMP_MILLIS", - "path": [ - "timestamp_array_field", - "list", - "element" - ], + "path": ["timestamp_array_field", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -709,9 +575,7 @@ "name": "timestamp_field", "primitiveType": "INT64", "originalType": "TIMESTAMP_MILLIS", - "path": [ - "timestamp_field" - ], + "path": ["timestamp_field"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -720,9 +584,7 @@ }, { "name": "obj_field", - "path": [ - "obj_field" - ], + "path": ["obj_field"], "repetitionType": "OPTIONAL", "rLevelMax": 0, "dLevelMax": 1, @@ -733,10 +595,7 @@ "name": "sub1", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "obj_field", - "sub1" - ], + "path": ["obj_field", "sub1"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -747,10 +606,7 @@ "name": "sub2", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - 
"path": [ - "obj_field", - "sub2" - ], + "path": ["obj_field", "sub2"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -763,10 +619,7 @@ "name": "sub1", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "obj_field", - "sub1" - ], + "path": ["obj_field", "sub1"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -777,10 +630,7 @@ "name": "sub2", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "obj_field", - "sub2" - ], + "path": ["obj_field", "sub2"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -789,9 +639,7 @@ }, { "name": "struct_field", - "path": [ - "struct_field" - ], + "path": ["struct_field"], "repetitionType": "OPTIONAL", "rLevelMax": 0, "dLevelMax": 1, @@ -800,10 +648,7 @@ "fields": { "list": { "name": "list", - "path": [ - "struct_field", - "list" - ], + "path": ["struct_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -812,11 +657,7 @@ "fields": { "element": { "name": "element", - "path": [ - "struct_field", - "list", - "element" - ], + "path": ["struct_field", "list", "element"], "repetitionType": "REQUIRED", "rLevelMax": 1, "dLevelMax": 2, @@ -827,12 +668,7 @@ "name": "sub3", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub3" - ], + "path": ["struct_field", "list", "element", "sub3"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -843,12 +679,7 @@ "name": "sub4", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub4" - ], + "path": ["struct_field", "list", "element", "sub4"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -857,12 +688,7 @@ }, "sub5": { "name": "sub5", - "path": [ - "struct_field", - "list", - "element", - "sub5" - ], + "path": ["struct_field", "list", "element", "sub5"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -873,13 +699,7 @@ "name": "sub6", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub6" - ], + "path": ["struct_field", "list", "element", "sub5", "sub6"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -890,13 +710,7 @@ "name": "sub7", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub7" - ], + "path": ["struct_field", "list", "element", "sub5", "sub7"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -907,12 +721,7 @@ }, "sub8": { "name": "sub8", - "path": [ - "struct_field", - "list", - "element", - "sub8" - ], + "path": ["struct_field", "list", "element", "sub8"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -921,13 +730,7 @@ "fields": { "list": { "name": "list", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list" - ], + "path": ["struct_field", "list", "element", "sub8", "list"], "repetitionType": "REPEATED", "rLevelMax": 2, "dLevelMax": 4, @@ -938,14 +741,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list", - "element" - ], + "path": ["struct_field", "list", "element", "sub8", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ 
-966,10 +762,7 @@ }, { "name": "list", - "path": [ - "struct_field", - "list" - ], + "path": ["struct_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -978,11 +771,7 @@ "fields": { "element": { "name": "element", - "path": [ - "struct_field", - "list", - "element" - ], + "path": ["struct_field", "list", "element"], "repetitionType": "REQUIRED", "rLevelMax": 1, "dLevelMax": 2, @@ -993,12 +782,7 @@ "name": "sub3", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub3" - ], + "path": ["struct_field", "list", "element", "sub3"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1009,12 +793,7 @@ "name": "sub4", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub4" - ], + "path": ["struct_field", "list", "element", "sub4"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1023,12 +802,7 @@ }, "sub5": { "name": "sub5", - "path": [ - "struct_field", - "list", - "element", - "sub5" - ], + "path": ["struct_field", "list", "element", "sub5"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -1039,13 +813,7 @@ "name": "sub6", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub6" - ], + "path": ["struct_field", "list", "element", "sub5", "sub6"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1056,13 +824,7 @@ "name": "sub7", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub7" - ], + "path": ["struct_field", "list", "element", "sub5", "sub7"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1073,12 +835,7 @@ }, "sub8": { "name": "sub8", - "path": [ - "struct_field", - "list", - "element", - "sub8" - ], + "path": ["struct_field", "list", "element", "sub8"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -1087,13 +844,7 @@ "fields": { "list": { "name": "list", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list" - ], + "path": ["struct_field", "list", "element", "sub8", "list"], "repetitionType": "REPEATED", "rLevelMax": 2, "dLevelMax": 4, @@ -1104,14 +855,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list", - "element" - ], + "path": ["struct_field", "list", "element", "sub8", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1129,11 +873,7 @@ }, { "name": "element", - "path": [ - "struct_field", - "list", - "element" - ], + "path": ["struct_field", "list", "element"], "repetitionType": "REQUIRED", "rLevelMax": 1, "dLevelMax": 2, @@ -1144,12 +884,7 @@ "name": "sub3", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub3" - ], + "path": ["struct_field", "list", "element", "sub3"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1160,12 +895,7 @@ "name": "sub4", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub4" - ], + "path": ["struct_field", "list", "element", "sub4"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1174,12 +904,7 @@ }, "sub5": { 
"name": "sub5", - "path": [ - "struct_field", - "list", - "element", - "sub5" - ], + "path": ["struct_field", "list", "element", "sub5"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -1190,13 +915,7 @@ "name": "sub6", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub6" - ], + "path": ["struct_field", "list", "element", "sub5", "sub6"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1207,13 +926,7 @@ "name": "sub7", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub7" - ], + "path": ["struct_field", "list", "element", "sub5", "sub7"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1224,12 +937,7 @@ }, "sub8": { "name": "sub8", - "path": [ - "struct_field", - "list", - "element", - "sub8" - ], + "path": ["struct_field", "list", "element", "sub8"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -1238,13 +946,7 @@ "fields": { "list": { "name": "list", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list" - ], + "path": ["struct_field", "list", "element", "sub8", "list"], "repetitionType": "REPEATED", "rLevelMax": 2, "dLevelMax": 4, @@ -1255,14 +957,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list", - "element" - ], + "path": ["struct_field", "list", "element", "sub8", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1280,12 +975,7 @@ "name": "sub3", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub3" - ], + "path": ["struct_field", "list", "element", "sub3"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1296,12 +986,7 @@ "name": "sub4", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub4" - ], + "path": ["struct_field", "list", "element", "sub4"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1310,12 +995,7 @@ }, { "name": "sub5", - "path": [ - "struct_field", - "list", - "element", - "sub5" - ], + "path": ["struct_field", "list", "element", "sub5"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -1326,13 +1006,7 @@ "name": "sub6", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub6" - ], + "path": ["struct_field", "list", "element", "sub5", "sub6"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1343,13 +1017,7 @@ "name": "sub7", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub7" - ], + "path": ["struct_field", "list", "element", "sub5", "sub7"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1362,13 +1030,7 @@ "name": "sub6", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub6" - ], + "path": ["struct_field", "list", "element", "sub5", "sub6"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1379,13 +1041,7 @@ "name": "sub7", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - 
"struct_field", - "list", - "element", - "sub5", - "sub7" - ], + "path": ["struct_field", "list", "element", "sub5", "sub7"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1394,12 +1050,7 @@ }, { "name": "sub8", - "path": [ - "struct_field", - "list", - "element", - "sub8" - ], + "path": ["struct_field", "list", "element", "sub8"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -1408,13 +1059,7 @@ "fields": { "list": { "name": "list", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list" - ], + "path": ["struct_field", "list", "element", "sub8", "list"], "repetitionType": "REPEATED", "rLevelMax": 2, "dLevelMax": 4, @@ -1425,14 +1070,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list", - "element" - ], + "path": ["struct_field", "list", "element", "sub8", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1446,13 +1084,7 @@ }, { "name": "list", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list" - ], + "path": ["struct_field", "list", "element", "sub8", "list"], "repetitionType": "REPEATED", "rLevelMax": 2, "dLevelMax": 4, @@ -1463,14 +1095,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list", - "element" - ], + "path": ["struct_field", "list", "element", "sub8", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1483,14 +1108,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list", - "element" - ], + "path": ["struct_field", "list", "element", "sub8", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", diff --git a/test/test-files/object-nested.schema.json b/test/test-files/object-nested.schema.json index db958133..67404272 100644 --- a/test/test-files/object-nested.schema.json +++ b/test/test-files/object-nested.schema.json @@ -1,68 +1,68 @@ { - "type": "object", - "properties": { - "string_field": { - "type": "string" - }, - "int_field": { - "type": "integer" - }, - "array_field": { - "type": "array", - "items": { - "type": "string" - }, - "additionalItems": false + "type": "object", + "properties": { + "string_field": { + "type": "string" + }, + "int_field": { + "type": "integer" + }, + "array_field": { + "type": "array", + "items": { + "type": "string" + }, + "additionalItems": false + }, + "timestamp_field": { + "type": "string" + }, + "obj_field": { + "type": "object", + "properties": { + "sub1": { + "type": "string" }, - "timestamp_field": { + "sub2": { + "type": "string" + } + }, + "additionalProperties": false + }, + "struct_field": { + "type": "array", + "items": { + "type": "object", + "properties": { + "sub3": { "type": "string" - }, - "obj_field": { + }, + "sub4": { + "type": "string" + }, + "sub5": { "type": "object", "properties": { - "sub1": { - "type": "string" - }, - "sub2": { - "type": "string" - } + "sub6": { + "type": "string" + }, + "sub7": { + "type": "string" + } }, "additionalProperties": false - }, - "struct_field": { + }, + "sub8": { "type": "array", "items": { - "type": "object", - "properties": { - "sub3": { - "type": "string" - }, - "sub4": { - "type": "string" - }, - "sub5": { - "type": "object", - "properties": { - 
"sub6": { - "type": "string" - }, - "sub7": { - "type": "string" - } - }, - "additionalProperties": false - }, - "sub8": { - "type": "array", - "items": { - "type": "string" - } - } - }, - "additionalProperties": false - }, - "additionalItems": false - } - }, - "additionalProperties": false + "type": "string" + } + } + }, + "additionalProperties": false + }, + "additionalItems": false + } + }, + "additionalProperties": false } diff --git a/test/test-files/object-nested.schema.result.json b/test/test-files/object-nested.schema.result.json index e7289cfe..700f3326 100644 --- a/test/test-files/object-nested.schema.result.json +++ b/test/test-files/object-nested.schema.result.json @@ -119,9 +119,7 @@ "name": "string_field", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "string_field" - ], + "path": ["string_field"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -131,9 +129,7 @@ "int_field": { "name": "int_field", "primitiveType": "INT64", - "path": [ - "int_field" - ], + "path": ["int_field"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -142,9 +138,7 @@ }, "array_field": { "name": "array_field", - "path": [ - "array_field" - ], + "path": ["array_field"], "repetitionType": "OPTIONAL", "rLevelMax": 0, "dLevelMax": 1, @@ -153,10 +147,7 @@ "fields": { "list": { "name": "list", - "path": [ - "array_field", - "list" - ], + "path": ["array_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -167,11 +158,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "array_field", - "list", - "element" - ], + "path": ["array_field", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -187,9 +174,7 @@ "name": "timestamp_field", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "timestamp_field" - ], + "path": ["timestamp_field"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -198,9 +183,7 @@ }, "obj_field": { "name": "obj_field", - "path": [ - "obj_field" - ], + "path": ["obj_field"], "repetitionType": "OPTIONAL", "rLevelMax": 0, "dLevelMax": 1, @@ -211,10 +194,7 @@ "name": "sub1", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "obj_field", - "sub1" - ], + "path": ["obj_field", "sub1"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -225,10 +205,7 @@ "name": "sub2", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "obj_field", - "sub2" - ], + "path": ["obj_field", "sub2"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -239,9 +216,7 @@ }, "struct_field": { "name": "struct_field", - "path": [ - "struct_field" - ], + "path": ["struct_field"], "repetitionType": "OPTIONAL", "rLevelMax": 0, "dLevelMax": 1, @@ -250,10 +225,7 @@ "fields": { "list": { "name": "list", - "path": [ - "struct_field", - "list" - ], + "path": ["struct_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -262,11 +234,7 @@ "fields": { "element": { "name": "element", - "path": [ - "struct_field", - "list", - "element" - ], + "path": ["struct_field", "list", "element"], "repetitionType": "REQUIRED", "rLevelMax": 1, "dLevelMax": 2, @@ -277,12 +245,7 @@ "name": "sub3", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub3" - ], + "path": ["struct_field", "list", 
"element", "sub3"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -293,12 +256,7 @@ "name": "sub4", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub4" - ], + "path": ["struct_field", "list", "element", "sub4"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -307,12 +265,7 @@ }, "sub5": { "name": "sub5", - "path": [ - "struct_field", - "list", - "element", - "sub5" - ], + "path": ["struct_field", "list", "element", "sub5"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -323,13 +276,7 @@ "name": "sub6", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub6" - ], + "path": ["struct_field", "list", "element", "sub5", "sub6"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -340,13 +287,7 @@ "name": "sub7", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub7" - ], + "path": ["struct_field", "list", "element", "sub5", "sub7"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -357,12 +298,7 @@ }, "sub8": { "name": "sub8", - "path": [ - "struct_field", - "list", - "element", - "sub8" - ], + "path": ["struct_field", "list", "element", "sub8"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -371,13 +307,7 @@ "fields": { "list": { "name": "list", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list" - ], + "path": ["struct_field", "list", "element", "sub8", "list"], "repetitionType": "REPEATED", "rLevelMax": 2, "dLevelMax": 4, @@ -388,14 +318,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list", - "element" - ], + "path": ["struct_field", "list", "element", "sub8", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -420,9 +343,7 @@ "name": "string_field", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "string_field" - ], + "path": ["string_field"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -432,9 +353,7 @@ { "name": "int_field", "primitiveType": "INT64", - "path": [ - "int_field" - ], + "path": ["int_field"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -443,9 +362,7 @@ }, { "name": "array_field", - "path": [ - "array_field" - ], + "path": ["array_field"], "repetitionType": "OPTIONAL", "rLevelMax": 0, "dLevelMax": 1, @@ -454,10 +371,7 @@ "fields": { "list": { "name": "list", - "path": [ - "array_field", - "list" - ], + "path": ["array_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -468,11 +382,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "array_field", - "list", - "element" - ], + "path": ["array_field", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -486,10 +396,7 @@ }, { "name": "list", - "path": [ - "array_field", - "list" - ], + "path": ["array_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -500,11 +407,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "array_field", - "list", - "element" - ], + "path": ["array_field", 
"list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -517,11 +420,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "array_field", - "list", - "element" - ], + "path": ["array_field", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -532,9 +431,7 @@ "name": "timestamp_field", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "timestamp_field" - ], + "path": ["timestamp_field"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -543,9 +440,7 @@ }, { "name": "obj_field", - "path": [ - "obj_field" - ], + "path": ["obj_field"], "repetitionType": "OPTIONAL", "rLevelMax": 0, "dLevelMax": 1, @@ -556,10 +451,7 @@ "name": "sub1", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "obj_field", - "sub1" - ], + "path": ["obj_field", "sub1"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -570,10 +462,7 @@ "name": "sub2", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "obj_field", - "sub2" - ], + "path": ["obj_field", "sub2"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -586,10 +475,7 @@ "name": "sub1", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "obj_field", - "sub1" - ], + "path": ["obj_field", "sub1"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -600,10 +486,7 @@ "name": "sub2", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "obj_field", - "sub2" - ], + "path": ["obj_field", "sub2"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -612,9 +495,7 @@ }, { "name": "struct_field", - "path": [ - "struct_field" - ], + "path": ["struct_field"], "repetitionType": "OPTIONAL", "rLevelMax": 0, "dLevelMax": 1, @@ -623,10 +504,7 @@ "fields": { "list": { "name": "list", - "path": [ - "struct_field", - "list" - ], + "path": ["struct_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -635,11 +513,7 @@ "fields": { "element": { "name": "element", - "path": [ - "struct_field", - "list", - "element" - ], + "path": ["struct_field", "list", "element"], "repetitionType": "REQUIRED", "rLevelMax": 1, "dLevelMax": 2, @@ -650,12 +524,7 @@ "name": "sub3", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub3" - ], + "path": ["struct_field", "list", "element", "sub3"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -666,12 +535,7 @@ "name": "sub4", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub4" - ], + "path": ["struct_field", "list", "element", "sub4"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -680,12 +544,7 @@ }, "sub5": { "name": "sub5", - "path": [ - "struct_field", - "list", - "element", - "sub5" - ], + "path": ["struct_field", "list", "element", "sub5"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -696,13 +555,7 @@ "name": "sub6", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub6" - ], + "path": ["struct_field", "list", "element", "sub5", "sub6"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -713,13 +566,7 @@ 
"name": "sub7", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub7" - ], + "path": ["struct_field", "list", "element", "sub5", "sub7"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -730,12 +577,7 @@ }, "sub8": { "name": "sub8", - "path": [ - "struct_field", - "list", - "element", - "sub8" - ], + "path": ["struct_field", "list", "element", "sub8"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -744,13 +586,7 @@ "fields": { "list": { "name": "list", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list" - ], + "path": ["struct_field", "list", "element", "sub8", "list"], "repetitionType": "REPEATED", "rLevelMax": 2, "dLevelMax": 4, @@ -761,14 +597,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list", - "element" - ], + "path": ["struct_field", "list", "element", "sub8", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -789,10 +618,7 @@ }, { "name": "list", - "path": [ - "struct_field", - "list" - ], + "path": ["struct_field", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 2, @@ -801,11 +627,7 @@ "fields": { "element": { "name": "element", - "path": [ - "struct_field", - "list", - "element" - ], + "path": ["struct_field", "list", "element"], "repetitionType": "REQUIRED", "rLevelMax": 1, "dLevelMax": 2, @@ -816,12 +638,7 @@ "name": "sub3", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub3" - ], + "path": ["struct_field", "list", "element", "sub3"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -832,12 +649,7 @@ "name": "sub4", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub4" - ], + "path": ["struct_field", "list", "element", "sub4"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -846,12 +658,7 @@ }, "sub5": { "name": "sub5", - "path": [ - "struct_field", - "list", - "element", - "sub5" - ], + "path": ["struct_field", "list", "element", "sub5"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -862,13 +669,7 @@ "name": "sub6", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub6" - ], + "path": ["struct_field", "list", "element", "sub5", "sub6"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -879,13 +680,7 @@ "name": "sub7", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub7" - ], + "path": ["struct_field", "list", "element", "sub5", "sub7"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -896,12 +691,7 @@ }, "sub8": { "name": "sub8", - "path": [ - "struct_field", - "list", - "element", - "sub8" - ], + "path": ["struct_field", "list", "element", "sub8"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -910,13 +700,7 @@ "fields": { "list": { "name": "list", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list" - ], + "path": ["struct_field", "list", "element", "sub8", "list"], "repetitionType": "REPEATED", "rLevelMax": 2, "dLevelMax": 4, @@ -927,14 +711,7 @@ "name": "element", "primitiveType": 
"BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list", - "element" - ], + "path": ["struct_field", "list", "element", "sub8", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -952,11 +729,7 @@ }, { "name": "element", - "path": [ - "struct_field", - "list", - "element" - ], + "path": ["struct_field", "list", "element"], "repetitionType": "REQUIRED", "rLevelMax": 1, "dLevelMax": 2, @@ -967,12 +740,7 @@ "name": "sub3", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub3" - ], + "path": ["struct_field", "list", "element", "sub3"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -983,12 +751,7 @@ "name": "sub4", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub4" - ], + "path": ["struct_field", "list", "element", "sub4"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -997,12 +760,7 @@ }, "sub5": { "name": "sub5", - "path": [ - "struct_field", - "list", - "element", - "sub5" - ], + "path": ["struct_field", "list", "element", "sub5"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -1013,13 +771,7 @@ "name": "sub6", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub6" - ], + "path": ["struct_field", "list", "element", "sub5", "sub6"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1030,13 +782,7 @@ "name": "sub7", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub7" - ], + "path": ["struct_field", "list", "element", "sub5", "sub7"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1047,12 +793,7 @@ }, "sub8": { "name": "sub8", - "path": [ - "struct_field", - "list", - "element", - "sub8" - ], + "path": ["struct_field", "list", "element", "sub8"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -1061,13 +802,7 @@ "fields": { "list": { "name": "list", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list" - ], + "path": ["struct_field", "list", "element", "sub8", "list"], "repetitionType": "REPEATED", "rLevelMax": 2, "dLevelMax": 4, @@ -1078,14 +813,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list", - "element" - ], + "path": ["struct_field", "list", "element", "sub8", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1103,12 +831,7 @@ "name": "sub3", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub3" - ], + "path": ["struct_field", "list", "element", "sub3"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1119,12 +842,7 @@ "name": "sub4", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub4" - ], + "path": ["struct_field", "list", "element", "sub4"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1133,12 +851,7 @@ }, { "name": "sub5", - "path": [ - "struct_field", - "list", - "element", - "sub5" - ], + "path": ["struct_field", "list", "element", "sub5"], 
"repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -1149,13 +862,7 @@ "name": "sub6", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub6" - ], + "path": ["struct_field", "list", "element", "sub5", "sub6"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1166,13 +873,7 @@ "name": "sub7", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub7" - ], + "path": ["struct_field", "list", "element", "sub5", "sub7"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1185,13 +886,7 @@ "name": "sub6", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub6" - ], + "path": ["struct_field", "list", "element", "sub5", "sub6"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1202,13 +897,7 @@ "name": "sub7", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub5", - "sub7" - ], + "path": ["struct_field", "list", "element", "sub5", "sub7"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1217,12 +906,7 @@ }, { "name": "sub8", - "path": [ - "struct_field", - "list", - "element", - "sub8" - ], + "path": ["struct_field", "list", "element", "sub8"], "repetitionType": "OPTIONAL", "rLevelMax": 1, "dLevelMax": 3, @@ -1231,13 +915,7 @@ "fields": { "list": { "name": "list", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list" - ], + "path": ["struct_field", "list", "element", "sub8", "list"], "repetitionType": "REPEATED", "rLevelMax": 2, "dLevelMax": 4, @@ -1248,14 +926,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list", - "element" - ], + "path": ["struct_field", "list", "element", "sub8", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1269,13 +940,7 @@ }, { "name": "list", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list" - ], + "path": ["struct_field", "list", "element", "sub8", "list"], "repetitionType": "REPEATED", "rLevelMax": 2, "dLevelMax": 4, @@ -1286,14 +951,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list", - "element" - ], + "path": ["struct_field", "list", "element", "sub8", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -1306,14 +964,7 @@ "name": "element", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "struct_field", - "list", - "element", - "sub8", - "list", - "element" - ], + "path": ["struct_field", "list", "element", "sub8", "list", "element"], "repetitionType": "OPTIONAL", "encoding": "PLAIN", "compression": "UNCOMPRESSED", diff --git a/test/test-files/object.schema.json b/test/test-files/object.schema.json index 839db449..8e7e4ca4 100644 --- a/test/test-files/object.schema.json +++ b/test/test-files/object.schema.json @@ -1,7 +1,21 @@ { - "type": "object", - "properties": { - "person": { + "type": "object", + "properties": { + "person": { + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "age": { + "type": "integer" + } + }, + "required": ["name", "age"] + }, + 
"people": { + "type": "array", + "items": { "type": "object", "properties": { "name": { @@ -12,22 +26,8 @@ } }, "required": ["name", "age"] - }, - "people": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "age": { - "type": "integer" - } - }, - "required": ["name", "age"] - } } - }, - "required": ["person", "people"] - } + } + }, + "required": ["person", "people"] +} diff --git a/test/test-files/object.schema.result.json b/test/test-files/object.schema.result.json index 1dd725c3..55f86122 100644 --- a/test/test-files/object.schema.result.json +++ b/test/test-files/object.schema.result.json @@ -48,9 +48,7 @@ "fields": { "person": { "name": "person", - "path": [ - "person" - ], + "path": ["person"], "repetitionType": "REQUIRED", "rLevelMax": 0, "dLevelMax": 0, @@ -61,10 +59,7 @@ "name": "name", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "person", - "name" - ], + "path": ["person", "name"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -74,10 +69,7 @@ "age": { "name": "age", "primitiveType": "INT64", - "path": [ - "person", - "age" - ], + "path": ["person", "age"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -88,9 +80,7 @@ }, "people": { "name": "people", - "path": [ - "people" - ], + "path": ["people"], "repetitionType": "REQUIRED", "rLevelMax": 0, "dLevelMax": 0, @@ -99,10 +89,7 @@ "fields": { "list": { "name": "list", - "path": [ - "people", - "list" - ], + "path": ["people", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 1, @@ -111,11 +98,7 @@ "fields": { "element": { "name": "element", - "path": [ - "people", - "list", - "element" - ], + "path": ["people", "list", "element"], "repetitionType": "REQUIRED", "rLevelMax": 1, "dLevelMax": 1, @@ -126,12 +109,7 @@ "name": "name", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "people", - "list", - "element", - "name" - ], + "path": ["people", "list", "element", "name"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -141,12 +119,7 @@ "age": { "name": "age", "primitiveType": "INT64", - "path": [ - "people", - "list", - "element", - "age" - ], + "path": ["people", "list", "element", "age"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -164,9 +137,7 @@ "fieldList": [ { "name": "person", - "path": [ - "person" - ], + "path": ["person"], "repetitionType": "REQUIRED", "rLevelMax": 0, "dLevelMax": 0, @@ -177,10 +148,7 @@ "name": "name", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "person", - "name" - ], + "path": ["person", "name"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -190,10 +158,7 @@ "age": { "name": "age", "primitiveType": "INT64", - "path": [ - "person", - "age" - ], + "path": ["person", "age"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -206,10 +171,7 @@ "name": "name", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "person", - "name" - ], + "path": ["person", "name"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -219,10 +181,7 @@ { "name": "age", "primitiveType": "INT64", - "path": [ - "person", - "age" - ], + "path": ["person", "age"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -231,9 +190,7 @@ }, { "name": "people", - "path": [ - "people" - ], 
+ "path": ["people"], "repetitionType": "REQUIRED", "rLevelMax": 0, "dLevelMax": 0, @@ -242,10 +199,7 @@ "fields": { "list": { "name": "list", - "path": [ - "people", - "list" - ], + "path": ["people", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 1, @@ -254,11 +208,7 @@ "fields": { "element": { "name": "element", - "path": [ - "people", - "list", - "element" - ], + "path": ["people", "list", "element"], "repetitionType": "REQUIRED", "rLevelMax": 1, "dLevelMax": 1, @@ -269,12 +219,7 @@ "name": "name", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "people", - "list", - "element", - "name" - ], + "path": ["people", "list", "element", "name"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -284,12 +229,7 @@ "age": { "name": "age", "primitiveType": "INT64", - "path": [ - "people", - "list", - "element", - "age" - ], + "path": ["people", "list", "element", "age"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -305,10 +245,7 @@ }, { "name": "list", - "path": [ - "people", - "list" - ], + "path": ["people", "list"], "repetitionType": "REPEATED", "rLevelMax": 1, "dLevelMax": 1, @@ -317,11 +254,7 @@ "fields": { "element": { "name": "element", - "path": [ - "people", - "list", - "element" - ], + "path": ["people", "list", "element"], "repetitionType": "REQUIRED", "rLevelMax": 1, "dLevelMax": 1, @@ -332,12 +265,7 @@ "name": "name", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "people", - "list", - "element", - "name" - ], + "path": ["people", "list", "element", "name"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -347,12 +275,7 @@ "age": { "name": "age", "primitiveType": "INT64", - "path": [ - "people", - "list", - "element", - "age" - ], + "path": ["people", "list", "element", "age"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -365,11 +288,7 @@ }, { "name": "element", - "path": [ - "people", - "list", - "element" - ], + "path": ["people", "list", "element"], "repetitionType": "REQUIRED", "rLevelMax": 1, "dLevelMax": 1, @@ -380,12 +299,7 @@ "name": "name", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "people", - "list", - "element", - "name" - ], + "path": ["people", "list", "element", "name"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -395,12 +309,7 @@ "age": { "name": "age", "primitiveType": "INT64", - "path": [ - "people", - "list", - "element", - "age" - ], + "path": ["people", "list", "element", "age"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -413,12 +322,7 @@ "name": "name", "primitiveType": "BYTE_ARRAY", "originalType": "UTF8", - "path": [ - "people", - "list", - "element", - "name" - ], + "path": ["people", "list", "element", "name"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", @@ -428,12 +332,7 @@ { "name": "age", "primitiveType": "INT64", - "path": [ - "people", - "list", - "element", - "age" - ], + "path": ["people", "list", "element", "age"], "repetitionType": "REQUIRED", "encoding": "PLAIN", "compression": "UNCOMPRESSED", diff --git a/test/thrift.js b/test/thrift.js index a8a4862e..b65ee758 100644 --- a/test/thrift.js +++ b/test/thrift.js @@ -2,17 +2,15 @@ const chai = require('chai'); const assert = chai.assert; const thrift = require('thrift'); -const parquet_thrift = require('../gen-nodejs/parquet_types') -const 
parquet_util = require('../lib/util')
+const parquet_thrift = require('../gen-nodejs/parquet_types');
+const parquet_util = require('../lib/util');
 
-describe('Thrift', function() {
-
-  it('should correctly en/decode literal zeroes with the CompactProtocol', function() {
+describe('Thrift', function () {
+  it('should correctly en/decode literal zeroes with the CompactProtocol', function () {
     let obj = new parquet_thrift.ColumnMetaData();
     obj.num_values = 0;
     let obj_bin = parquet_util.serializeThrift(obj);
     assert.equal(obj_bin.length, 3);
   });
-
 });
diff --git a/test/types.js b/test/types.js
index 15957c8b..054f7903 100644
--- a/test/types.js
+++ b/test/types.js
@@ -1,203 +1,204 @@
 'use strict';
-const { toPrimitive, fromPrimitive } = require("../lib/types")
+const { toPrimitive, fromPrimitive } = require('../lib/types');
 const chai = require('chai');
 const assert = chai.assert;
 
-describe("toPrimitive* should give the correct values back", () => {
-  it('toPrimitive(INT_8, 127n)', () => {
-    assert.equal(toPrimitive('INT_8',127n), 127n)
-  }),
+describe('toPrimitive* should give the correct values back', () => {
+  it('toPrimitive(INT_8, 127n)', () => {
+    assert.equal(toPrimitive('INT_8', 127n), 127n);
+  }),
   it('toPrimitive(UINT_8, 255n)', () => {
-    assert.equal(toPrimitive('UINT_8',255n), 255n)
+    assert.equal(toPrimitive('UINT_8', 255n), 255n);
   }),
   it('toPrimitive(INT_16, 32767n)', () => {
-    assert.equal(toPrimitive('INT_16',32767n), 32767n)
+    assert.equal(toPrimitive('INT_16', 32767n), 32767n);
   }),
   it('toPrimitive(UINT_16, 65535n)', () => {
-    assert.equal(toPrimitive('UINT_16',65535n), 65535n)
+    assert.equal(toPrimitive('UINT_16', 65535n), 65535n);
   }),
   it('toPrimitive(INT32, 2147483647n)', () => {
-    assert.equal(toPrimitive('INT32',2147483647n), 2147483647n)
+    assert.equal(toPrimitive('INT32', 2147483647n), 2147483647n);
   }),
   it('toPrimitive(UINT_32, 4294967295n)', () => {
-    assert.equal(toPrimitive('UINT_32',4294967295n), 4294967295n)
+    assert.equal(toPrimitive('UINT_32', 4294967295n), 4294967295n);
   }),
   it('toPrimitive(INT64, 9223372036854775807n)', () => {
-    assert.equal(toPrimitive('INT64',9223372036854775807n), 9223372036854775807n)
+    assert.equal(toPrimitive('INT64', 9223372036854775807n), 9223372036854775807n);
   }),
   it('toPrimitive(UINT_64, 9223372036854775807n)', () => {
-    assert.equal(toPrimitive('UINT_64',9223372036854775807n), 9223372036854775807n)
+    assert.equal(toPrimitive('UINT_64', 9223372036854775807n), 9223372036854775807n);
   }),
   it('toPrimitive(INT96, 9223372036854775807n)', () => {
-    assert.equal(toPrimitive('INT96',9223372036854775807n), 9223372036854775807n)
-  })
-})
+    assert.equal(toPrimitive('INT96', 9223372036854775807n), 9223372036854775807n);
+  });
+});
 
-describe("toPrimitive INT* should give the correct values back with string value", () => {
-  it('toPrimitive(INT_8, "127")', () => {
-    assert.equal(toPrimitive('INT_8',"127"), 127n)
-  }),
+describe('toPrimitive INT* should give the correct values back with string value', () => {
+  it('toPrimitive(INT_8, "127")', () => {
+    assert.equal(toPrimitive('INT_8', '127'), 127n);
+  }),
   it('toPrimitive(UINT_8, "255")', () => {
-    assert.equal(toPrimitive('UINT_8',"255"), 255n)
+    assert.equal(toPrimitive('UINT_8', '255'), 255n);
   }),
   it('toPrimitive(INT_16, "32767")', () => {
-    assert.equal(toPrimitive('INT_16',"32767"), 32767n)
+    assert.equal(toPrimitive('INT_16', '32767'), 32767n);
   }),
   it('toPrimitive(UINT_16, "65535")', () => {
-    assert.equal(toPrimitive('UINT_16',"65535"), 65535n)
+    assert.equal(toPrimitive('UINT_16', '65535'), 65535n);
   }),
   it('toPrimitive(INT32, "2147483647")', () => {
-    assert.equal(toPrimitive('INT32',"2147483647"), 2147483647n)
+    assert.equal(toPrimitive('INT32', '2147483647'), 2147483647n);
   }),
   it('toPrimitive(UINT_32, "4294967295")', () => {
-    assert.equal(toPrimitive('UINT_32',"4294967295"), 4294967295n)
+    assert.equal(toPrimitive('UINT_32', '4294967295'), 4294967295n);
   }),
   it('toPrimitive(INT64, "9223372036854775807")', () => {
-    assert.equal(toPrimitive('INT64',"9223372036854775807"), 9223372036854775807n)
+    assert.equal(toPrimitive('INT64', '9223372036854775807'), 9223372036854775807n);
   }),
   it('toPrimitive(UINT_64, "9223372036854775807")', () => {
-    assert.equal(toPrimitive('UINT_64',"9223372036854775807"), 9223372036854775807n)
+    assert.equal(toPrimitive('UINT_64', '9223372036854775807'), 9223372036854775807n);
   }),
   it('toPrimitive(INT96, "9223372036854775807")', () => {
-    assert.equal(toPrimitive('INT96',"9223372036854775807"), 9223372036854775807n)
-  })
-})
-
+    assert.equal(toPrimitive('INT96', '9223372036854775807'), 9223372036854775807n);
+  });
+});
 
-describe("toPrimitive INT* should throw when given invalid value", () => {
-  describe("Testing toPrimitive_INT_8 values", () => {
-    it('toPrimitive(INT_8, 128) is too large', () => {
-      assert.throws(() => toPrimitive('INT_8',128))
-    }),
-    it('toPrimitive(INT_8, -256) is too small', () => {
-      assert.throws(() => toPrimitive('INT_8',-256))
-    }),
-    it('toPrimitive(INT_8, "asd12@!$1") is given gibberish and should throw', () => {
-      assert.throws(() => toPrimitive('INT_8', "asd12@!$1"))
-    })
-  }),
-  describe("Testing toPrimitive_UINT8 values", () => {
-    it('toPrimitive(UINT_8, 128) is too large', () => {
-      assert.throws(() => toPrimitive('UINT_8',256))
-    }),
+describe('toPrimitive INT* should throw when given invalid value', () => {
+  describe('Testing toPrimitive_INT_8 values', () => {
+    it('toPrimitive(INT_8, 128) is too large', () => {
+      assert.throws(() => toPrimitive('INT_8', 128));
+    }),
+    it('toPrimitive(INT_8, -256) is too small', () => {
+      assert.throws(() => toPrimitive('INT_8', -256));
+    }),
+    it('toPrimitive(INT_8, "asd12@!$1") is given gibberish and should throw', () => {
+      assert.throws(() => toPrimitive('INT_8', 'asd12@!$1'));
+    });
+  }),
+  describe('Testing toPrimitive_UINT8 values', () => {
+    it('toPrimitive(UINT_8, 256) is too large', () => {
+      assert.throws(() => toPrimitive('UINT_8', 256));
+    }),
     it('toPrimitive(UINT_8, -256) is too small', () => {
-      assert.throws(() => toPrimitive('UINT_8',-1))
+      assert.throws(() => toPrimitive('UINT_8', -1));
     }),
     it('toPrimitive(UINT_8, "asd12@!$1") is given gibberish and should throw', () => {
-      assert.throws(() => toPrimitive('UINT_8', "asd12@!$1"))
-    })
+      assert.throws(() => toPrimitive('UINT_8', 'asd12@!$1'));
+    });
   }),
-  describe("Testing toPrimitive_INT16 values", () => {
-    it('toPrimitive(INT_16, 9999999) is too large', () => {
-      assert.throws(() => toPrimitive('INT_16',9999999))
-    }),
+  describe('Testing toPrimitive_INT16 values', () => {
+    it('toPrimitive(INT_16, 9999999) is too large', () => {
+      assert.throws(() => toPrimitive('INT_16', 9999999));
+    }),
    it('toPrimitive(INT_16, -9999999) is too small', () => {
-      assert.throws(() => toPrimitive('INT_16',-9999999))
+      assert.throws(() => toPrimitive('INT_16', -9999999));
    }),
    it('toPrimitive(INT_16, "asd12@!$1") is given gibberish and should throw', () => {
-      assert.throws(() => toPrimitive('INT_16', "asd12@!$1"))
-    })
+      assert.throws(() => toPrimitive('INT_16', 'asd12@!$1'));
+    });
  }),
-  describe("Testing toPrimitive_UINT16 values", () => {
-    it('toPrimitive(UINT_16, 9999999999999) is too large', () => {
-      assert.throws(() => toPrimitive('UINT_16',9999999999999))
-    }),
+  describe('Testing toPrimitive_UINT16 values', () => {
+    it('toPrimitive(UINT_16, 9999999999999) is too large', () => {
+      assert.throws(() => toPrimitive('UINT_16', 9999999999999));
+    }),
    it('toPrimitive(UINT_16, -999999999999) is too small', () => {
-      assert.throws(() => toPrimitive('UINT_16',-9999999999999))
+      assert.throws(() => toPrimitive('UINT_16', -9999999999999));
    }),
    it('toPrimitive(UINT_16, "asd12@!$1") is given gibberish and should throw', () => {
-      assert.throws(() => toPrimitive('UINT_16', "asd12@!$1"))
-    })
+      assert.throws(() => toPrimitive('UINT_16', 'asd12@!$1'));
+    });
  }),
-  describe("Testing toPrimitive_INT32 values", () => {
-    it('toPrimitive(INT_32, 999999999999) is too large', () => {
-      assert.throws(() => toPrimitive('INT_32',999999999999))
-    }),
+  describe('Testing toPrimitive_INT32 values', () => {
+    it('toPrimitive(INT_32, 999999999999) is too large', () => {
+      assert.throws(() => toPrimitive('INT_32', 999999999999));
+    }),
    it('toPrimitive(INT_32, -999999999999) is too small', () => {
-      assert.throws(() => toPrimitive('INT_32',-999999999999))
+      assert.throws(() => toPrimitive('INT_32', -999999999999));
    }),
    it('toPrimitive(INT_32, "asd12@!$1") is given gibberish and should throw', () => {
-      assert.throws(() => toPrimitive('INT_32', "asd12@!$1"))
-    })
+      assert.throws(() => toPrimitive('INT_32', 'asd12@!$1'));
+    });
  }),
-  describe("Testing toPrimitive_UINT32 values", () => {
-    it('toPrimitive(UINT_32, 999999999999) is too large', () => {
-      assert.throws(() => toPrimitive('UINT_32',999999999999999))
-    }),
+  describe('Testing toPrimitive_UINT32 values', () => {
+    it('toPrimitive(UINT_32, 999999999999999) is too large', () => {
+      assert.throws(() => toPrimitive('UINT_32', 999999999999999));
+    }),
    it('toPrimitive(UINT_32, -999999999999) is too small', () => {
-      assert.throws(() => toPrimitive('UINT_32',-999999999999))
+      assert.throws(() => toPrimitive('UINT_32', -999999999999));
    }),
    it('toPrimitive(UINT_32, "asd12@!$1") is given gibberish and should throw', () => {
-      assert.throws(() => toPrimitive('UINT_32', "asd12@!$1"))
-    })
+      assert.throws(() => toPrimitive('UINT_32', 'asd12@!$1'));
+    });
  }),
-  describe("Testing toPrimitive_INT64 values", () => {
-    it('toPrimitive(INT_64, "9999999999999999999999") is too large', () => {
-      assert.throws(() => toPrimitive('INT_64', 9999999999999999999999))
-    }),
+  describe('Testing toPrimitive_INT64 values', () => {
+    it('toPrimitive(INT_64, "9999999999999999999999") is too large', () => {
+      assert.throws(() => toPrimitive('INT_64', 9999999999999999999999));
+    }),
    it('toPrimitive(INT_64, "-9999999999999999999999999") is too small', () => {
-      assert.throws(() => toPrimitive('INT_64', -9999999999999999999999999))
+      assert.throws(() => toPrimitive('INT_64', -9999999999999999999999999));
    }),
    it('toPrimitive(INT_64, "asd12@!$1") is given gibberish and should throw', () => {
-      assert.throws(() => toPrimitive('INT_64', "asd12@!$1"))
-    })
+      assert.throws(() => toPrimitive('INT_64', 'asd12@!$1'));
+    });
  }),
-  describe("Testing toPrimitive_UINT64 values", () => {
-    it('toPrimitive(UINT_64, 9999999999999999999999) is too large', () => {
-      assert.throws(() => toPrimitive('UINT_64',9999999999999999999999))
-    }),
+  describe('Testing toPrimitive_UINT64 values', () => {
+    it('toPrimitive(UINT_64, 9999999999999999999999) is too large', () => {
+      assert.throws(() => toPrimitive('UINT_64', 9999999999999999999999));
+    }),
    it('toPrimitive(UINT_64, -999999999999) is too small', () => {
-      assert.throws(() => toPrimitive('UINT_64',-999999999999))
+      assert.throws(() => toPrimitive('UINT_64', -999999999999));
    }),
    it('toPrimitive(UINT_64, "asd12@!$1") is given gibberish and should throw', () => {
-      assert.throws(() => toPrimitive('UINT_64', "asd12@!$1"))
-    })
+      assert.throws(() => toPrimitive('UINT_64', 'asd12@!$1'));
+    });
  }),
-  describe("Testing toPrimitive_INT96 values", () => {
-    it('toPrimitive(UINT_96, 9999999999999999999999) is too large', () => {
-      assert.throws(() => toPrimitive('INT_96',9999999999999999999999))
-    }),
+  describe('Testing toPrimitive_INT96 values', () => {
+    it('toPrimitive(UINT_96, 9999999999999999999999) is too large', () => {
+      assert.throws(() => toPrimitive('INT_96', 9999999999999999999999));
+    }),
    it('toPrimitive(UINT_96, -9999999999999999999999) is too small', () => {
-      assert.throws(() => toPrimitive('INT_96',-9999999999999999999999))
+      assert.throws(() => toPrimitive('INT_96', -9999999999999999999999));
    }),
    it('toPrimitive(UINT_96, "asd12@!$1") is given gibberish and should throw', () => {
-      assert.throws(() => toPrimitive('INT_96', "asd12@!$1"))
-    })
+      assert.throws(() => toPrimitive('INT_96', 'asd12@!$1'));
+    });
  });
-  describe("toPrimitive ", () => {
-    const date = new Date(Date.parse('2022-12-01:00:00:01 GMT'));
-
-    ['TIME_MILLIS', 'TIME_MICROS', 'DATE', 'TIMESTAMP_MILLIS', 'TIMESTAMP_MICROS'].forEach(typeName => {
-      it(`for type ${typeName} happy path`, () => {
-        assert.equal(1234, toPrimitive(typeName, 1234));
-        assert.equal(1234, toPrimitive(typeName, "1234"));
-      });
-      it(`for type ${typeName} fails with negative values`, () => {
-        assert.throws(() => toPrimitive(typeName, "-1"), `${typeName} value is out of bounds: -1`);
-        assert.throws(() => toPrimitive(typeName, -1), `${typeName} value is out of bounds: -1`);
-      });
-    });
-    ['DATE', 'TIMESTAMP_MILLIS', 'TIME_MILLIS'].forEach(typeName => {
-      it(`${typeName} throws when number too large`, () => {
-        assert.throws(() => toPrimitive(typeName, 9999999999999999999999), `${typeName} value is out of bounds: 1e+22`);
-        assert.throws(() => toPrimitive(typeName, "9999999999999999999999"), `${typeName} value is out of bounds: 1e+22`);
-      })
-    });
-    it('DATE conversion works for DATE type', () => {
-      assert.equal(toPrimitive('DATE', date), 19327.000011574073);
-    });
-    it('TIMESTAMP_MICROS works for a Date type and bigint', () => {
-      assert.equal( toPrimitive('TIMESTAMP_MICROS', date), 1669852801000000n);
-      assert.equal( toPrimitive('TIMESTAMP_MICROS', "9999999999999999999999"), 9999999999999999999999n);
-      assert.equal( toPrimitive('TIMESTAMP_MICROS', 98989898n), 98989898n);
-    } )
-    it("TIME_MICROS works for a bigint", () => {
-      const timestampBigint = 1932733334490741n;
-      assert.equal( toPrimitive('TIME_MICROS', timestampBigint), 1932733334490741);
-      assert.equal( toPrimitive('TIME_MICROS', "9999999999999999999999"), 9999999999999999999999n);
-      assert.equal( toPrimitive('TIME_MICROS', 9999999999999999999999n), 9999999999999999999999n);
-    })
-  })
-})
+  describe('toPrimitive ', () => {
+    const date = new Date(Date.parse('2022-12-01:00:00:01 GMT'));
+
+    ['TIME_MILLIS', 'TIME_MICROS', 'DATE', 'TIMESTAMP_MILLIS', 'TIMESTAMP_MICROS'].forEach((typeName) => {
+      it(`for type ${typeName} happy path`, () => {
+        assert.equal(1234, toPrimitive(typeName, 1234));
+        assert.equal(1234, toPrimitive(typeName, '1234'));
+      });
+      it(`for type ${typeName} fails with negative values`, () => {
+        assert.throws(() => toPrimitive(typeName, '-1'), `${typeName} value is out of bounds: -1`);
+        assert.throws(() => toPrimitive(typeName, -1), `${typeName} value is out of bounds: -1`);
+      });
+    });
+    ['DATE', 'TIMESTAMP_MILLIS', 'TIME_MILLIS'].forEach((typeName) => {
+      it(`${typeName} throws when number too large`, () => {
+        assert.throws(() => toPrimitive(typeName, 9999999999999999999999), `${typeName} value is out of bounds: 1e+22`);
+        assert.throws(
+          () => toPrimitive(typeName, '9999999999999999999999'),
+          `${typeName} value is out of bounds: 1e+22`
+        );
+      });
+    });
+    it('DATE conversion works for DATE type', () => {
+      assert.equal(toPrimitive('DATE', date), 19327.000011574073);
+    });
+    it('TIMESTAMP_MICROS works for a Date type and bigint', () => {
+      assert.equal(toPrimitive('TIMESTAMP_MICROS', date), 1669852801000000n);
+      assert.equal(toPrimitive('TIMESTAMP_MICROS', '9999999999999999999999'), 9999999999999999999999n);
+      assert.equal(toPrimitive('TIMESTAMP_MICROS', 98989898n), 98989898n);
+    });
+    it('TIME_MICROS works for a bigint', () => {
+      const timestampBigint = 1932733334490741n;
+      assert.equal(toPrimitive('TIME_MICROS', timestampBigint), 1932733334490741);
+      assert.equal(toPrimitive('TIME_MICROS', '9999999999999999999999'), 9999999999999999999999n);
+      assert.equal(toPrimitive('TIME_MICROS', 9999999999999999999999n), 9999999999999999999999n);
+    });
+  });
+});
diff --git a/test/util/assert_util.js b/test/util/assert_util.js
index 723917dd..12ace15f 100644
--- a/test/util/assert_util.js
+++ b/test/util/assert_util.js
@@ -4,7 +4,7 @@ const assert = chai.assert;
 
 const EPSILON_DEFAULT = 0.01;
 
-exports.assertArrayEqualEpsilon = function(a, b, e) {
+exports.assertArrayEqualEpsilon = function (a, b, e) {
   if (!e) {
     e = EPSILON_DEFAULT;
   }
@@ -13,4 +13,4 @@ exports.assertArrayEqualEpsilon = function(a, b, e) {
   for (let i = 0; i < a.length; ++i) {
     assert(Math.abs(a[i] - b[i]) < e);
   }
-}
+};
diff --git a/tsconfig.json b/tsconfig.json
index 7842007a..3eb589d3 100644
--- a/tsconfig.json
+++ b/tsconfig.json
@@ -12,11 +12,8 @@
     "sourceMap": false,
     "strict": true,
     "target": "ESNext",
-    "typeRoots": [
-      "node_modules/@types",
-      "gen-nodejs",
-    ]
+    "typeRoots": ["node_modules/@types", "gen-nodejs"]
   },
-  "include": ["parquet.ts","lib/**/*", "gen-nodejs/*", "browser/*.ts"],
+  "include": ["parquet.ts", "lib/**/*", "gen-nodejs/*", "browser/*.ts"],
   "files": ["lib/custom.d.ts"]
 }

From 0e5d8c6b42991b35a484f2ab43130000cac3d0e2 Mon Sep 17 00:00:00 2001
From: Wil Wade
Date: Mon, 1 Jul 2024 09:44:01 -0400
Subject: [PATCH 4/5] Manual fixes outside of tests

Manual type fixes for issues flagged by the new ESLint config:
prototype-safe `hasOwnProperty` calls, explicit function signatures in
place of the loose `Function` type, and removal of unused imports and
type aliases.
---
 esbuild-plugins.js    |  2 +-
 lib/bloom/sbbf.ts     |  4 ++--
 lib/bloom/xxhasher.ts |  2 --
 lib/compression.ts    |  5 -----
 lib/declare.ts        |  5 -----
 lib/reader.ts         | 25 ++++++++++++++-----------
 lib/schema.ts         |  2 --
 lib/util.ts           |  1 -
 lib/writer.ts         | 34 ++++++++++++++++++++--------------
 9 files changed, 37 insertions(+), 43 deletions(-)

diff --git a/esbuild-plugins.js b/esbuild-plugins.js
index ef4f3932..a9caefc0 100644
--- a/esbuild-plugins.js
+++ b/esbuild-plugins.js
@@ -6,7 +6,7 @@ const compressionBrowserPlugin = {
   name: 'compressionBrowser',
   setup(build) {
     let path = require('path');
-    build.onResolve({ filter: /^\.\/compression$/ }, (args) => {
+    build.onResolve({ filter: /^\.\/compression$/ }, (_args) => {
       return {
         path: path.resolve(__dirname, 'lib', 'browser', 'compression.js'),
       };
diff --git a/lib/bloom/sbbf.ts b/lib/bloom/sbbf.ts
index 0eb386a5..902d5064 100644
--- a/lib/bloom/sbbf.ts
+++ b/lib/bloom/sbbf.ts
@@ -343,7 +343,7 @@ class SplitBlockBloomFilter {
return this; } - if (!this.hashStrategy.hasOwnProperty('XXHASH')) { + if (!Object.prototype.hasOwnProperty.call(this.hashStrategy, 'XXHASH')) { throw new Error('unsupported hash strategy'); } @@ -357,7 +357,7 @@ } async hash(value: any): Promise<Long> { - if (!this.hashStrategy.hasOwnProperty('XXHASH')) { + if (!Object.prototype.hasOwnProperty.call(this.hashStrategy, 'XXHASH')) { throw new Error('unsupported hash strategy'); } const hashed = await this.hasher.hash64(value); diff --git a/lib/bloom/xxhasher.ts index 392baa74..2ddba44b 100644 --- a/lib/bloom/xxhasher.ts +++ b/lib/bloom/xxhasher.ts @@ -1,8 +1,6 @@ import xxhash from 'xxhash-wasm'; import Long from 'long'; -type HasherFunc = (input: string, seedHigh?: number, seedLow?: number) => string; - /** * @class XxHasher * diff --git a/lib/compression.ts index 1d19eb08..cbfe2452 100644 --- a/lib/compression.ts +++ b/lib/compression.ts @@ -2,11 +2,6 @@ import zlib from 'zlib'; import snappy from 'snappyjs'; import { compress as brotliCompress, decompress as brotliDecompress } from 'brotli-wasm'; -type d_identity = (value: ArrayBuffer | Buffer | Uint8Array) => ArrayBuffer | Buffer | Uint8Array; -type d_gzip = (value: ArrayBuffer | Buffer | string) => Buffer; -type d_snappy = (value: ArrayBuffer | Buffer | Uint8Array) => ArrayBuffer | Buffer | Uint8Array; -type d_brotli = (value: Uint8Array) => Promise<Buffer>; - interface PARQUET_COMPRESSION_METHODS { [key: string]: { deflate: (value: any) => Buffer | Promise<Buffer>; inflate: (value: any) => Buffer | Promise<Buffer>; }; } diff --git a/lib/declare.ts index 1f2e64e2..9bc57c50 100644 --- a/lib/declare.ts +++ b/lib/declare.ts @@ -10,8 +10,6 @@ import { DataPageHeaderV2, DictionaryPageHeader, IndexPageHeader, - Type, - ColumnMetaData, } from '../gen-nodejs/parquet_types'; import SplitBlockBloomFilter from './bloom/sbbf'; import { createSBBFParams } from './bloomFilterIO/bloomFilterWriter'; @@ -214,9 +212,6 @@ export interface FileMetaDataExt extends parquet_thrift.FileMetaData { export class NewPageHeader extends parquet_thrift.PageHeader { offset?: number; headerSize?: number; - constructor() { - super(); - } } export type WriterOptions = { diff --git a/lib/reader.ts index 7a1c2630..ccc845d6 100644 --- a/lib/reader.ts +++ b/lib/reader.ts @@ -337,10 +337,11 @@ export class ParquetReader { if (typeof value === 'object') { for (let k in value) { if (value[k] instanceof Date) { - value[k].toJSON = () => ({ - parquetType: 'CTIME', - value: value[k].valueOf(), - }); + value[k].toJSON = () => + JSON.stringify({ + parquetType: 'CTIME', + value: value[k].valueOf(), + }); } } } @@ -407,7 +408,7 @@ export class ParquetEnvelopeReader { readFn: (offset: number, length: number, file?: string) => Promise<Buffer>; close: () => unknown; id: number; - fileSize: Function | number; + fileSize: number | (() => Promise<number>); default_dictionary_size: number; metadata?: FileMetaDataExt; schema?: parquet_schema.ParquetSchema; @@ -467,7 +468,10 @@ export class ParquetEnvelopeReader { static async openS3v3(client: S3Client, params: any, options: any) { const fileStat = async () => { try { - let headObjectCommand = await client.send(new HeadObjectCommand(params)); + const headObjectCommand = await client.send(new HeadObjectCommand(params)); + if (headObjectCommand.ContentLength === undefined) { + throw new Error('Content Length is undefined!'); + } return Promise.resolve(headObjectCommand.ContentLength); } catch (e: any) { // having params match command names makes e.message clear to user @@ -526,9
+530,9 @@ export class ParquetEnvelopeReader { let defaultHeaders = params.headers || {}; - let filesize = async () => { + const filesize = async (): Promise<number> => { const { headers } = await fetch(params.url); - return headers.get('Content-Length'); + return Number(headers.get('Content-Length')) || 0; }; let readFn = async (offset: number, length: number, file?: string) => { @@ -550,7 +554,7 @@ export class ParquetEnvelopeReader { constructor( readFn: (offset: number, length: number, file?: string) => Promise<Buffer>, closeFn: () => unknown, - fileSize: Function | number, + fileSize: number | (() => Promise<number>), options?: BufferReaderOptions, metadata?: FileMetaDataExt ) { @@ -884,9 +888,8 @@ async function decodePage(cursor: Cursor, opts: Options): Promise<PageData> { page = await decodeDataPageV2(cursor, pageHeader, opts); break; case 'DICTIONARY_PAGE': - const dict = await decodeDictionaryPage(cursor, pageHeader, opts); page = { - dictionary: dict, + dictionary: await decodeDictionaryPage(cursor, pageHeader, opts), }; break; default: diff --git a/lib/schema.ts index 5671bea9..efb7f059 100644 --- a/lib/schema.ts +++ b/lib/schema.ts @@ -5,8 +5,6 @@ import { SchemaDefinition, ParquetField, RepetitionType, FieldDefinition } from import { JSONSchema4 } from 'json-schema'; import { fromJsonSchema } from './jsonSchema'; -const PARQUET_COLUMN_KEY_SEPARATOR = '.'; - /** * A parquet file schema */ diff --git a/lib/util.ts index 0cf45453..9aaacc09 100644 --- a/lib/util.ts +++ b/lib/util.ts @@ -4,7 +4,6 @@ import fs, { WriteStream } from 'fs'; import * as parquet_thrift from '../gen-nodejs/parquet_types'; import { FileMetaDataExt, WriterOptions } from './declare'; import { Int64 } from 'thrift'; -import { type } from 'os'; // Use this so users only need to implement the minimal amount of the WriteStream interface export type WriteStreamMinimal = Pick<WriteStream, 'write' | 'end'>; diff --git a/lib/writer.ts index ac005162..8aebe400 100644 --- a/lib/writer.ts +++ b/lib/writer.ts @@ -6,15 +6,7 @@ import * as parquet_codec from './codec'; import * as parquet_compression from './compression'; import * as parquet_types from './types'; import * as bloomFilterWriter from './bloomFilterIO/bloomFilterWriter'; -import { - WriterOptions, - ParquetCodec, - ParquetField, - ColumnMetaDataExt, - RowGroupExt, - Page, - FieldDefinition, -} from './declare'; +import { WriterOptions, ParquetCodec, ParquetField, ColumnMetaDataExt, RowGroupExt, Page } from './declare'; import { Options } from './codec/types'; import { ParquetSchema } from './schema'; import Int64 from 'node-int64'; @@ -193,8 +185,8 @@ */ export class ParquetEnvelopeWriter { schema: ParquetSchema; - write: Function; - close: Function; + write: (buf: Buffer) => void; + close: () => void; offset: Int64; rowCount: Int64; rowGroups: RowGroupExt[]; @@ -212,7 +204,13 @@ return new ParquetEnvelopeWriter(schema, writeFn, closeFn, new Int64(0), opts); } - constructor(schema: ParquetSchema, writeFn: Function, closeFn: Function, fileOffset: Int64, opts: WriterOptions) { + constructor( + schema: ParquetSchema, + writeFn: (buf: Buffer) => void, + closeFn: () => void, + fileOffset: Int64, + opts: WriterOptions + ) { this.schema = schema; this.write = writeFn; this.close = closeFn; @@ -347,12 +345,20 @@ this.writer = new ParquetWriter( schema, - new ParquetEnvelopeWriter(schema, writeProxy, function () {}, new Int64(0), opts), + new
ParquetEnvelopeWriter( + schema, + writeProxy, + () => { + /* void */ + }, + new Int64(0), + opts + ), opts ); } - _transform(row: Record<string, unknown>, _encoding: string, callback: Function) { + _transform(row: Record<string, unknown>, _encoding: string, callback: (err?: Error | null, data?: any) => void) { if (row) { this.writer.appendRow(row).then( (data) => callback(null, data), From 91ae9ace3c1bf4892502e2d5d7357a2146e4644c Mon Sep 17 00:00:00 2001 From: Wil Wade Date: Mon, 1 Jul 2024 09:44:17 -0400 Subject: [PATCH 5/5] Eslint fix and manual fixes in tests --- lib/bloom/sbbf.ts | 16 +-- lib/bloomFilterIO/bloomFilterReader.ts | 20 ++-- lib/bloomFilterIO/bloomFilterWriter.ts | 12 +- lib/bufferReader.ts | 4 +- lib/codec/plain.ts | 72 ++++++------ lib/codec/rle.ts | 14 +-- lib/codec/types.ts | 2 +- lib/compression.ts | 9 +- lib/declare.ts | 18 ++- lib/reader.ts | 149 +++++++++++++------------ lib/schema.ts | 29 ++--- lib/shred.ts | 38 +++---- lib/types.ts | 18 +-- lib/util.ts | 28 ++--- lib/writer.ts | 82 +++++++------- test/bloomFilterIntegration.ts | 59 +++------- test/bloomFilterReader.test.ts | 15 +-- test/bloomFilterWriter.test.ts | 41 +++---- test/browser/main.ts | 8 +- test/dictionary.js | 2 +- test/integration.js | 10 +- test/jsonSchema.test.ts | 104 ++++++++--------- test/lib/bufferReader.test.js | 29 ++--- test/list.js | 70 ++++++------ test/metadata-cache.js | 4 +- test/reader.js | 31 ++--- test/reference-test/read-all.test.ts | 1 + test/sbbf.ts | 31 +++-- test/statistics.js | 2 +- test/thrift.js | 1 - test/types.js | 139 ++++++++++++----------- 31 files changed, 525 insertions(+), 533 deletions(-) diff --git a/lib/bloom/sbbf.ts index 902d5064..ca98baec 100644 --- a/lib/bloom/sbbf.ts +++ b/lib/bloom/sbbf.ts @@ -31,7 +31,7 @@ import { Block } from '../declare'; * .init() */ class SplitBlockBloomFilter { - private static readonly salt: Array<number> = [ + private static readonly salt: number[] = [ 0x47b6137b, 0x44974d91, 0x8824ad5b, 0xa2b7289d, 0x705495c7, 0x2df1424b, 0x9efc4947, 0x5c6bfb31, ]; @@ -73,19 +73,19 @@ class SplitBlockBloomFilter { * from the provided Buffer * @param buffer a NodeJs Buffer containing bloom filter data for a row group.
*/ - static from(buffer: Buffer, rowCount?: number): SplitBlockBloomFilter { + static from(buffer: Buffer, _rowCount?: number): SplitBlockBloomFilter { if (buffer.length === 0) { throw new Error('buffer is empty'); } const chunkSize = SplitBlockBloomFilter.WORDS_PER_BLOCK; const uint32sFromBuf = new Uint32Array(buffer.buffer); - let result: Array<Block> = []; + const result: Block[] = []; const length = uint32sFromBuf.length; for (let index = 0; index < length; index += chunkSize) { result.push(uint32sFromBuf.subarray(index, index + chunkSize)); } - let sb = new SplitBlockBloomFilter(); + const sb = new SplitBlockBloomFilter(); sb.splitBlockFilter = result; sb.numBlocks = result.length; // these will not be knowable when reading @@ -151,7 +151,7 @@ class SplitBlockBloomFilter { * @return mask Block */ static mask(hashValue: Long): Block { - let result: Block = SplitBlockBloomFilter.initBlock(); + const result: Block = SplitBlockBloomFilter.initBlock(); for (let i = 0; i < result.length; i++) { const y = hashValue.getLowBitsUnsigned() * SplitBlockBloomFilter.salt[i]; result[i] = result[i] | (1 << (y >>> 27)); @@ -210,9 +210,9 @@ class SplitBlockBloomFilter { * Instance */ - private splitBlockFilter: Array<Block> = []; + private splitBlockFilter: Block[] = []; private desiredFalsePositiveRate: number = SplitBlockBloomFilter.DEFAULT_FALSE_POSITIVE_RATE; - private numBlocks: number = 0; + private numBlocks = 0; private numDistinctValues: number = SplitBlockBloomFilter.DEFAULT_DISTINCT_VALUES; private hashStrategy = new parquet_thrift.BloomFilterHash(new parquet_thrift.XxHash()); private hasher = new XxHasher(); @@ -230,7 +230,7 @@ getNumFilterBlocks(): number { return this.splitBlockFilter.length; } - getFilter(): Array<Block> { + getFilter(): Block[] { return this.splitBlockFilter; } diff --git a/lib/bloomFilterIO/bloomFilterReader.ts index a63a89c1..5b386397 100644 --- a/lib/bloomFilterIO/bloomFilterReader.ts +++ b/lib/bloomFilterIO/bloomFilterReader.ts @@ -4,17 +4,17 @@ import sbbf from '../bloom/sbbf'; import { ParquetEnvelopeReader } from '../reader'; import { ColumnChunkData } from '../declare'; -const filterColumnChunksWithBloomFilters = (columnChunkDataCollection: Array<ColumnChunkData>) => { +const filterColumnChunksWithBloomFilters = (columnChunkDataCollection: ColumnChunkData[]) => { return columnChunkDataCollection.filter((columnChunk) => { return columnChunk.column.meta_data?.bloom_filter_offset; }); }; -type bloomFilterOffsetData = { +interface bloomFilterOffsetData { columnName: string; offsetBytes: number; rowGroupIndex: number; -}; +} const toInteger = (buffer: Buffer) => { const integer = parseInt(buffer.toString('hex'), 16); @@ -26,9 +26,7 @@ const toInteger = (buffer: Buffer) => { return integer; }; -export const parseBloomFilterOffsets = ( - ColumnChunkDataCollection: Array<ColumnChunkData> -): Array<bloomFilterOffsetData> => { +export const parseBloomFilterOffsets = (ColumnChunkDataCollection: ColumnChunkData[]): bloomFilterOffsetData[] => { return ColumnChunkDataCollection.map(({ rowGroupIndex, column }) => { const { bloom_filter_offset: bloomOffset, path_in_schema: pathInSchema } = column.meta_data || {}; @@ -83,25 +81,25 @@ }; const readFilterDataFrom = ( - offsets: Array<number>, + offsets: number[], envelopeReader: InstanceType<typeof ParquetEnvelopeReader> -): Promise<Array<Buffer>> => { +): Promise<Buffer[]> => { return Promise.all(offsets.map((offset) => readFilterData(offset, envelopeReader))); }; -export const
siftAllByteOffsets = (columnChunkDataCollection: Array<ColumnChunkData>): Array<bloomFilterOffsetData> => { +export const siftAllByteOffsets = (columnChunkDataCollection: ColumnChunkData[]): bloomFilterOffsetData[] => { return parseBloomFilterOffsets(filterColumnChunksWithBloomFilters(columnChunkDataCollection)); }; export const getBloomFiltersFor = async ( - paths: Array<string>, + paths: string[], envelopeReader: InstanceType<typeof ParquetEnvelopeReader> ) => { const columnChunkDataCollection = envelopeReader.getAllColumnChunkDataFor(paths); const bloomFilterOffsetData = siftAllByteOffsets(columnChunkDataCollection); const offsetByteValues = bloomFilterOffsetData.map(({ offsetBytes }) => offsetBytes); - const filterBlocksBuffers: Array<Buffer> = await readFilterDataFrom(offsetByteValues, envelopeReader); + const filterBlocksBuffers: Buffer[] = await readFilterDataFrom(offsetByteValues, envelopeReader); return filterBlocksBuffers.map((buffer, index) => { const { columnName, rowGroupIndex } = bloomFilterOffsetData[index]; diff --git a/lib/bloomFilterIO/bloomFilterWriter.ts index c6ac0de1..02f65399 100644 --- a/lib/bloomFilterIO/bloomFilterWriter.ts +++ b/lib/bloomFilterIO/bloomFilterWriter.ts @@ -5,12 +5,12 @@ import SplitBlockBloomFilter from '../bloom/sbbf'; import { Block } from '../declare'; import Int64 from 'node-int64'; -export type createSBBFParams = { +export interface createSBBFParams { numFilterBytes?: number; falsePositiveRate?: number; numDistinct?: number; column?: any; -}; +} export const createSBBF = (params: createSBBFParams): SplitBlockBloomFilter => { const { numFilterBytes, falsePositiveRate, numDistinct } = params; @@ -30,7 +30,7 @@ export const createSBBF = (params: createSBBFParams): SplitBlockBloomFilter => { return bloomFilter.init(); }; -const serializeFilterBlocks = (blocks: Array<Block>): Buffer => +const serializeFilterBlocks = (blocks: Block[]): Buffer => Buffer.concat(blocks.map((block) => Buffer.from(block.buffer))); const buildFilterHeader = (numBytes: number) => { @@ -49,10 +49,10 @@ export const serializeFilterHeaders = (numberOfBytes: number) => { return parquet_util.serializeThrift(bloomFilterHeader); }; -type serializeFilterDataParams = { - filterBlocks: Array<Block>; +interface serializeFilterDataParams { + filterBlocks: Block[]; filterByteSize: number; -}; +} export const serializeFilterData = (params: serializeFilterDataParams) => { const serializedFilterBlocks = serializeFilterBlocks(params.filterBlocks); diff --git a/lib/bufferReader.ts index a29a3823..fc12dee6 100644 --- a/lib/bufferReader.ts +++ b/lib/bufferReader.ts @@ -23,7 +23,7 @@ export default class BufferReader { maxLength: number; queueWait: number; scheduled?: boolean; - queue: Array<BufferReaderQueueRow>; + queue: BufferReaderQueueRow[]; envelopeReader: ParquetEnvelopeReader; constructor(envelopeReader: ParquetEnvelopeReader, options: BufferReaderOptions) { @@ -56,7 +56,7 @@ export default class BufferReader { this.queue = []; queue.sort((a, b) => a.offset - b.offset); - var subqueue: Array<BufferReaderQueueRow> = []; + let subqueue: BufferReaderQueueRow[] = []; const readSubqueue = async () => { if (!subqueue.length) { diff --git a/lib/codec/plain.ts index d950cf9c..9d9364d6 100644 --- a/lib/codec/plain.ts +++ b/lib/codec/plain.ts @@ -1,8 +1,8 @@ import INT53 from 'int53'; import { Cursor, Options } from './types'; -function encodeValues_BOOLEAN(values: Array<boolean>) { - let buf = Buffer.alloc(Math.ceil(values.length / 8)); +function encodeValues_BOOLEAN(values: boolean[]) { + const buf = Buffer.alloc(Math.ceil(values.length / 8)); buf.fill(0); for (let i = 0; i < values.length; ++i) { @@ -15,10 +15,10 @@ function
encodeValues_BOOLEAN(values: Array<boolean>) { } function decodeValues_BOOLEAN(cursor: Cursor, count: number) { - let values = []; + const values = []; for (let i = 0; i < count; ++i) { - let b = cursor.buffer[cursor.offset + Math.floor(i / 8)]; + const b = cursor.buffer[cursor.offset + Math.floor(i / 8)]; values.push((b & (1 << i % 8)) > 0); } @@ -26,10 +26,10 @@ function decodeValues_BOOLEAN(cursor: Cursor, count: number) { return values; } -function encodeValues_INT32(values: Array<number>, opts: Options) { +function encodeValues_INT32(values: number[], opts: Options) { const isDecimal = opts?.originalType === 'DECIMAL' || opts?.column?.originalType === 'DECIMAL'; const scale = opts?.scale || 0; - let buf = Buffer.alloc(4 * values.length); + const buf = Buffer.alloc(4 * values.length); for (let i = 0; i < values.length; i++) { if (isDecimal) { buf.writeInt32LE(values[i] * Math.pow(10, scale), i * 4); @@ -61,10 +61,10 @@ function decodeValues_INT32(cursor: Cursor, count: number, opts: Options) { return values; } -function encodeValues_INT64(values: Array<number>, opts: Options) { +function encodeValues_INT64(values: number[], opts: Options) { const isDecimal = opts?.originalType === 'DECIMAL' || opts?.column?.originalType === 'DECIMAL'; const scale = opts?.scale || 0; - let buf = Buffer.alloc(8 * values.length); + const buf = Buffer.alloc(8 * values.length); for (let i = 0; i < values.length; i++) { if (isDecimal) { buf.writeBigInt64LE(BigInt(Math.floor(values[i] * Math.pow(10, scale))), i * 8); @@ -81,7 +81,7 @@ function decodeValues_INT64(cursor: Cursor, count: number, opts: Options) { const name = opts.name || opts.column?.name || undefined; try { if (opts.originalType === 'DECIMAL' || opts.column?.originalType === 'DECIMAL') { - let columnOptions: any = opts.column?.originalType ? opts.column : opts; + const columnOptions: any = opts.column?.originalType ?
opts.column : opts; values = decodeValues_DECIMAL(cursor, count, columnOptions); } else { for (let i = 0; i < count; ++i) { @@ -107,7 +107,7 @@ function decodeValues_DECIMAL(cursor: Cursor, count: number, opts: Options) { throw `missing option: precision (required for DECIMAL) for column: ${name}`; } - let values = []; + const values = []; // by default we prepare the offset and bufferFunction to work with 32bit integers let offset = 4; @@ -130,8 +130,8 @@ function decodeValues_DECIMAL(cursor: Cursor, count: number, opts: Options) { return values; } -function encodeValues_INT96(values: Array<number>) { - let buf = Buffer.alloc(12 * values.length); +function encodeValues_INT96(values: number[]) { + const buf = Buffer.alloc(12 * values.length); for (let i = 0; i < values.length; i++) { if (values[i] >= 0) { @@ -147,7 +147,7 @@ } function decodeValues_INT96(cursor: Cursor, count: number) { - let values = []; + const values = []; for (let i = 0; i < count; ++i) { const low = INT53.readInt64LE(cursor.buffer, cursor.offset); @@ -165,8 +165,8 @@ return values; } -function encodeValues_FLOAT(values: Array<number>) { - let buf = Buffer.alloc(4 * values.length); +function encodeValues_FLOAT(values: number[]) { + const buf = Buffer.alloc(4 * values.length); for (let i = 0; i < values.length; i++) { buf.writeFloatLE(values[i], i * 4); } @@ -175,7 +175,7 @@ } function decodeValues_FLOAT(cursor: Cursor, count: number) { - let values = []; + const values = []; for (let i = 0; i < count; ++i) { values.push(cursor.buffer.readFloatLE(cursor.offset)); @@ -185,8 +185,8 @@ return values; } -function encodeValues_DOUBLE(values: Array<number>) { - let buf = Buffer.alloc(8 * values.length); +function encodeValues_DOUBLE(values: number[]) { + const buf = Buffer.alloc(8 * values.length); for (let i = 0; i < values.length; i++) { buf.writeDoubleLE(values[i], i * 8); } @@ -195,7 +195,7 @@ } function decodeValues_DOUBLE(cursor: Cursor, count: number) { - let values = []; + const values = []; for (let i = 0; i < count; ++i) { values.push(cursor.buffer.readDoubleLE(cursor.offset)); @@ -205,15 +205,15 @@ return values; } -function encodeValues_BYTE_ARRAY(values: Array<Uint8Array>) { let buf_len = 0; - const returnedValues: Array<Buffer> = []; +function encodeValues_BYTE_ARRAY(values: Uint8Array[]) { let buf_len = 0; + const returnedValues: Buffer[] = []; for (let i = 0; i < values.length; i++) { returnedValues[i] = Buffer.from(values[i]); buf_len += 4 + returnedValues[i].length; } - let buf = Buffer.alloc(buf_len); + const buf = Buffer.alloc(buf_len); let buf_pos = 0; for (let i = 0; i < returnedValues.length; i++) { buf.writeUInt32LE(returnedValues[i].length, buf_pos); @@ -225,10 +225,10 @@ } function decodeValues_BYTE_ARRAY(cursor: Cursor, count: number) { - let values = []; + const values = []; for (let i = 0; i < count; ++i) { - let len = cursor.buffer.readUInt32LE(cursor.offset); + const len = cursor.buffer.readUInt32LE(cursor.offset); cursor.offset += 4; values.push(cursor.buffer.subarray(cursor.offset, cursor.offset + len)); cursor.offset += len; @@ -237,12 +237,12 @@ return values; } -function encodeValues_FIXED_LEN_BYTE_ARRAY(values: Array<Uint8Array>,
opts: Options) { +function encodeValues_FIXED_LEN_BYTE_ARRAY(values: Uint8Array[], opts: Options) { if (!opts.typeLength) { throw 'missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)'; } - const returnedValues: Array<Buffer> = []; + const returnedValues: Buffer[] = []; for (let i = 0; i < values.length; i++) { returnedValues[i] = Buffer.from(values[i]); @@ -255,7 +255,7 @@ } function decodeValues_FIXED_LEN_BYTE_ARRAY(cursor: Cursor, count: number, opts: Options) { - let values = []; + const values = []; const typeLength = opts.typeLength ?? (opts.column ? opts.column.typeLength : undefined); if (!typeLength) { throw 'missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)'; @@ -279,31 +279,31 @@ type ValidValueTypes = | 'BYTE_ARRAY' | 'FIXED_LEN_BYTE_ARRAY'; -export const encodeValues = function (type: ValidValueTypes | string, values: Array<unknown>, opts: Options) { +export const encodeValues = function (type: ValidValueTypes | string, values: unknown[], opts: Options) { switch (type) { case 'BOOLEAN': - return encodeValues_BOOLEAN(values as Array<boolean>); + return encodeValues_BOOLEAN(values as boolean[]); case 'INT32': - return encodeValues_INT32(values as Array<number>, opts); + return encodeValues_INT32(values as number[], opts); case 'INT64': - return encodeValues_INT64(values as Array<number>, opts); + return encodeValues_INT64(values as number[], opts); case 'INT96': - return encodeValues_INT96(values as Array<number>); + return encodeValues_INT96(values as number[]); case 'FLOAT': - return encodeValues_FLOAT(values as Array<number>); + return encodeValues_FLOAT(values as number[]); case 'DOUBLE': - return encodeValues_DOUBLE(values as Array<number>); + return encodeValues_DOUBLE(values as number[]); case 'BYTE_ARRAY': - return encodeValues_BYTE_ARRAY(values as Array<Uint8Array>); + return encodeValues_BYTE_ARRAY(values as Uint8Array[]); case 'FIXED_LEN_BYTE_ARRAY': - return encodeValues_FIXED_LEN_BYTE_ARRAY(values as Array<Uint8Array>, opts); + return encodeValues_FIXED_LEN_BYTE_ARRAY(values as Uint8Array[], opts); default: throw 'unsupported type: ' + type; diff --git a/lib/codec/rle.ts index 6c2f5d87..63ed755f 100644 --- a/lib/codec/rle.ts +++ b/lib/codec/rle.ts @@ -5,12 +5,12 @@ import varint from 'varint'; import { Cursor } from './types'; -function encodeRunBitpacked(values: Array<number>, opts: { bitWidth: number }) { +function encodeRunBitpacked(values: number[], opts: { bitWidth: number }) { for (let i = 0; i < values.length % 8; i++) { values.push(0); } - let buf = Buffer.alloc(Math.ceil(opts.bitWidth * (values.length / 8))); + const buf = Buffer.alloc(Math.ceil(opts.bitWidth * (values.length / 8))); for (let b = 0; b < opts.bitWidth * values.length; ++b) { if ((values[Math.floor(b / opts.bitWidth)] & (1 << b % opts.bitWidth)) > 0) { buf[Math.floor(b / 8)] |= 1 << b % 8; @@ -21,7 +21,7 @@ function encodeRunBitpacked(values: Array<number>, opts: { bitWidth: number }) { } function encodeRunRepeated(value: number, count: number, opts: { bitWidth: number }) { - let buf = Buffer.alloc(Math.ceil(opts.bitWidth / 8)); + const buf = Buffer.alloc(Math.ceil(opts.bitWidth / 8)); let remainingValue = value; // This is encoded LSB to MSB, so we pick off the least @@ -44,7 +44,7 @@ function unknownToParsedInt(value: string | number) { export const encodeValues = function ( type: string, - values: Array<number>, + values: number[], opts: { bitWidth: number; disableEnvelope?: boolean } ) { if (!('bitWidth' in opts)) { @@ -98,7 +98,7 @@ return buf; } - let
envelope = Buffer.alloc(buf.length + 4); + const envelope = Buffer.alloc(buf.length + 4); envelope.writeUInt32LE(buf.length); buf.copy(envelope, 4); @@ -110,7 +110,7 @@ function decodeRunBitpacked(cursor: Cursor, count: number, opts: { bitWidth: num throw 'must be a multiple of 8'; } - let values = new Array(count).fill(0); + const values = new Array(count).fill(0); for (let b = 0; b < opts.bitWidth * count; ++b) { if (cursor.buffer[cursor.offset + Math.floor(b / 8)] & (1 << b % 8)) { values[Math.floor(b / opts.bitWidth)] |= 1 << b % opts.bitWidth; @@ -122,7 +122,7 @@ function decodeRunBitpacked(cursor: Cursor, count: number, opts: { bitWidth: num } function decodeRunRepeated(cursor: Cursor, count: number, opts: { bitWidth: number }) { - var bytesNeededForFixedBitWidth = Math.ceil(opts.bitWidth / 8); + const bytesNeededForFixedBitWidth = Math.ceil(opts.bitWidth / 8); let value = 0; for (let i = 0; i < bytesNeededForFixedBitWidth; ++i) { diff --git a/lib/codec/types.ts index e91e5315..7334fd4d 100644 --- a/lib/codec/types.ts +++ b/lib/codec/types.ts @@ -13,7 +13,7 @@ export interface Options { column?: ParquetField; rawStatistics?: Statistics; cache?: unknown; - dictionary?: Array<number>; + dictionary?: number[]; num_values?: number; rLevelMax?: number; dLevelMax?: number; diff --git a/lib/compression.ts index cbfe2452..1dd1367b 100644 --- a/lib/compression.ts +++ b/lib/compression.ts @@ -2,12 +2,13 @@ import zlib from 'zlib'; import snappy from 'snappyjs'; import { compress as brotliCompress, decompress as brotliDecompress } from 'brotli-wasm'; -interface PARQUET_COMPRESSION_METHODS { - [key: string]: { +type PARQUET_COMPRESSION_METHODS = Record< + string, + { deflate: (value: any) => Buffer | Promise<Buffer>; inflate: (value: any) => Buffer | Promise<Buffer>; - }; -} + } +>; // LZO compression is disabled.
See: https://github.com/LibertyDSNP/parquetjs/issues/18 export const PARQUET_COMPRESSION_METHODS: PARQUET_COMPRESSION_METHODS = { UNCOMPRESSED: { diff --git a/lib/declare.ts index 9bc57c50..049813e9 100644 --- a/lib/declare.ts +++ b/lib/declare.ts @@ -56,9 +56,7 @@ export type OriginalType = | 'BSON' // 20 | 'INTERVAL'; // 21 -export interface SchemaDefinition { - [string: string]: FieldDefinition; -} +export type SchemaDefinition = Record<string, FieldDefinition>; export interface FieldDefinition { type?: ParquetType; @@ -110,9 +108,7 @@ export interface ParquetBuffer { columnData?: Record<string, PageData>; } -export interface ParquetRecord { - [key: string]: any; -} +export type ParquetRecord = Record<string, any>; export interface ColumnChunkData { rowGroupIndex: number; @@ -159,7 +155,7 @@ export interface PageData { pageHeaders?: PageHeader[]; pageHeader?: PageHeader; count?: number; - dictionary?: Array<unknown>; + dictionary?: unknown[]; column?: parquet_thrift.ColumnChunk; useDictionary?: boolean; } @@ -214,7 +210,7 @@ export class NewPageHeader extends parquet_thrift.PageHeader { headerSize?: number; } -export type WriterOptions = { +export interface WriterOptions { pageIndex?: boolean; pageSize?: number; useDataPageV2?: boolean; @@ -229,13 +225,13 @@ export type WriterOptions = { emitClose?: boolean; start?: number; highWaterMark?: number; -}; +} -export type Page = { +export interface Page { page: Buffer; statistics: parquet_thrift.Statistics; first_row_index: number; distinct_values: Set<unknown>; num_values: number; count?: number; -}; +} diff --git a/lib/reader.ts index ccc845d6..b71c54c8 100644 --- a/lib/reader.ts +++ b/lib/reader.ts @@ -54,8 +54,8 @@ class ParquetCursor { metadata: FileMetaDataExt; envelopeReader: ParquetEnvelopeReader; schema: parquet_schema.ParquetSchema; - columnList: Array<Array<unknown>>; - rowGroup: Array<unknown>; + columnList: unknown[][]; + rowGroup: unknown[]; rowGroupIndex: number; cursorIndex: number; @@ -69,7 +69,7 @@ metadata: FileMetaDataExt, envelopeReader: ParquetEnvelopeReader, schema: parquet_schema.ParquetSchema, - columnList: Array<Array<unknown>> + columnList: unknown[][] ) { this.metadata = metadata; this.envelopeReader = envelopeReader; @@ -90,7 +90,7 @@ return null; } - let rowBuffer = await this.envelopeReader.readRowGroup( + const rowBuffer = await this.envelopeReader.readRowGroup( this.schema, this.metadata.row_groups[this.rowGroupIndex], this.columnList @@ -131,12 +131,12 @@ * parquet reader */ static async openFile(filePath: string | Buffer | URL, options?: BufferReaderOptions) { - let envelopeReader = await ParquetEnvelopeReader.openFile(filePath, options); + const envelopeReader = await ParquetEnvelopeReader.openFile(filePath, options); return this.openEnvelopeReader(envelopeReader, options); } static async openBuffer(buffer: Buffer, options?: BufferReaderOptions) { - let envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer, options); + const envelopeReader = await ParquetEnvelopeReader.openBuffer(buffer, options); return this.openEnvelopeReader(envelopeReader, options); } @@ -150,7 +150,7 @@ */ static async openS3(client: any, params: ClientParameters, options?: BufferReaderOptions) { try { - let envelopeReader: ParquetEnvelopeReader = + const envelopeReader: ParquetEnvelopeReader = 'function' === typeof client['headObject'] ?
await ParquetEnvelopeReader.openS3(client as ClientS3, params, options) // S3 client v2 : await ParquetEnvelopeReader.openS3v3(client as S3Client, params, options); // S3 client v3 @@ -167,7 +167,7 @@ * This function returns a new parquet reader */ static async openUrl(params: Parameter | URL | string, options?: BufferReaderOptions) { - let envelopeReader = await ParquetEnvelopeReader.openUrl(params, options); + const envelopeReader = await ParquetEnvelopeReader.openUrl(params, options); return this.openEnvelopeReader(envelopeReader, options); } @@ -178,7 +178,7 @@ try { await envelopeReader.readHeader(); - let metadata = await envelopeReader.readFooter(); + const metadata = await envelopeReader.readFooter(); return new ParquetReader(metadata, envelopeReader, opts); } catch (err) { @@ -202,7 +202,7 @@ // If metadata is a json file then we need to convert INT64 and CTIME if (metadata.json) { - const convert = (o: { [string: string]: any }) => { + const convert = (o: Record<string, any>) => { if (o && typeof o === 'object') { Object.keys(o).forEach((key) => (o[key] = convert(o[key]))); if (o.parquetType === 'CTIME') { @@ -282,12 +282,12 @@ * from disk. An empty array or no value implies all columns. A list of column * names means that only those columns should be loaded from disk. */ - getCursor(columnList?: Array<Array<unknown>>) { + getCursor(columnList?: unknown[][]) { if (!columnList) { columnList = []; } - columnList = columnList.map((x: Array<unknown>) => (x.constructor === Array ? x : [x])); + columnList = columnList.map((x: unknown[]) => (x.constructor === Array ? x : [x])); return new ParquetCursor(this.metadata!, this.envelopeReader!, this.schema, columnList); } @@ -320,8 +320,8 @@ * Returns the user (key/value) metadata for this file */ getMetadata() { - let md: Record<string, string> = {}; - for (let kv of this.metadata!.key_value_metadata!) { + const md: Record<string, string> = {}; + for (const kv of this.metadata!.key_value_metadata!)
{ md[kv.key] = kv.value; } @@ -329,13 +329,13 @@ } async exportMetadata(indent: string | number | undefined) { - function replacer(_key: unknown, value: parquet_thrift.PageLocation | bigint | { [string: string]: any }) { + function replacer(_key: unknown, value: parquet_thrift.PageLocation | bigint | Record<string, any>) { if (value instanceof parquet_thrift.PageLocation) { return [value.offset, value.compressed_page_size, value.first_row_index]; } if (typeof value === 'object') { - for (let k in value) { + for (const k in value) { if (value[k] instanceof Date) { value[k].toJSON = () => JSON.stringify({ parquetType: 'CTIME', value: value[k].valueOf(), }); } } } @@ -414,10 +414,10 @@ schema?: parquet_schema.ParquetSchema; static async openFile(filePath: string | Buffer | URL, options?: BufferReaderOptions) { - let fileStat = await parquet_util.fstat(filePath); - let fileDescriptor = await parquet_util.fopen(filePath); + const fileStat = await parquet_util.fstat(filePath); + const fileDescriptor = await parquet_util.fopen(filePath); - let readFn = (offset: number, length: number, file?: string) => { + const readFn = (offset: number, length: number, file?: string) => { if (file) { return Promise.reject('external references are not supported'); } @@ -425,13 +425,13 @@ return parquet_util.fread(fileDescriptor, offset, length); }; - let closeFn = parquet_util.fclose.bind(undefined, fileDescriptor); + const closeFn = parquet_util.fclose.bind(undefined, fileDescriptor); return new ParquetEnvelopeReader(readFn, closeFn, fileStat.size, options); } static async openBuffer(buffer: Buffer, options?: BufferReaderOptions) { - let readFn = (offset: number, length: number, file?: string) => { + const readFn = (offset: number, length: number, file?: string) => { if (file) { return Promise.reject('external references are not supported'); } @@ -439,28 +439,28 @@ return Promise.resolve(buffer.subarray(offset, offset + length)); }; - let closeFn = () => ({}); + const closeFn = () => ({}); return new ParquetEnvelopeReader(readFn, closeFn, buffer.length, options); } static async openS3(client: ClientS3, params: ClientParameters, options?: BufferReaderOptions) { - let fileStat = async () => + const fileStat = async () => client .headObject(params) .promise() .then((d: { ContentLength: number }) => d.ContentLength); - let readFn = async (offset: number, length: number, file?: string) => { + const readFn = async (offset: number, length: number, file?: string) => { if (file) { return Promise.reject('external references are not supported'); } - let Range = `bytes=${offset}-${offset + length - 1}`; - let res = await client.getObject(Object.assign({ Range }, params)).promise(); + const Range = `bytes=${offset}-${offset + length - 1}`; + const res = await client.getObject(Object.assign({ Range }, params)).promise(); return Promise.resolve(res.Body); }; - let closeFn = () => ({}); + const closeFn = () => ({}); return new ParquetEnvelopeReader(readFn, closeFn, fileStat, options); } @@ -494,7 +494,7 @@ return Buffer.of(); }; - let closeFn = () => ({}); + const closeFn = () => ({}); return new ParquetEnvelopeReader(readFn, closeFn, fileStat, options); } @@ -528,17 +528,17 @@ const baseArr = params.url.split('/'); const base = baseArr.slice(0, baseArr.length - 1).join('/') + '/'; - let defaultHeaders = params.headers || {}; + const defaultHeaders = params.headers || {}; const filesize
async (): Promise<number> => { const { headers } = await fetch(params.url); return Number(headers.get('Content-Length')) || 0; }; - let readFn = async (offset: number, length: number, file?: string) => { - let url = file ? base + file : params.url; - let range = `bytes=${offset}-${offset + length - 1}`; - let headers = Object.assign({}, defaultHeaders, { range }); + const readFn = async (offset: number, length: number, file?: string) => { + const url = file ? base + file : params.url; + const range = `bytes=${offset}-${offset + length - 1}`; + const headers = Object.assign({}, defaultHeaders, { range }); const response = await fetch(url, { headers }); const arrayBuffer = await response.arrayBuffer(); const buffer = Buffer.from(arrayBuffer); @@ -546,7 +546,7 @@ return buffer; }; - let closeFn = () => ({}); + const closeFn = () => ({}); return new ParquetEnvelopeReader(readFn, closeFn, filesize, options); } @@ -611,7 +611,7 @@ return column; } - getAllColumnChunkDataFor(paths: Array<string>, row_groups?: Array<RowGroupExt>) { + getAllColumnChunkDataFor(paths: string[], row_groups?: RowGroupExt[]) { if (!row_groups) { row_groups = this.metadata!.row_groups; } @@ -629,7 +629,7 @@ row_group: RowGroupExt | number | null, opts: Options ): Promise<parquet_thrift.OffsetIndex> { - let column = this.getColumn(path, row_group); + const column = this.getColumn(path, row_group); if (column.offsetIndex) { return Promise.resolve(column.offsetIndex); } else if (!column.offset_index_offset || !column.offset_index_length) { } const data = this.read(+column.offset_index_offset, column.offset_index_length).then((data: Buffer) => { - let offset_index = new parquet_thrift.OffsetIndex(); + const offset_index = new parquet_thrift.OffsetIndex(); parquet_util.decodeThrift(offset_index, data); Object.defineProperty(offset_index, 'column', { value: column, enumerable: false }); return offset_index; @@ -653,7 +653,7 @@ row_group: RowGroupExt | number, opts: Options ): Promise<parquet_thrift.ColumnIndex> { - let column = this.getColumn(path, row_group); + const column = this.getColumn(path, row_group); if (column.columnIndex) { return Promise.resolve(column.columnIndex); } else if (!column.column_index_offset) { } const data = this.read(+column.column_index_offset, column.column_index_length as number).then((buf: Buffer) => { - let column_index = new parquet_thrift.ColumnIndex(); + const column_index = new parquet_thrift.ColumnIndex(); parquet_util.decodeThrift(column_index, buf); Object.defineProperty(column_index, 'column', { value: column }); @@ -685,7 +685,7 @@ async readPage( column: ColumnChunkExt, page: parquet_thrift.PageLocation | number, - records: Array<Record<string, unknown>>, + records: Record<string, unknown>[], opts: Options ) { column = Object.assign({}, column); @@ -706,22 +706,22 @@ } const chunk = await this.readColumnChunk(this.schema!, column); Object.defineProperty(chunk, 'column', { value: column }); - let data = { + const data = { columnData: { [chunk.column!.meta_data!.path_in_schema.join(',')]: chunk }, }; return parquet_shredder.materializeRecords(this.schema!, data, records); } async readRowGroup(schema: parquet_schema.ParquetSchema,
rowGroup: RowGroupExt, columnList: unknown[][]) { + const buffer: parquet_shredder.RecordBuffer = { rowCount: +rowGroup.num_rows, columnData: {}, pageRowCount: 0, pages: {}, }; - for (let colChunk of rowGroup.columns) { + for (const colChunk of rowGroup.columns) { const colMetadata = colChunk.meta_data; const colKey = colMetadata!.path_in_schema; @@ -736,13 +736,13 @@ export class ParquetEnvelopeReader { } async readColumnChunk(schema: parquet_schema.ParquetSchema, colChunk: ColumnChunkExt, opts?: Options) { - let metadata = colChunk.meta_data!; - let field = schema.findField(metadata.path_in_schema); - let type = parquet_util.getThriftEnum(parquet_thrift.Type, metadata.type); + const metadata = colChunk.meta_data!; + const field = schema.findField(metadata.path_in_schema); + const type = parquet_util.getThriftEnum(parquet_thrift.Type, metadata.type); - let compression = parquet_util.getThriftEnum(parquet_thrift.CompressionCodec, metadata.codec); + const compression = parquet_util.getThriftEnum(parquet_thrift.CompressionCodec, metadata.codec); - let pagesOffset = +metadata.data_page_offset; + const pagesOffset = +metadata.data_page_offset; let pagesSize = +metadata.total_compressed_size; if (!colChunk.file_path) { @@ -780,23 +780,23 @@ export class ParquetEnvelopeReader { this.fileSize = await this.fileSize(); } - let trailerLen = PARQUET_MAGIC.length + 4; + const trailerLen = PARQUET_MAGIC.length + 4; - let offset = (this.fileSize as number) - trailerLen; - let trailerBuf = await this.read(offset, trailerLen); + const offset = (this.fileSize as number) - trailerLen; + const trailerBuf = await this.read(offset, trailerLen); if (trailerBuf.subarray(4).toString() != PARQUET_MAGIC) { throw 'not a valid parquet file'; } - let metadataSize = trailerBuf.readUInt32LE(0); - let metadataOffset = (this.fileSize as number) - metadataSize - trailerLen; + const metadataSize = trailerBuf.readUInt32LE(0); + const metadataOffset = (this.fileSize as number) - metadataSize - trailerLen; if (metadataOffset < PARQUET_MAGIC.length) { throw 'invalid metadata size'; } - let metadataBuf = await this.read(metadataOffset, metadataSize); - let metadata = new parquet_thrift.FileMetaData(); + const metadataBuf = await this.read(metadataOffset, metadataSize); + const metadata = new parquet_thrift.FileMetaData(); parquet_util.decodeThrift(metadata, metadataBuf); return metadata; } @@ -905,13 +905,13 @@ async function decodePage(cursor: Cursor, opts: Options): Promise { async function decodePages(buffer: Buffer, opts: Options) { opts = opts || {}; - let cursor = { + const cursor = { buffer: buffer, offset: 0, size: buffer.length, }; - let data: PageData = { + const data: PageData = { rlevels: [], dlevels: [], values: [], @@ -935,12 +935,12 @@ async function decodePages(buffer: Buffer, opts: Options) { pageData.values = pageData.values!.map((d) => opts.dictionary![d]); } - let length = pageData.rlevels != undefined ? pageData.rlevels.length : 0; + const length = pageData.rlevels != undefined ? 
pageData.rlevels.length : 0; for (let i = 0; i < length; i++) { data.rlevels!.push(pageData.rlevels![i]); data.dlevels!.push(pageData.dlevels![i]); - let value = pageData.values![i]; + const value = pageData.values![i]; if (value !== undefined) { data.values!.push(value); } @@ -964,7 +964,7 @@ cursor.offset = cursorEnd; if (opts.compression && opts.compression !== 'UNCOMPRESSED') { - let valuesBuf = await parquet_compression.inflate( + const valuesBuf = await parquet_compression.inflate( opts.compression, dictCursor.buffer.subarray(dictCursor.offset, cursorEnd) ); @@ -982,7 +982,7 @@ dictCursor, header.dictionary_page_header!.num_values, opts - ).map((d: Array<unknown>) => d.toString()); + ).map((d: unknown[]) => d.toString()); } async function decodeDataPage(cursor: Cursor, header: parquet_thrift.PageHeader, opts: Options) { @@ -990,12 +990,12 @@ const dataPageHeader = header.data_page_header!; - let valueCount = dataPageHeader.num_values; - let valueEncoding = parquet_util.getThriftEnum(parquet_thrift.Encoding, dataPageHeader.encoding); + const valueCount = dataPageHeader.num_values; + const valueEncoding = parquet_util.getThriftEnum(parquet_thrift.Encoding, dataPageHeader.encoding); let valuesBufCursor = cursor; if (opts.compression && opts.compression !== 'UNCOMPRESSED') { - let valuesBuf = await parquet_compression.inflate( + const valuesBuf = await parquet_compression.inflate( opts.compression, cursor.buffer.subarray(cursor.offset, cursorEnd) ); @@ -1008,7 +1008,7 @@ } /* read repetition levels */ - let rLevelEncoding = parquet_util.getThriftEnum(parquet_thrift.Encoding, dataPageHeader.repetition_level_encoding); + const rLevelEncoding = parquet_util.getThriftEnum(parquet_thrift.Encoding, dataPageHeader.repetition_level_encoding); let rLevels = new Array(valueCount); if (opts.rLevelMax! > 0) { @@ -1020,7 +1020,7 @@ } /* read definition levels */ - let dLevelEncoding = parquet_util.getThriftEnum(parquet_thrift.Encoding, dataPageHeader.definition_level_encoding); + const dLevelEncoding = parquet_util.getThriftEnum(parquet_thrift.Encoding, dataPageHeader.definition_level_encoding); let dLevels = new Array(valueCount); if (opts.dLevelMax!
> 0) { @@ -1033,13 +1033,13 @@ /* read values */ let valueCountNonNull = 0; - for (let dlvl of dLevels) { + for (const dlvl of dLevels) { if (dlvl === opts.dLevelMax) { ++valueCountNonNull; } } - let values = decodeValues(opts.type!, valueEncoding as ParquetCodec, valuesBufCursor, valueCountNonNull, { + const values = decodeValues(opts.type!, valueEncoding as ParquetCodec, valuesBufCursor, valueCountNonNull, { typeLength: opts.column!.typeLength!, bitWidth: opts.column!.typeLength!, disableEnvelope: opts.column!.disableEnvelope, @@ -1094,7 +1094,7 @@ let valuesBufCursor = cursor; if (dataPageHeaderV2.is_compressed) { - let valuesBuf = await parquet_compression.inflate( + const valuesBuf = await parquet_compression.inflate( opts.compression!, cursor.buffer.subarray(cursor.offset, cursorEnd) ); @@ -1108,7 +1108,7 @@ cursor.offset = cursorEnd; } - let values = decodeValues(opts.type!, valueEncoding as ParquetCodec, valuesBufCursor, valueCountNonNull, { + const values = decodeValues(opts.type!, valueEncoding as ParquetCodec, valuesBufCursor, valueCountNonNull, { bitWidth: opts.column!.typeLength!, ...opts.column!, }); @@ -1122,10 +1122,13 @@ }; } -function decodeSchema(schemaElements: Array<parquet_thrift.SchemaElement>) { +function decodeSchema(schemaElements: parquet_thrift.SchemaElement[]) { let schema: SchemaDefinition | FieldDefinition = {}; schemaElements.forEach((schemaElement) => { - let repetitionType = parquet_util.getThriftEnum(parquet_thrift.FieldRepetitionType, schemaElement.repetition_type); + const repetitionType = parquet_util.getThriftEnum( + parquet_thrift.FieldRepetitionType, + schemaElement.repetition_type + ); let optional = false; let repeated = false; diff --git a/lib/schema.ts index efb7f059..f33165dd 100644 --- a/lib/schema.ts +++ b/lib/schema.ts @@ -11,7 +11,7 @@ export class ParquetSchema { schema: SchemaDefinition; fields: Record<string, ParquetField>; - fieldList: Array<ParquetField>; + fieldList: ParquetField[]; /** * Create a new schema from JSON Schema (json-schema.org) @@ -33,7 +33,7 @@ /** * Retrieve a field definition */ - findField(path: string | Array<string>) { + findField(path: string | string[]) { if (typeof path === 'string') { path = path.split(','); } else { @@ -42,7 +42,7 @@ let n = this.fields; for (; path.length > 1; path.shift()) { - let fields = n[path[0]]?.fields; + const fields = n[path[0]]?.fields; if (isDefined(fields)) { n = fields; } @@ -54,17 +54,17 @@ /** * Retrieve a field definition and all the field's ancestors */ - findFieldBranch(path: string | Array<string>) { + findFieldBranch(path: string | string[]) { if (typeof path === 'string') { path = path.split(','); } - let branch = []; + const branch = []; let n = this.fields; for (; path.length > 0; path.shift()) { branch.push(n[path[0]]); - let fields = n[path[0]].fields; + const fields = n[path[0]].fields; if (path.length > 1 && isDefined(fields)) { n = fields; } @@ -74,12 +74,7 @@ } } -function buildFields( - schema: SchemaDefinition, - rLevelParentMax?: number, - dLevelParentMax?: number, - path?: Array<string> -) { +function buildFields(schema: SchemaDefinition,
rLevelParentMax?: number, dLevelParentMax?: number, path?: string[]) { if (!rLevelParentMax) { rLevelParentMax = 0; } @@ -92,9 +87,9 @@ path = []; } - let fieldList: Record<string, ParquetField> = {}; - let fieldErrors: Array<string> = []; - for (let name in schema) { + const fieldList: Record<string, ParquetField> = {}; + let fieldErrors: string[] = []; + for (const name in schema) { const opts = schema[name]; /* field repetition type */ @@ -200,9 +195,9 @@ } function listFields(fields: Record<string, ParquetField>) { - let list: Array<ParquetField> = []; + let list: ParquetField[] = []; - for (let k in fields) { + for (const k in fields) { list.push(fields[k]); const nestedFields = fields[k].fields; diff --git a/lib/shred.ts index 93ef78aa..b5e83b84 100644 --- a/lib/shred.ts +++ b/lib/shred.ts @@ -35,8 +35,8 @@ export const shredRecord = function (schema: ParquetSchema, record: Record<string, unknown>, buffer: RecordBuffer) { /* shred the record, this may raise an exception */ - var recordShredded: Record<string, PageData> = {}; - for (let field of schema.fieldList) { + const recordShredded: Record<string, PageData> = {}; + for (const field of schema.fieldList) { recordShredded[field.path.join(',')] = { dlevels: [], rlevels: [], @@ -55,8 +55,8 @@ export const shredRecord = function (schema: ParquetSchema, record: Record<st - let values: Array<unknown> = []; + let values: unknown[] = []; if (record && fieldName in record && record[fieldName] !== undefined && record[fieldName] !== null) { if (Array.isArray(record[fieldName])) { - values = record[fieldName] as Array<unknown>; + values = record[fieldName] as unknown[]; } else if (ArrayBuffer.isView(record[fieldName])) { // checks if any typed array if (record[fieldName] instanceof Uint8Array) { @@ -181,18 +181,18 @@ export const materializeRecords = function ( schema: ParquetSchema, buffer: RecordBuffer, - records?: Array<Record<string, unknown>> + records?: Record<string, unknown>[] ) { if (!records) { records = []; } - for (let k in buffer.columnData) { + for (const k in buffer.columnData) { const field = schema.findField(k); const fieldBranch = schema.findFieldBranch(k); - let values = buffer.columnData[k].values![Symbol.iterator](); + const values = buffer.columnData[k].values![Symbol.iterator](); - let rLevels = new Array(field.rLevelMax + 1); + const rLevels = new Array(field.rLevelMax + 1); rLevels.fill(0); for (let i = 0; i < buffer.columnData[k].count!; ++i) { @@ -224,8 +224,8 @@ function materializeRecordField( record: Record<string, unknown>, - branch: Array<ParquetField>, - rLevels: Array<number>, + branch: ParquetField[], + rLevels: number[], dLevel: number, value: Record<string, unknown> ) { @@ -242,7 +242,7 @@ if (!(node.name in record)) { record[node.name] = []; } - const recordValue = record[node.name] as Array<Record<string, unknown>>; + const recordValue = record[node.name] as Record<string, unknown>[]; while (recordValue.length < rLevels[0] + 1) { recordValue.push({}); @@ -260,7 +260,7 @@ if (!(node.name in record)) { record[node.name] = []; } - const recordValue = record[node.name] as Array<Record<string, unknown> | null>; + const recordValue = record[node.name] as (Record<string, unknown> | null)[]; while (recordValue.length < rLevels[0] + 1) { recordValue.push(null); diff --git a/lib/types.ts index fcab2f08..a2bb9a98 100644 --- a/lib/types.ts +++ b/lib/types.ts @@ -7,13 +7,13 @@ import type { Document as BsonDocument } from 'bson'; const bsonSerialize = require('bson').serialize; const bsonDeserialize = require('bson').deserialize; -type ParquetTypeDataObject = { +interface ParquetTypeDataObject { primitiveType?: PrimitiveType; -
toPrimitive: Function; - fromPrimitive?: Function; + toPrimitive: (x: any) => any; + fromPrimitive?: (x: any) => any; originalType?: OriginalType; typeLength?: number; -}; +} interface INTERVAL { months: number; @@ -84,7 +84,7 @@ 'LIST', ] satisfies ParquetType[]); -const PARQUET_LOGICAL_TYPE_DATA: { [logicalType: string]: ParquetTypeDataObject } = { +const PARQUET_LOGICAL_TYPE_DATA: Record<string, ParquetTypeDataObject> = { BOOLEAN: { primitiveType: 'BOOLEAN', toPrimitive: toPrimitive_BOOLEAN, @@ -408,11 +408,11 @@ } } -function toPrimitive_FIXED_LEN_BYTE_ARRAY_DECIMAL(value: Array<number>) { +function toPrimitive_FIXED_LEN_BYTE_ARRAY_DECIMAL(value: number[]) { return Buffer.from(value); } -function toPrimitive_BYTE_ARRAY_DECIMAL(value: Array<number>) { +function toPrimitive_BYTE_ARRAY_DECIMAL(value: number[]) { return Buffer.from(value); } @@ -424,7 +424,7 @@ function toPrimitive_LIST(value: any) { return value; } -function toPrimitive_BYTE_ARRAY(value: Array<number>) { +function toPrimitive_BYTE_ARRAY(value: number[]) { return Buffer.from(value); } @@ -539,7 +539,7 @@ throw 'value for INTERVAL must be object { months: ..., days: ..., milliseconds: ... }'; } - let buf = Buffer.alloc(12); + const buf = Buffer.alloc(12); buf.writeUInt32LE(value.months, 0); buf.writeUInt32LE(value.days, 4); buf.writeUInt32LE(value.milliseconds, 8); diff --git a/lib/util.ts index 9aaacc09..5ceb9649 100644 --- a/lib/util.ts +++ b/lib/util.ts @@ -47,10 +47,10 @@ type ThriftObject = */ const getterSetter = (index: number) => ({ - get: function (this: Array<number>): number { + get: function (this: number[]): number { return this[index]; }, - set: function (this: Array<number>, value: number): number { + set: function (this: number[], value: number): number { return (this[index] = value); }, }); @@ -63,16 +63,16 @@ Object.defineProperty(parquet_thrift.PageLocation.prototype, 'first_row_index', * Helper function that serializes a thrift object into a buffer */ export const serializeThrift = function (obj: ThriftObject) { - let output: Array<Uint8Array> = []; + const output: Uint8Array[] = []; const callBack: TTransportCallback = function (buf: Buffer | undefined) { output.push(buf as Buffer); }; - let transport = new thrift.TBufferedTransport(undefined, callBack); + const transport = new thrift.TBufferedTransport(undefined, callBack); - let protocol = new thrift.TCompactProtocol(transport); - //@ts-ignore, https://issues.apache.org/jira/browse/THRIFT-3872 + const protocol = new thrift.TCompactProtocol(transport); + //@ts-expect-error, https://issues.apache.org/jira/browse/THRIFT-3872 obj.write(protocol); transport.flush(); @@ -84,10 +84,10 @@ offset = 0; } - var transport = new fixedTFramedTransport(buf); + const transport = new fixedTFramedTransport(buf); transport.readPos = offset; - var protocol = new thrift.TCompactProtocol(transport); - //@ts-ignore, https://issues.apache.org/jira/browse/THRIFT-3872 + const protocol = new thrift.TCompactProtocol(transport); + //@ts-expect-error, https://issues.apache.org/jira/browse/THRIFT-3872 obj.read(protocol); return transport.readPos - offset; }; @@ -107,7 +107,7 @@ * FIXME not ideal that this is linear */ export const getThriftEnum = function (klass: Enums, value: unknown) { - for (let k in klass) { + for (const k in klass) { if (klass[k] === value) { return
k; } @@ -141,7 +141,7 @@ export const fstat = function (filePath: string | Buffer | URL): Promise<fs.Stats> { - let buffer = Buffer.alloc(length); + const buffer = Buffer.alloc(length); return new Promise((resolve, reject) => { fs.read(fd, buffer, 0, length, position, (err, bytesRead, buf) => { @@ -192,9 +192,9 @@ export const osend = function (os: WriteStreamMinimal) { export const osopen = function (path: string | Buffer | URL, opts?: WriterOptions): Promise<WriteStream> { return new Promise((resolve, reject) => { - let outputStream = fs.createWriteStream(path, opts); + const outputStream = fs.createWriteStream(path, opts); - outputStream.on('open', function (fd) { + outputStream.on('open', function (_fd) { resolve(outputStream); }); @@ -204,7 +204,7 @@ }); }; -export const fieldIndexOf = function (arr: Array<Array<unknown>>, elem: Array<unknown>) { +export const fieldIndexOf = function (arr: unknown[][], elem: unknown[]) { for (let j = 0; j < arr.length; ++j) { if (arr[j].length !== elem.length) { continue; diff --git a/lib/writer.ts index 8aebe400..76b93bc1 100644 --- a/lib/writer.ts +++ b/lib/writer.ts @@ -52,7 +52,7 @@ * the specified file */ static async openFile(schema: ParquetSchema, path: string | Buffer | URL, opts?: WriterOptions) { - let outputStream = await parquet_util.osopen(path, opts); + const outputStream = await parquet_util.osopen(path, opts); return ParquetWriter.openStream(schema, outputStream, opts); } @@ -65,7 +65,7 @@ opts = {}; } - let envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts); + const envelopeWriter = await ParquetEnvelopeWriter.openStream(schema, outputStream, opts); return new ParquetWriter(schema, envelopeWriter, opts); } @@ -121,7 +121,7 @@ * method twice on the same object or add any rows after the close() method has * been called */ - async close(callback?: Function) { + async close(callback?: () => void) { if (this.closed) { throw 'writer was closed'; } @@ -199,8 +199,8 @@ * Create a new parquet envelope writer that writes to the specified stream */ static async openStream(schema: ParquetSchema, outputStream: parquet_util.WriteStreamMinimal, opts: WriterOptions) { - let writeFn = parquet_util.oswrite.bind(undefined, outputStream); - let closeFn = parquet_util.osend.bind(undefined, outputStream); + const writeFn = parquet_util.oswrite.bind(undefined, outputStream); + const closeFn = parquet_util.osend.bind(undefined, outputStream); return new ParquetEnvelopeWriter(schema, writeFn, closeFn, new Int64(0), opts); } @@ -244,7 +244,7 @@ * shredRecord method */ async writeRowGroup(records: parquet_shredder.RecordBuffer) { - let rgroup = await encodeRowGroup(this.schema, records, { + const rgroup = await encodeRowGroup(this.schema, records, { baseOffset: this.offset, pageSize: this.pageSize, useDataPageV2: this.useDataPageV2, @@ -282,11 +282,11 @@ writeIndex() { this.schema.fieldList.forEach((c, i) => { this.rowGroups.forEach((group) => { - let column = group.columns[i]; + const column = group.columns[i]; if (!column) return; if (column.meta_data?.columnIndex) { - let columnBody = parquet_util.serializeThrift(column.meta_data.columnIndex); + const columnBody = parquet_util.serializeThrift(column.meta_data.columnIndex); delete column.meta_data.columnIndex;
column.column_index_offset = parquet_util.cloneInteger(this.offset); column.column_index_length = columnBody.length; @@ -294,7 +294,7 @@ export class ParquetEnvelopeWriter { } if (column.meta_data?.offsetIndex) { - let offsetBody = parquet_util.serializeThrift(column.meta_data.offsetIndex); + const offsetBody = parquet_util.serializeThrift(column.meta_data.offsetIndex); delete column.meta_data.offsetIndex; column.offset_index_offset = parquet_util.cloneInteger(this.offset); column.offset_index_length = offsetBody.length; @@ -337,7 +337,7 @@ export class ParquetTransformer extends stream.Transform { constructor(schema: ParquetSchema, opts = {}) { super({ objectMode: true }); - let writeProxy = (function (t) { + const writeProxy = (function (t) { return function (b: unknown) { t.push(b); }; @@ -425,7 +425,7 @@ async function encodePages( return; } - for (let field of schema.fieldList) { + for (const field of schema.fieldList) { if (field.isNested) { continue; } @@ -469,9 +469,9 @@ async function encodePages( page = await encodeDataPage(field, values.values || [], values.rlevels || [], values.dlevels || [], statistics!); } - let pages = rowBuffer.pages![field.path.join(',')]; - let lastPage = pages[pages.length - 1]; - let first_row_index = lastPage ? lastPage.first_row_index + lastPage.count! : 0; + const pages = rowBuffer.pages![field.path.join(',')]; + const lastPage = pages[pages.length - 1]; + const first_row_index = lastPage ? lastPage.first_row_index + lastPage.count! : 0; pages.push({ page, statistics, @@ -501,7 +501,7 @@ async function encodeDataPage( statistics: parquet_thrift.Statistics ) { /* encode values */ - let valuesBuf = encodeValues(column.primitiveType!, column.encoding!, values, { + const valuesBuf = encodeValues(column.primitiveType!, column.encoding!, values, { bitWidth: column.typeLength, ...column, }); @@ -525,7 +525,7 @@ async function encodeDataPage( let pageBody = Buffer.concat([rLevelsBuf, dLevelsBuf, valuesBuf]); pageBody = await parquet_compression.deflate(column.compression!, pageBody); - let pageHeader = new parquet_thrift.PageHeader(); + const pageHeader = new parquet_thrift.PageHeader(); pageHeader.type = parquet_thrift.PageType['DATA_PAGE']; pageHeader.uncompressed_page_size = rLevelsBuf.length + dLevelsBuf.length + valuesBuf.length; pageHeader.compressed_page_size = pageBody.length; @@ -555,12 +555,12 @@ async function encodeDataPageV2( statistics: parquet_thrift.Statistics ) { /* encode values */ - let valuesBuf = encodeValues(column.primitiveType!, column.encoding!, values, { + const valuesBuf = encodeValues(column.primitiveType!, column.encoding!, values, { bitWidth: column.typeLength, ...column, }); - let valuesBufCompressed = await parquet_compression.deflate(column.compression!, valuesBuf); + const valuesBufCompressed = await parquet_compression.deflate(column.compression!, valuesBuf); /* encode repetition and definition levels */ let rLevelsBuf = Buffer.alloc(0); @@ -580,7 +580,7 @@ async function encodeDataPageV2( } /* build page header */ - let pageHeader = new parquet_thrift.PageHeader(); + const pageHeader = new parquet_thrift.PageHeader(); pageHeader.type = parquet_thrift.PageType['DATA_PAGE_V2']; pageHeader.data_page_header_v2 = new parquet_thrift.DataPageHeaderV2(); pageHeader.data_page_header_v2.num_values = dlevels.length; @@ -619,12 +619,12 @@ async function encodeColumnChunk( pageIndex: boolean; } ) { - let pagesBuf = Buffer.concat(pages.map((d) => d.page)); - let num_values = pages.reduce((p, d) => p + d.num_values, 0); + const pagesBuf 
= Buffer.concat(pages.map((d) => d.page)); + const num_values = pages.reduce((p, d) => p + d.num_values, 0); let offset = opts.baseOffset; /* prepare metadata header */ - let metadata: ColumnMetaDataExt = new parquet_thrift.ColumnMetaData(); + const metadata: ColumnMetaDataExt = new parquet_thrift.ColumnMetaData(); metadata.path_in_schema = opts.column.path; metadata.num_values = new Int64(num_values); metadata.data_page_offset = new Int64(opts.baseOffset); @@ -636,18 +636,18 @@ async function encodeColumnChunk( metadata.codec = await parquet_thrift.CompressionCodec[opts.column.compression!]; /* compile statistics ColumnIndex and OffsetIndex*/ - let columnIndex = new parquet_thrift.ColumnIndex(); + const columnIndex = new parquet_thrift.ColumnIndex(); columnIndex.null_pages = []; columnIndex.max_values = []; columnIndex.min_values = []; // Default to unordered columnIndex.boundary_order = 0; - let offsetIndex = new parquet_thrift.OffsetIndex(); + const offsetIndex = new parquet_thrift.OffsetIndex(); offsetIndex.page_locations = []; /* prepare statistics */ - let statistics: parquet_thrift.Statistics = {}; - let distinct_values = new Set(); + const statistics: parquet_thrift.Statistics = {}; + const distinct_values = new Set(); statistics.null_count = new Int64(0); statistics.distinct_count = new Int64(0); @@ -671,7 +671,7 @@ async function encodeColumnChunk( columnIndex.min_values.push(encodeStatisticsValue(page.statistics.min_value, opts.column)); } - let pageLocation = new parquet_thrift.PageLocation(); + const pageLocation = new parquet_thrift.PageLocation(); pageLocation.offset = new Int64(offset); offset += page.page.length; pageLocation.compressed_page_size = page.page.length; @@ -696,8 +696,8 @@ async function encodeColumnChunk( metadata.encodings.push(parquet_thrift.Encoding[opts.column.encoding!]); /* concat metadata header and data pages */ - let metadataOffset = opts.baseOffset + pagesBuf.length; - let body = Buffer.concat([pagesBuf, parquet_util.serializeThrift(metadata)]); + const metadataOffset = opts.baseOffset + pagesBuf.length; + const body = Buffer.concat([pagesBuf, parquet_util.serializeThrift(metadata)]); return { body, metadata, metadataOffset }; } @@ -705,18 +705,18 @@ async function encodeColumnChunk( * Encode a list of column values into a parquet row group */ async function encodeRowGroup(schema: ParquetSchema, data: parquet_shredder.RecordBuffer, opts: WriterOptions) { - let metadata: RowGroupExt = new parquet_thrift.RowGroup(); + const metadata: RowGroupExt = new parquet_thrift.RowGroup(); metadata.num_rows = new Int64(data.rowCount!); metadata.columns = []; metadata.total_byte_size = new Int64(0); let body = Buffer.alloc(0); - for (let field of schema.fieldList) { + for (const field of schema.fieldList) { if (field.isNested) { continue; } - let cchunkData = await encodeColumnChunk(data.pages![field.path.join(',')], { + const cchunkData = await encodeColumnChunk(data.pages![field.path.join(',')], { column: field, baseOffset: opts.baseOffset!.valueOf() + body.length, pageSize: opts.pageSize || 0, @@ -725,7 +725,7 @@ async function encodeRowGroup(schema: ParquetSchema, data: parquet_shredder.Reco pageIndex: opts.pageIndex ?? 
true,
    });
 
-    let cchunk = new parquet_thrift.ColumnChunk();
+    const cchunk = new parquet_thrift.ColumnChunk();
     cchunk.file_offset = new Int64(cchunkData.metadataOffset);
     cchunk.meta_data = cchunkData.metadata;
     metadata.columns.push(cchunk);
@@ -746,7 +746,7 @@ function encodeFooter(
   rowGroups: RowGroupExt[],
   userMetadata: Record<string, string>
 ) {
-  let metadata = new parquet_thrift.FileMetaData();
+  const metadata = new parquet_thrift.FileMetaData();
   metadata.version = PARQUET_VERSION;
   metadata.created_by = '@dsnp/parquetjs';
   metadata.num_rows = rowCount;
@@ -754,22 +754,22 @@ function encodeFooter(
   metadata.schema = [];
   metadata.key_value_metadata = [];
 
-  for (let k in userMetadata) {
-    let kv = new parquet_thrift.KeyValue();
+  for (const k in userMetadata) {
+    const kv = new parquet_thrift.KeyValue();
     kv.key = k;
     kv.value = userMetadata[k];
     metadata.key_value_metadata.push(kv);
   }
 
   {
-    let schemaRoot = new parquet_thrift.SchemaElement();
+    const schemaRoot = new parquet_thrift.SchemaElement();
     schemaRoot.name = 'root';
     schemaRoot.num_children = Object.keys(schema.fields).length;
     metadata.schema.push(schemaRoot);
   }
 
-  for (let field of schema.fieldList) {
-    let schemaElem = new parquet_thrift.SchemaElement();
+  for (const field of schema.fieldList) {
+    const schemaElem = new parquet_thrift.SchemaElement();
     schemaElem.name = field.name;
     schemaElem.repetition_type = parquet_thrift.FieldRepetitionType[field.repetitionType];
 
@@ -796,8 +796,8 @@ function encodeFooter(
     metadata.schema.push(schemaElem);
   }
 
-  let metadataEncoded = parquet_util.serializeThrift(metadata);
-  let footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
+  const metadataEncoded = parquet_util.serializeThrift(metadata);
+  const footerEncoded = Buffer.alloc(metadataEncoded.length + 8);
   metadataEncoded.copy(footerEncoded);
   footerEncoded.writeUInt32LE(metadataEncoded.length, metadataEncoded.length);
   footerEncoded.write(PARQUET_MAGIC, metadataEncoded.length + 4);
diff --git a/test/bloomFilterIntegration.ts b/test/bloomFilterIntegration.ts
index 9b4a428d..016cdc9a 100644
--- a/test/bloomFilterIntegration.ts
+++ b/test/bloomFilterIntegration.ts
@@ -1,52 +1,23 @@
 import { assert } from 'chai';
 import parquet from '../parquet';
-
-import parquet_thrift from '../gen-nodejs/parquet_types';
-import { decodeThrift } from '../lib/util';
 import SplitBlockBloomFilter from '../lib/bloom/sbbf';
 
 const TEST_VTIME = new Date();
 const TEST_FILE = 'fruits-bloomfilter.parquet';
 
-type BloomFilterColumnData = {
+interface BloomFilterColumnData {
   sbbf: SplitBlockBloomFilter;
   columnName: string;
   rowGroupIndex: number;
-};
-
-const sampleColumnHeaders = async (filename: string) => {
-  let reader = await parquet.ParquetReader.openFile(filename);
-
-  let column = reader.metadata!.row_groups[0].columns[0];
-  let buffer = await reader!.envelopeReader!.read(
-    +column!.meta_data!.data_page_offset,
-    +column!.meta_data!.total_compressed_size
-  );
-
-  let cursor = {
-    buffer: buffer,
-    offset: 0,
-    size: buffer.length,
-  };
-
-  const pages = [];
-
-  while (cursor.offset < cursor.size) {
-    const pageHeader = new parquet_thrift.PageHeader();
-    cursor.offset += decodeThrift(pageHeader, cursor.buffer.subarray(cursor.offset));
-    pages.push(pageHeader);
-    cursor.offset += pageHeader.compressed_page_size;
-  }
-  return { column, pages };
-};
-
-describe('bloom filter', async function () {
-  let row: any;
+}
+
+describe('bloom filter', function () {
   let reader: any;
-  let bloomFilters: Record<string, Array<BloomFilterColumnData>>;
+  let bloomFilters: Record<string, BloomFilterColumnData[]>;
 
-  describe('a nested schema', () => {
+  describe('a nested schema', function () {
+    // eslint-disable-next-line mocha/no-setup-in-describe
     const schema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       quantity: { type: 'INT64', optional: true },
@@ -65,6 +36,7 @@ describe('bloom filter', async function () {
       colour: { type: 'UTF8', repeated: true },
       meta_json: { type: 'BSON', optional: true, statistics: false },
     });
+
     before(async function () {
       const options = {
         pageSize: 3,
@@ -84,7 +56,7 @@ describe('bloom filter', async function () {
         ],
       };
 
-      let writer = await parquet.ParquetWriter.openFile(schema, TEST_FILE, options);
+      const writer = await parquet.ParquetWriter.openFile(schema, TEST_FILE, options);
 
       await writer.appendRow({
         name: 'apples',
@@ -137,12 +109,11 @@ describe('bloom filter', async function () {
       await writer.close();
 
       reader = await parquet.ParquetReader.openFile(TEST_FILE);
-      row = reader.metadata.row_groups[0];
 
       bloomFilters = await reader.getBloomFiltersFor(['name', 'quantity', 'stock,warehouse']);
     });
 
-    it('contains name and quantity filter', () => {
+    it('contains name and quantity filter', function () {
       const columnsFilterNames = Object.keys(bloomFilters);
       assert.deepEqual(columnsFilterNames, ['name', 'quantity', 'stock,warehouse']);
     });
@@ -163,7 +134,7 @@ describe('bloom filter', async function () {
       assert.isFalse(await splitBlockBloomFilter.check(BigInt(100)), '100n is NOT included in quantity filter');
     });
 
-    it('writes bloom filters for stock,warehouse', async () => {
+    it('writes bloom filters for stock,warehouse', async function () {
       const splitBlockBloomFilter = bloomFilters['stock,warehouse'][0].sbbf;
       assert.isTrue(await splitBlockBloomFilter.check(Buffer.from('x')), 'x should be in the warehouse filter');
       assert.isTrue(await splitBlockBloomFilter.check(Buffer.from('f')), 'f should be in the warehouse filter');
@@ -173,7 +144,9 @@ describe('bloom filter', async function () {
       );
     });
   });
-  describe('a simple schema with a nested list', () => {
+
+  describe('a simple schema with a nested list', function () {
+    // eslint-disable-next-line mocha/no-setup-in-describe
     const nestedListSchema = new parquet.ParquetSchema({
       name: { type: 'UTF8' },
       querystring: {
@@ -194,7 +167,7 @@ describe('bloom filter', async function () {
       },
     });
 
-    it('can be written, read and checked', async () => {
+    it('can be written, read and checked', async function () {
       const file = '/tmp/issue-98.parquet';
       const nestedListFilterColumn = 'querystring,list,element,key';
       const writer = await parquet.ParquetWriter.openFile(nestedListSchema, file, {
@@ -213,7 +186,7 @@ describe('bloom filter', async function () {
       });
       await writer.close();
       const reader = await parquet.ParquetReader.openFile(file);
-      const bloomFilters: Record<string, Array<BloomFilterColumnData>> = await reader.getBloomFiltersFor([
+      const bloomFilters: Record<string, BloomFilterColumnData[]> = await reader.getBloomFiltersFor([
         'name',
         'querystring,list,element,key',
       ]);
diff --git a/test/bloomFilterReader.test.ts b/test/bloomFilterReader.test.ts
index 9a1517d9..97687b3e 100644
--- a/test/bloomFilterReader.test.ts
+++ b/test/bloomFilterReader.test.ts
@@ -26,11 +26,11 @@ const emptyMetaData = (): ColumnMetaDataExt => {
   };
 };
 
-describe('bloomFilterReader', () => {
-  describe('offsets', () => {
-    let columnChunkDataCollection: Array<ColumnChunkData>;
+describe('bloomFilterReader', function () {
+  describe('offsets', function () {
+    let columnChunkDataCollection: ColumnChunkData[];
 
-    beforeEach(() => {
+    beforeEach(function () {
       const metaData: ColumnMetaDataExt = emptyMetaData();
       metaData.path_in_schema = ['name'];
       metaData.bloom_filter_offset = new 
Int64(Buffer.from('000000000874', 'hex'), 0); @@ -49,7 +49,7 @@ describe('bloomFilterReader', () => { ]; }); - it('returns bloom filter offsets', () => { + it('returns bloom filter offsets', function () { const result = parseBloomFilterOffsets(columnChunkDataCollection); const expected = [ { @@ -62,8 +62,9 @@ describe('bloomFilterReader', () => { expect(result).to.deep.equal(expected); }); }); - describe('XXHasher', async () => { - it('outputs hex-encoded strings', async () => { + + describe('XXHasher', function () { + it('outputs hex-encoded strings', async function () { const hasher = await new XxHasher(); assert.equal('ee7276ee58e4421c', await hasher.hash64('15')); }); diff --git a/test/bloomFilterWriter.test.ts b/test/bloomFilterWriter.test.ts index d8284ef7..2aba5812 100644 --- a/test/bloomFilterWriter.test.ts +++ b/test/bloomFilterWriter.test.ts @@ -3,80 +3,81 @@ import sinon from 'sinon'; import { createSBBF } from '../lib/bloomFilterIO/bloomFilterWriter'; const SplitBlockBloomFilter = require('../lib/bloom/sbbf').default; -describe('buildFilterBlocks', () => { - describe('when no options are present', () => { +describe('buildFilterBlocks', function () { + describe('when no options are present', function () { let sbbfMock: sinon.SinonMock; - beforeEach(() => { + beforeEach(function () { sbbfMock = sinon.mock(SplitBlockBloomFilter.prototype); }); - afterEach(() => { + afterEach(function () { sbbfMock.verify(); }); - it('calls .init once', () => { + it('calls .init once', function () { sbbfMock.expects('init').once(); createSBBF({}); }); - it('does not set false positive rate', () => { + it('does not set false positive rate', function () { sbbfMock.expects('setOptionNumFilterBytes').never(); createSBBF({}); }); - it('does not set number of distinct', () => { + it('does not set number of distinct', function () { sbbfMock.expects('setOptionNumDistinct').never(); createSBBF({}); }); }); - describe('when numFilterBytes is present', () => { + describe('when numFilterBytes is present', function () { let sbbfMock: sinon.SinonMock; - beforeEach(() => { + beforeEach(function () { sbbfMock = sinon.mock(SplitBlockBloomFilter.prototype); }); - afterEach(() => { + afterEach(function () { sbbfMock.verify(); }); - it('calls setOptionNumberFilterBytes once', () => { + it('calls setOptionNumberFilterBytes once', function () { sbbfMock.expects('setOptionNumFilterBytes').once().returnsThis(); createSBBF({ numFilterBytes: 1024 }); }); - it('does not set number of distinct', () => { + it('does not set number of distinct', function () { sbbfMock.expects('setOptionNumDistinct').never(); createSBBF({}); }); - it('calls .init once', () => { + it('calls .init once', function () { sbbfMock.expects('init').once(); createSBBF({}); }); }); - describe('when numFilterBytes is NOT present', () => { + describe('when numFilterBytes is NOT present', function () { let sbbfMock: sinon.SinonMock; - beforeEach(() => { + + beforeEach(function () { sbbfMock = sinon.mock(SplitBlockBloomFilter.prototype); }); - afterEach(() => { + afterEach(function () { sbbfMock.verify(); }); - describe('and falsePositiveRate is present', () => { - it('calls ssbf.setOptionFalsePositiveRate', () => { + describe('and falsePositiveRate is present', function () { + it('calls ssbf.setOptionFalsePositiveRate', function () { sbbfMock.expects('setOptionFalsePositiveRate').once(); createSBBF({ falsePositiveRate: 0.1 }); }); }); - describe('and numDistinct is present', () => { - it('calls ssbf.setOptionNumDistinct', () => { + describe('and numDistinct 
is present', function () { + it('calls ssbf.setOptionNumDistinct', function () { sbbfMock.expects('setOptionNumDistinct').once(); createSBBF({ falsePositiveRate: 0.1, diff --git a/test/browser/main.ts b/test/browser/main.ts index f79d9b9d..a22e11d8 100644 --- a/test/browser/main.ts +++ b/test/browser/main.ts @@ -3,9 +3,9 @@ import { assert } from 'chai'; const buffer = require('buffer'); -describe('Browser tests', () => { - describe('reader', () => { - it('can read snappy compressed data', async () => { +describe('Browser tests', function () { + describe('reader', function () { + it('can read snappy compressed data', async function () { // Data from test/test-files/snappy-compressed.parquet const uint8Array = [ 80, 65, 82, 49, 21, 6, 21, 80, 21, 82, 92, 21, 8, 21, 0, 21, 8, 21, 0, 21, 0, 21, 0, 17, 28, 24, 5, 119, 111, @@ -32,7 +32,7 @@ describe('Browser tests', () => { } assert.equal(data.length, 4); - after(async () => { + after(async function () { await reader.close(); }); }); diff --git a/test/dictionary.js b/test/dictionary.js index c3cd0c48..7e610650 100644 --- a/test/dictionary.js +++ b/test/dictionary.js @@ -4,7 +4,7 @@ const assert = chai.assert; const parquet = require('../parquet'); const path = require('path'); -describe('dictionary encoding', async function () { +describe('dictionary encoding', function () { it('should read uncompressed dictionary from spark', async function () { let reader = await parquet.ParquetReader.openFile( path.resolve(__dirname, 'test-files/spark-uncompressed-dict.parquet') diff --git a/test/integration.js b/test/integration.js index 1d0258f5..03f4ed51 100644 --- a/test/integration.js +++ b/test/integration.js @@ -1,7 +1,9 @@ 'use strict'; + +/* eslint mocha/no-setup-in-describe: 'off', @typescript-eslint/no-empty-function: 'off' */ + const chai = require('chai'); const fs = require('fs'); -const os = require('os'); const assert = chai.assert; const parquet = require('../parquet'); const parquet_thrift = require('../gen-nodejs/parquet_types'); @@ -34,7 +36,7 @@ function mkTestSchema(opts) { }); } -function mkTestRows(opts) { +function mkTestRows(_opts) { let rows = []; for (let i = 0; i < TEST_NUM_ROWS; ++i) { @@ -120,7 +122,7 @@ async function writeTestStream(opts) { var out = new stream.PassThrough(); let writer = await parquet.ParquetWriter.openStream(schema, out, opts); - out.on('data', function (d) {}); + out.on('data', function (_d) {}); out.on('end', function () {}); writer.setMetadata('myuid', '420'); @@ -195,7 +197,7 @@ async function verifyStatistics() { assert.equal(colStats.null_count, 0); assert.equal(colStats.distinct_count, 4); - column.pages.forEach((d, i) => { + column.pages.forEach((d, _i) => { let header = d.data_page_header || d.data_page_header_v2; let pageStats = header.statistics; assert.equal(pageStats.null_count, 0); diff --git a/test/jsonSchema.test.ts b/test/jsonSchema.test.ts index 1dc5360c..d0f29a0a 100644 --- a/test/jsonSchema.test.ts +++ b/test/jsonSchema.test.ts @@ -54,70 +54,70 @@ describe('Json Schema Conversion', function () { }); }); -describe('Json Schema Conversion Test File', async function () { - const parquetSchema = ParquetSchema.fromJsonSchema({ - type: 'object', - properties: { - string_field: { type: 'string' }, - int_field: { type: 'integer' }, - number_field: { type: 'number' }, - array_field: { - type: 'array', - items: { type: 'string' }, - additionalItems: false, +const parquetSchema = ParquetSchema.fromJsonSchema({ + type: 'object', + properties: { + string_field: { type: 'string' }, + int_field: { 
type: 'integer' }, + number_field: { type: 'number' }, + array_field: { + type: 'array', + items: { type: 'string' }, + additionalItems: false, + }, + timestamp_array_field: { + type: 'array', + items: { + type: 'string', + format: 'date-time', }, - timestamp_array_field: { - type: 'array', - items: { + additionalItems: false, + }, + timestamp_field: { + type: 'string', + format: 'date-time', + }, + obj_field: { + type: 'object', + properties: { + sub1: { + type: 'string', + }, + sub2: { type: 'string', - format: 'date-time', }, - additionalItems: false, - }, - timestamp_field: { - type: 'string', - format: 'date-time', }, - obj_field: { + additionalProperties: false, + }, + struct_field: { + type: 'array', + items: { type: 'object', properties: { - sub1: { - type: 'string', + sub3: { type: 'string' }, + sub4: { type: 'string' }, + sub5: { + type: 'object', + properties: { + sub6: { type: 'string' }, + sub7: { type: 'string' }, + }, + additionalProperties: false, }, - sub2: { - type: 'string', + sub8: { + type: 'array', + items: { type: 'string' }, }, }, additionalProperties: false, }, - struct_field: { - type: 'array', - items: { - type: 'object', - properties: { - sub3: { type: 'string' }, - sub4: { type: 'string' }, - sub5: { - type: 'object', - properties: { - sub6: { type: 'string' }, - sub7: { type: 'string' }, - }, - additionalProperties: false, - }, - sub8: { - type: 'array', - items: { type: 'string' }, - }, - }, - additionalProperties: false, - }, - additionalItems: false, - }, + additionalItems: false, }, - additionalProperties: false, - }); + }, + additionalProperties: false, +}); +describe('Json Schema Conversion Test File', function () { const row1 = { string_field: 'string value', int_field: 10n, @@ -171,7 +171,7 @@ describe('Json Schema Conversion Test File', async function () { it('schema is encoded correctly', async function () { const schema = reader.metadata?.schema; - checkSnapshot(reader.metadata?.schema, './test-files/json-schema-test-file.result.json', update); + checkSnapshot(schema, './test-files/json-schema-test-file.result.json', update); }); it('output matches input', async function () { diff --git a/test/lib/bufferReader.test.js b/test/lib/bufferReader.test.js index e0c4a084..8cec6cbc 100644 --- a/test/lib/bufferReader.test.js +++ b/test/lib/bufferReader.test.js @@ -8,16 +8,17 @@ import { ParquetEnvelopeReader } from '../../lib/reader'; chai.use(sinonChai); chai.use(sinonChaiInOrder); -describe('bufferReader', () => { +describe('bufferReader', function () { let reader; - beforeEach(() => { + beforeEach(function () { const mockEnvelopeReader = sinon.fake(); reader = new BufferReader(mockEnvelopeReader, {}); }); - describe('#read', async () => { - describe('given that reader is scheduled', () => { - it('adds an item to the queue', () => { + + describe('#read', function () { + describe('given that reader is scheduled', function () { + it('adds an item to the queue', function () { const offset = 1; const length = 2; reader.read(offset, length); @@ -26,8 +27,8 @@ describe('bufferReader', () => { }); }); - describe('#processQueue', () => { - it('only enqueues an item and reads on flushing the queue', async () => { + describe('#processQueue', function () { + it('only enqueues an item and reads on flushing the queue', async function () { const mockResolve = sinon.spy(); const mockResolve2 = sinon.spy(); reader.envelopeReader = { readFn: sinon.fake.returns(Buffer.from('buffer', 'utf8')) }; @@ -51,7 +52,7 @@ describe('bufferReader', () => { 
sinon.assert.calledWith(mockResolve2, Buffer.from('uffe', 'utf8')); }); - it('enqueues items and then reads them', async () => { + it('enqueues items and then reads them', async function () { const mockResolve = sinon.spy(); const mockResolve2 = sinon.spy(); reader.maxLength = 1; @@ -76,7 +77,7 @@ describe('bufferReader', () => { sinon.assert.calledWith(mockResolve2, Buffer.from('uffe', 'utf8')); }); - it('enqueues items and reads them in order', async () => { + it('enqueues items and reads them in order', async function () { const mockResolve = sinon.spy(); reader.envelopeReader = { readFn: sinon.fake.returns(Buffer.from('thisisalargebuffer', 'utf8')) }; @@ -118,7 +119,7 @@ describe('bufferReader', () => { .subsequently.calledWith(Buffer.from('buffer', 'utf8')); }); - it('should read even if the maxSpan has been exceeded', async () => { + it('should read even if the maxSpan has been exceeded', async function () { const mockResolve = sinon.spy(); reader.maxSpan = 5; reader.envelopeReader = { readFn: sinon.fake.returns(Buffer.from('willslicefrombeginning', 'utf8')) }; @@ -163,17 +164,17 @@ describe('bufferReader', () => { }); }); -describe('bufferReader Integration Tests', () => { +describe('bufferReader Integration Tests', function () { let reader; let envelopeReader; - describe('Reading a file', async () => { - beforeEach(async () => { + describe('Reading a file', function () { + beforeEach(async function () { envelopeReader = await ParquetEnvelopeReader.openFile('./test/lib/test.txt', {}); reader = new BufferReader(envelopeReader); }); - it('should properly read the file', async () => { + it('should properly read the file', async function () { const buffer = await reader.read(0, 5); const buffer2 = await reader.read(6, 5); const buffer3 = await reader.read(12, 5); diff --git a/test/list.js b/test/list.js index e11d62f2..2d6130cd 100644 --- a/test/list.js +++ b/test/list.js @@ -33,28 +33,45 @@ const parquet = require('../parquet'); And verify that Athena parses the parquet file correctly by `SELECT * from listTest` */ -describe('struct list', async function () { - let reader; - - const listStructSchema = new parquet.ParquetSchema({ - id: { type: 'UTF8' }, - test: { - type: 'LIST', - fields: { - list: { - repeated: true, - fields: { - element: { - fields: { - a: { type: 'UTF8' }, - b: { type: 'INT64' }, - }, +const listStructSchema = new parquet.ParquetSchema({ + id: { type: 'UTF8' }, + test: { + type: 'LIST', + fields: { + list: { + repeated: true, + fields: { + element: { + fields: { + a: { type: 'UTF8' }, + b: { type: 'INT64' }, }, }, }, }, }, - }); + }, +}); + +const listArraySchema = new parquet.ParquetSchema({ + id: { type: 'UTF8' }, + test: { + type: 'LIST', + fields: { + list: { + repeated: true, + fields: { + element: { + type: 'UTF8', + }, + }, + }, + }, + }, +}); + +describe('struct list', function () { + let reader; const row1 = { id: 'Row1', @@ -98,26 +115,9 @@ describe('struct list', async function () { }); }); -describe('array list', async function () { +describe('array list', function () { let reader; - const listArraySchema = new parquet.ParquetSchema({ - id: { type: 'UTF8' }, - test: { - type: 'LIST', - fields: { - list: { - repeated: true, - fields: { - element: { - type: 'UTF8', - }, - }, - }, - }, - }, - }); - const row1 = { id: 'Row1', test: { list: [{ element: 'abcdef' }, { element: 'fedcba' }] }, diff --git a/test/metadata-cache.js b/test/metadata-cache.js index d5aed503..b587e56a 100644 --- a/test/metadata-cache.js +++ b/test/metadata-cache.js @@ -24,7 +24,9 @@ 
describe('metadata-cache', function () { column.offset_index_length = undefined; column.column_index_offset = undefined; column.column_index_length = undefined; - } catch (e) {} + } catch (e) { + console.error(e); + } } } const metaDataTxt = await reader.exportMetadata(); diff --git a/test/reader.js b/test/reader.js index fdb69047..13b69353 100644 --- a/test/reader.js +++ b/test/reader.js @@ -11,21 +11,23 @@ const { sdkStreamMixin } = require('@smithy/util-stream'); const { createReadStream } = require('fs'); const { ParquetReader } = require('../parquet'); -describe('ParquetReader', () => { - describe('#openUrl', () => { - before(() => { +const s3Mock = mockClient(S3Client); + +describe('ParquetReader', function () { + describe('#openUrl', function () { + before(function () { server.listen(); }); - afterEach(() => { + afterEach(function () { server.resetHandlers(); }); - after(() => { + after(function () { server.close(); }); - it('reads parquet files via http', async () => { + it('reads parquet files via http', async function () { const reader = await parquet.ParquetReader.openUrl('http://fruits-bloomfilter.parquet'); const cursor = await reader.getCursor(); @@ -86,12 +88,12 @@ describe('ParquetReader', () => { }); }); - describe('#asyncIterator', () => { - it('responds to for await', async () => { + describe('#asyncIterator', function () { + it('responds to for await', async function () { const reader = await parquet.ParquetReader.openFile(path.join(__dirname, 'test-files', 'fruits.parquet')); let counter = 0; - for await (const record of reader) { + for await (const _record of reader) { counter++; } @@ -99,8 +101,8 @@ describe('ParquetReader', () => { }); }); - describe('#handleDecimal', () => { - it('loads parquet with columns configured as DECIMAL', async () => { + describe('#handleDecimal', function () { + it('loads parquet with columns configured as DECIMAL', async function () { const reader = await parquet.ParquetReader.openFile( path.join(__dirname, 'test-files', 'valid-decimal-columns.parquet') ); @@ -118,11 +120,10 @@ describe('ParquetReader', () => { assert.equal(data[2].under_9_digits, undefined); }); }); - describe('ParquetReader with S3', () => { - describe('V3', () => { - const s3Mock = mockClient(S3Client); - it('works', async () => { + describe('ParquetReader with S3', function () { + describe('V3', function () { + it('works', async function () { let srcFile = 'test/test-files/nation.dict.parquet'; const headStream = new Readable(); diff --git a/test/reference-test/read-all.test.ts b/test/reference-test/read-all.test.ts index 045ad7ba..4170594b 100644 --- a/test/reference-test/read-all.test.ts +++ b/test/reference-test/read-all.test.ts @@ -1,3 +1,4 @@ +/* eslint mocha/no-setup-in-describe: 'off' */ import { expect } from 'chai'; import path from 'node:path'; import fs from 'node:fs'; diff --git a/test/sbbf.ts b/test/sbbf.ts index a8350e2d..1a41a081 100644 --- a/test/sbbf.ts +++ b/test/sbbf.ts @@ -5,7 +5,7 @@ import { Done } from 'mocha'; import SplitBlockBloomFilter from '../lib/bloom/sbbf'; -const times = (n: number, fn: Function) => { +const times = (n: number, fn: () => void) => { return Array(n).map(() => fn()); }; const random = (min: number, max: number) => { @@ -14,7 +14,7 @@ const random = (min: number, max: number) => { return Math.floor(Math.random() * (max - min + 1) + min); }; -describe('Split Block Bloom Filters', () => { +describe('Split Block Bloom Filters', function () { const expectedDefaultBytes = 29920; it('Mask works', function () { @@ -27,11 +27,12 
@@ describe('Split Block Bloom Filters', () => {
       expect(testMaskRes[i]).to.eq(expectedVals[i]);
     }
   });
+
   it('block insert + check works', function () {
-    let blk = SplitBlockBloomFilter.initBlock();
-    let isInsertedX: Long = Long.fromString('6f6f6f6f6', true, 16);
-    let isInsertedY: Long = Long.fromString('deadbeef', true, 16);
-    let notInsertedZ: Long = Long.fromNumber(3);
+    const blk = SplitBlockBloomFilter.initBlock();
+    const isInsertedX: Long = Long.fromString('6f6f6f6f6', true, 16);
+    const isInsertedY: Long = Long.fromString('deadbeef', true, 16);
+    const notInsertedZ: Long = Long.fromNumber(3);
 
     SplitBlockBloomFilter.blockInsert(blk, isInsertedX);
 
@@ -59,6 +60,7 @@ describe('Split Block Bloom Filters', () => {
     new Long(0x0, 0x1, true),
     new Long(793516929, -2061372197, true), // regression test; this one was failing get blockIndex
   ];
+  // eslint-disable-next-line mocha/no-setup-in-describe
   const badVal = Long.fromNumber(0xfafafafa, true);
 
   it('filter insert + check works', function () {
@@ -76,6 +78,7 @@ describe('Split Block Bloom Filters', () => {
     });
     filter.check(badVal).then((isPresent) => expect(isPresent).to.eq(false));
   });
+
   it('number of filter bytes is set to defaults on init', async function () {
     const filter = new SplitBlockBloomFilter().init();
     expect(filter.getNumFilterBytes()).to.eq(expectedDefaultBytes);
@@ -95,6 +98,7 @@ describe('Split Block Bloom Filters', () => {
       spy.restore();
     });
   });
+
   it('sets filter bytes to next power of 2', function () {
     let filter = new SplitBlockBloomFilter().init();
     expect(filter.getNumFilterBytes()).to.eq(expectedDefaultBytes);
@@ -109,6 +113,7 @@ describe('Split Block Bloom Filters', () => {
     filter = new SplitBlockBloomFilter().setOptionNumFilterBytes(below2).init();
     expect(filter.getNumFilterBytes()).to.eq(2 ** 12);
   });
+
   it("can't be set twice after initializing", function () {
     const spy = sinon.spy(console, 'error');
     const filter = new SplitBlockBloomFilter()
@@ -128,6 +133,7 @@ describe('Split Block Bloom Filters', () => {
     const filter = new SplitBlockBloomFilter().setOptionFalsePositiveRate(0.00101);
     expect(filter.getFalsePositiveRate()).to.eq(0.00101);
   });
+
   it("can't be set twice after initializing", function () {
     const spy = sinon.spy(console, 'error');
     const filter = new SplitBlockBloomFilter()
@@ -147,6 +153,7 @@ describe('Split Block Bloom Filters', () => {
     const filter = new SplitBlockBloomFilter().setOptionNumDistinct(10000);
     expect(filter.getNumDistinct()).to.eq(10000);
   });
+
   it("can't be set twice after initializing", function () {
     const spy = sinon.spy(console, 'error');
     const filter = new SplitBlockBloomFilter().setOptionNumDistinct(10000).setOptionNumDistinct(9999);
@@ -165,12 +172,14 @@ describe('Split Block Bloom Filters', () => {
     expect(spy.calledOnce);
     spy.restore();
   });
+
   it('allocates the filter', function () {
     const filter = new SplitBlockBloomFilter().setOptionNumFilterBytes(1024).init();
     expect(filter.getNumFilterBlocks()).to.eq(32);
     expect(filter.getFilter().length).to.eq(32);
   });
 });
+
 describe('optimal number of blocks', function () {
   // Some general ideas of what size filters are needed for different parameters
   // Note there is a small but non-negligible difference between this and what
@@ -230,9 +239,13 @@ describe('Split Block Bloom Filters', () => {
     url: 'http://placekitten.com/800/600',
   };
 
+  /* eslint mocha/no-setup-in-describe: 'off' */
   describe('insert, check', function () {
-    type testCase = { name: string; val: any };
-    const testCases: Array<testCase> = [
+    interface testCase {
+      name: string;
+      val: any;
+    }
+ const testCases: testCase[] = [ { name: 'boolean', val: true }, { name: 'int number', val: 23423 }, { name: 'float number', val: 23334.23 }, @@ -254,7 +267,7 @@ describe('Split Block Bloom Filters', () => { }); }); - describe('insert throws on unsupported type', async function () { + describe('insert throws on unsupported type', function () { const throwCases = [ { name: 'POJO', val: pojo }, { name: 'Array', val: [383838, 222, 5898, 1, 0] }, diff --git a/test/statistics.js b/test/statistics.js index 16e8f4a4..6b2759e4 100644 --- a/test/statistics.js +++ b/test/statistics.js @@ -24,7 +24,7 @@ const schema = new parquet.ParquetSchema({ meta_json: { type: 'BSON', optional: true, statistics: false }, }); -describe('statistics', async function () { +describe('statistics', function () { let row, reader; before(async function () { diff --git a/test/thrift.js b/test/thrift.js index b65ee758..327bb485 100644 --- a/test/thrift.js +++ b/test/thrift.js @@ -1,7 +1,6 @@ 'use strict'; const chai = require('chai'); const assert = chai.assert; -const thrift = require('thrift'); const parquet_thrift = require('../gen-nodejs/parquet_types'); const parquet_util = require('../lib/util'); diff --git a/test/types.js b/test/types.js index 054f7903..6be32a68 100644 --- a/test/types.js +++ b/test/types.js @@ -1,184 +1,184 @@ 'use strict'; -const { toPrimitive, fromPrimitive } = require('../lib/types'); +const { toPrimitive } = require('../lib/types'); const chai = require('chai'); const assert = chai.assert; -describe('toPrimitive* should give the correct values back', () => { - it('toPrimitive(INT_8, 127n)', () => { +describe('toPrimitive* should give the correct values back', function () { + it('toPrimitive(INT_8, 127n)', function () { assert.equal(toPrimitive('INT_8', 127n), 127n); }), - it('toPrimitive(UINT_8, 255n)', () => { + it('toPrimitive(UINT_8, 255n)', function () { assert.equal(toPrimitive('UINT_8', 255n), 255n); }), - it('toPrimitive(INT_16, 32767n)', () => { + it('toPrimitive(INT_16, 32767n)', function () { assert.equal(toPrimitive('INT_16', 32767n), 32767n); }), - it('toPrimitive(UINT_16, 65535n)', () => { + it('toPrimitive(UINT_16, 65535n)', function () { assert.equal(toPrimitive('UINT_16', 65535n), 65535n); }), - it('toPrimitive(INT32, 2147483647n)', () => { + it('toPrimitive(INT32, 2147483647n)', function () { assert.equal(toPrimitive('INT32', 2147483647n), 2147483647n); }), - it('toPrimitive(UINT_32, 4294967295n)', () => { + it('toPrimitive(UINT_32, 4294967295n)', function () { assert.equal(toPrimitive('UINT_32', 4294967295n), 4294967295n); }), - it('toPrimitive(INT64, 9223372036854775807n)', () => { + it('toPrimitive(INT64, 9223372036854775807n)', function () { assert.equal(toPrimitive('INT64', 9223372036854775807n), 9223372036854775807n); }), - it('toPrimitive(UINT_64, 9223372036854775807n)', () => { + it('toPrimitive(UINT_64, 9223372036854775807n)', function () { assert.equal(toPrimitive('UINT_64', 9223372036854775807n), 9223372036854775807n); }), - it('toPrimitive(INT96, 9223372036854775807n)', () => { + it('toPrimitive(INT96, 9223372036854775807n)', function () { assert.equal(toPrimitive('INT96', 9223372036854775807n), 9223372036854775807n); }); }); -describe('toPrimitive INT* should give the correct values back with string value', () => { - it('toPrimitive(INT_8, "127")', () => { +describe('toPrimitive INT* should give the correct values back with string value', function () { + it('toPrimitive(INT_8, "127")', function () { assert.equal(toPrimitive('INT_8', '127'), 127n); }), - 
it('toPrimitive(UINT_8, "255")', () => { + it('toPrimitive(UINT_8, "255")', function () { assert.equal(toPrimitive('UINT_8', '255'), 255n); }), - it('toPrimitive(INT_16, "32767")', () => { + it('toPrimitive(INT_16, "32767")', function () { assert.equal(toPrimitive('INT_16', '32767'), 32767n); }), - it('toPrimitive(UINT_16, "65535")', () => { + it('toPrimitive(UINT_16, "65535")', function () { assert.equal(toPrimitive('UINT_16', '65535'), 65535n); }), - it('toPrimitive(INT32, "2147483647")', () => { + it('toPrimitive(INT32, "2147483647")', function () { assert.equal(toPrimitive('INT32', '2147483647'), 2147483647n); }), - it('toPrimitive(UINT_32, "4294967295")', () => { + it('toPrimitive(UINT_32, "4294967295")', function () { assert.equal(toPrimitive('UINT_32', '4294967295'), 4294967295n); }), - it('toPrimitive(INT64, "9223372036854775807")', () => { + it('toPrimitive(INT64, "9223372036854775807")', function () { assert.equal(toPrimitive('INT64', '9223372036854775807'), 9223372036854775807n); }), - it('toPrimitive(UINT_64, "9223372036854775807")', () => { + it('toPrimitive(UINT_64, "9223372036854775807")', function () { assert.equal(toPrimitive('UINT_64', '9223372036854775807'), 9223372036854775807n); }), - it('toPrimitive(INT96, "9223372036854775807")', () => { + it('toPrimitive(INT96, "9223372036854775807")', function () { assert.equal(toPrimitive('INT96', '9223372036854775807'), 9223372036854775807n); }); }); -describe('toPrimitive INT* should throw when given invalid value', () => { - describe('Testing toPrimitive_INT_8 values', () => { - it('toPrimitive(INT_8, 128) is too large', () => { +describe('toPrimitive INT* should throw when given invalid value', function () { + describe('Testing toPrimitive_INT_8 values', function () { + it('toPrimitive(INT_8, 128) is too large', function () { assert.throws(() => toPrimitive('INT_8', 128)); }), - it('toPrimitive(INT_8, -256) is too small', () => { + it('toPrimitive(INT_8, -256) is too small', function () { assert.throws(() => toPrimitive('INT_8', -256)); }), - it('toPrimitive(INT_8, "asd12@!$1") is given gibberish and should throw', () => { + it('toPrimitive(INT_8, "asd12@!$1") is given gibberish and should throw', function () { assert.throws(() => toPrimitive('INT_8', 'asd12@!$1')); }); }), - describe('Testing toPrimitive_UINT8 values', () => { - it('toPrimitive(UINT_8, 128) is too large', () => { + describe('Testing toPrimitive_UINT8 values', function () { + it('toPrimitive(UINT_8, 128) is too large', function () { assert.throws(() => toPrimitive('UINT_8', 256)); }), - it('toPrimitive(UINT_8, -256) is too small', () => { + it('toPrimitive(UINT_8, -256) is too small', function () { assert.throws(() => toPrimitive('UINT_8', -1)); }), - it('toPrimitive(UINT_8, "asd12@!$1") is given gibberish and should throw', () => { + it('toPrimitive(UINT_8, "asd12@!$1") is given gibberish and should throw', function () { assert.throws(() => toPrimitive('UINT_8', 'asd12@!$1')); }); }), - describe('Testing toPrimitive_INT16 values', () => { - it('toPrimitive(INT_16, 9999999) is too large', () => { + describe('Testing toPrimitive_INT16 values', function () { + it('toPrimitive(INT_16, 9999999) is too large', function () { assert.throws(() => toPrimitive('INT_16', 9999999)); }), - it('toPrimitive(INT_16, -9999999) is too small', () => { + it('toPrimitive(INT_16, -9999999) is too small', function () { assert.throws(() => toPrimitive('INT_16', -9999999)); }), - it('toPrimitive(INT_16, "asd12@!$1") is given gibberish and should throw', () => { + 
it('toPrimitive(INT_16, "asd12@!$1") is given gibberish and should throw', function () { assert.throws(() => toPrimitive('INT_16', 'asd12@!$1')); }); }), - describe('Testing toPrimitive_UINT16 values', () => { - it('toPrimitive(UINT_16, 9999999999999) is too large', () => { + describe('Testing toPrimitive_UINT16 values', function () { + it('toPrimitive(UINT_16, 9999999999999) is too large', function () { assert.throws(() => toPrimitive('UINT_16', 9999999999999)); }), - it('toPrimitive(UINT_16, -999999999999) is too small', () => { + it('toPrimitive(UINT_16, -999999999999) is too small', function () { assert.throws(() => toPrimitive('UINT_16', -9999999999999)); }), - it('toPrimitive(UINT_16, "asd12@!$1") is given gibberish and should throw', () => { + it('toPrimitive(UINT_16, "asd12@!$1") is given gibberish and should throw', function () { assert.throws(() => toPrimitive('UINT_16', 'asd12@!$1')); }); }), - describe('Testing toPrimitive_INT32 values', () => { - it('toPrimitive(INT_32, 999999999999) is too large', () => { + describe('Testing toPrimitive_INT32 values', function () { + it('toPrimitive(INT_32, 999999999999) is too large', function () { assert.throws(() => toPrimitive('INT_32', 999999999999)); }), - it('toPrimitive(INT_32, -999999999999) is too small', () => { + it('toPrimitive(INT_32, -999999999999) is too small', function () { assert.throws(() => toPrimitive('INT_32', -999999999999)); }), - it('toPrimitive(INT_32, "asd12@!$1") is given gibberish and should throw', () => { + it('toPrimitive(INT_32, "asd12@!$1") is given gibberish and should throw', function () { assert.throws(() => toPrimitive('INT_32', 'asd12@!$1')); }); }), - describe('Testing toPrimitive_UINT32 values', () => { - it('toPrimitive(UINT_32, 999999999999) is too large', () => { + describe('Testing toPrimitive_UINT32 values', function () { + it('toPrimitive(UINT_32, 999999999999) is too large', function () { assert.throws(() => toPrimitive('UINT_32', 999999999999999)); }), - it('toPrimitive(UINT_32, -999999999999) is too small', () => { + it('toPrimitive(UINT_32, -999999999999) is too small', function () { assert.throws(() => toPrimitive('UINT_32', -999999999999)); }), - it('toPrimitive(UINT_32, "asd12@!$1") is given gibberish and should throw', () => { + it('toPrimitive(UINT_32, "asd12@!$1") is given gibberish and should throw', function () { assert.throws(() => toPrimitive('UINT_32', 'asd12@!$1')); }); }), - describe('Testing toPrimitive_INT64 values', () => { - it('toPrimitive(INT_64, "9999999999999999999999") is too large', () => { + describe('Testing toPrimitive_INT64 values', function () { + it('toPrimitive(INT_64, "9999999999999999999999") is too large', function () { assert.throws(() => toPrimitive('INT_64', 9999999999999999999999)); }), - it('toPrimitive(INT_64, "-9999999999999999999999999") is too small', () => { + it('toPrimitive(INT_64, "-9999999999999999999999999") is too small', function () { assert.throws(() => toPrimitive('INT_64', -9999999999999999999999999)); }), - it('toPrimitive(INT_64, "asd12@!$1") is given gibberish and should throw', () => { + it('toPrimitive(INT_64, "asd12@!$1") is given gibberish and should throw', function () { assert.throws(() => toPrimitive('INT_64', 'asd12@!$1')); }); }), - describe('Testing toPrimitive_UINT64 values', () => { - it('toPrimitive(UINT_64, 9999999999999999999999) is too large', () => { + describe('Testing toPrimitive_UINT64 values', function () { + it('toPrimitive(UINT_64, 9999999999999999999999) is too large', function () { assert.throws(() => 
toPrimitive('UINT_64', 9999999999999999999999)); }), - it('toPrimitive(UINT_64, -999999999999) is too small', () => { + it('toPrimitive(UINT_64, -999999999999) is too small', function () { assert.throws(() => toPrimitive('UINT_64', -999999999999)); }), - it('toPrimitive(UINT_64, "asd12@!$1") is given gibberish and should throw', () => { + it('toPrimitive(UINT_64, "asd12@!$1") is given gibberish and should throw', function () { assert.throws(() => toPrimitive('UINT_64', 'asd12@!$1')); }); }), - describe('Testing toPrimitive_INT96 values', () => { - it('toPrimitive(UINT_96, 9999999999999999999999) is too large', () => { + describe('Testing toPrimitive_INT96 values', function () { + it('toPrimitive(UINT_96, 9999999999999999999999) is too large', function () { assert.throws(() => toPrimitive('INT_96', 9999999999999999999999)); }), - it('toPrimitive(UINT_96, -9999999999999999999999) is too small', () => { + it('toPrimitive(UINT_96, -9999999999999999999999) is too small', function () { assert.throws(() => toPrimitive('INT_96', -9999999999999999999999)); }), - it('toPrimitive(UINT_96, "asd12@!$1") is given gibberish and should throw', () => { + it('toPrimitive(UINT_96, "asd12@!$1") is given gibberish and should throw', function () { assert.throws(() => toPrimitive('INT_96', 'asd12@!$1')); }); }); - describe('toPrimitive ', () => { - const date = new Date(Date.parse('2022-12-01:00:00:01 GMT')); - + describe('toPrimitive ', function () { + // eslint-disable-next-line mocha/no-setup-in-describe ['TIME_MILLIS', 'TIME_MICROS', 'DATE', 'TIMESTAMP_MILLIS', 'TIMESTAMP_MICROS'].forEach((typeName) => { - it(`for type ${typeName} happy path`, () => { + it(`for type ${typeName} happy path`, function () { assert.equal(1234, toPrimitive(typeName, 1234)); assert.equal(1234, toPrimitive(typeName, '1234')); }); - it(`for type ${typeName} fails with negative values`, () => { + it(`for type ${typeName} fails with negative values`, function () { assert.throws(() => toPrimitive(typeName, '-1'), `${typeName} value is out of bounds: -1`); assert.throws(() => toPrimitive(typeName, -1), `${typeName} value is out of bounds: -1`); }); }); + // eslint-disable-next-line mocha/no-setup-in-describe ['DATE', 'TIMESTAMP_MILLIS', 'TIME_MILLIS'].forEach((typeName) => { - it(`${typeName} throws when number too large`, () => { + it(`${typeName} throws when number too large`, function () { assert.throws(() => toPrimitive(typeName, 9999999999999999999999), `${typeName} value is out of bounds: 1e+22`); assert.throws( () => toPrimitive(typeName, '9999999999999999999999'), @@ -186,15 +186,20 @@ describe('toPrimitive INT* should throw when given invalid value', () => { ); }); }); - it('DATE conversion works for DATE type', () => { + + it('DATE conversion works for DATE type', function () { + const date = new Date(Date.parse('2022-12-01:00:00:01 GMT')); assert.equal(toPrimitive('DATE', date), 19327.000011574073); }); - it('TIMESTAMP_MICROS works for a Date type and bigint', () => { + + it('TIMESTAMP_MICROS works for a Date type and bigint', function () { + const date = new Date(Date.parse('2022-12-01:00:00:01 GMT')); assert.equal(toPrimitive('TIMESTAMP_MICROS', date), 1669852801000000n); assert.equal(toPrimitive('TIMESTAMP_MICROS', '9999999999999999999999'), 9999999999999999999999n); assert.equal(toPrimitive('TIMESTAMP_MICROS', 98989898n), 98989898n); }); - it('TIME_MICROS works for a bigint', () => { + + it('TIME_MICROS works for a bigint', function () { const timestampBigint = 1932733334490741n; assert.equal(toPrimitive('TIME_MICROS', 
timestampBigint), 1932733334490741); assert.equal(toPrimitive('TIME_MICROS', '9999999999999999999999'), 9999999999999999999999n);