From c9160d375e21931c75e1fc821fb8ac25a8dd00c7 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Wed, 13 Nov 2024 13:11:06 -0500 Subject: [PATCH 01/13] test(NODE-6534): add spec test runner for Binary vector --- test/node/bson_binary_vector.spec.test.ts | 72 +++-------------------- 1 file changed, 7 insertions(+), 65 deletions(-) diff --git a/test/node/bson_binary_vector.spec.test.ts b/test/node/bson_binary_vector.spec.test.ts index aa40fb36..87f573ab 100644 --- a/test/node/bson_binary_vector.spec.test.ts +++ b/test/node/bson_binary_vector.spec.test.ts @@ -5,14 +5,6 @@ import { expect } from 'chai'; const { toHex, fromHex } = BSON.onDemand.ByteUtils; -const FLOAT = new Float64Array(1); -const FLOAT_BYTES = new Uint8Array(FLOAT.buffer, 0, 8); - -FLOAT[0] = -1; -// Little endian [0, 0, 0, 0, 0, 0, 240, 191] -// Big endian [191, 240, 0, 0, 0, 0, 0, 0] -const isBigEndian = FLOAT_BYTES[7] === 0; - type VectorHexType = '0x03' | '0x27' | '0x10'; type VectorTest = { description: string; @@ -24,36 +16,6 @@ type VectorTest = { }; type VectorSuite = { description: string; test_key: string; tests: VectorTest[] }; -function validateVector(vector: Binary): void { - const VECTOR_TYPE = Object.freeze({ - Int8: 0x03, - Float32: 0x27, - PackedBit: 0x10 - } as const); - - if (vector.sub_type !== 9) return; - - const size = vector.position; - const d_type = vector.buffer[0] ?? 0; - const padding = vector.buffer[1] ?? 0; - - if ((d_type === VECTOR_TYPE.Float32 || d_type === VECTOR_TYPE.Int8) && padding !== 0) { - throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors'); - } - - if (d_type === VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) { - throw new BSONError( - 'Invalid Vector: padding must be zero for packed bit vectors that are empty' - ); - } - - if (d_type === VECTOR_TYPE.PackedBit && padding > 7) { - throw new BSONError( - `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}` - ); - } -} - function fixFloats(f: string | number): number { if (typeof f === 'number') { return f; @@ -90,31 +52,15 @@ function fixBits(f: number | string): number { function make(vector: (number | string)[], dtype_hex: VectorHexType, padding?: number): Binary { let binary: Binary; switch (dtype_hex) { - case '0x10': /* packed_bit */ - case '0x03': /* int8 */ { - const array = new Int8Array(vector.map(dtype_hex === '0x03' /* int8 */ ? fixInt8s : fixBits)); - const buffer = new Uint8Array(array.byteLength + 2); - buffer.set(new Uint8Array(array.buffer), 2); - binary = new Binary(buffer, 9); + case '0x10' /* packed_bit */: + binary = Binary.fromPackedBits(new Uint8Array(vector.map(fixBits)), padding); break; - } - - case '0x27': /* float32 */ { - const array = new Float32Array(vector.map(fixFloats)); - const buffer = new Uint8Array(array.byteLength + 2); - if (isBigEndian) { - for (let i = 0; i < array.length; i++) { - const bytes = new Uint8Array(array.buffer, i * 4, 4); - bytes.reverse(); - buffer.set(bytes, i * 4 + 2); - } - } else { - buffer.set(new Uint8Array(array.buffer), 2); - } - binary = new Binary(buffer, 9); + case '0x03' /* int8 */: + binary = Binary.fromInt8Array(new Int8Array(vector.map(fixInt8s))); + break; + case '0x27' /* float32 */: + binary = Binary.fromFloat32Array(new Float32Array(vector.map(fixFloats))); break; - } - default: throw new Error(`Unknown dtype_hex: ${dtype_hex}`); } @@ -206,8 +152,6 @@ describe('BSON Binary Vector spec tests', () => { try { const bin = make(test.vector, test.dtype_hex, test.padding); BSON.serialize({ bin }); - // TODO(NODE-6537): The following validation MUST be a part of serialize - validateVector(bin); } catch (error) { thrownError = error; } @@ -229,8 +173,6 @@ describe('BSON Binary Vector spec tests', () => { try { const bin = make(test.vector, test.dtype_hex, test.padding); BSON.EJSON.stringify({ bin }); - // TODO(NODE-6537): The following validation MUST be a part of stringify - validateVector(bin); } catch (error) { thrownError = error; } From af3f9cdd962e0b9fd048c2416a3656237ffebd17 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Fri, 15 Nov 2024 12:43:57 -0500 Subject: [PATCH 02/13] feat(NODE-6537): add support for binary vectors --- .evergreen/run-big-endian-test.sh | 23 ++- .github/docker/Dockerfile | 10 ++ src/binary.ts | 222 +++++++++++++++++++++++++ src/parser/serializer.ts | 4 + src/utils/number_utils.ts | 4 + test/node/binary.test.ts | 261 +++++++++++++++++++++++++++++- 6 files changed, 520 insertions(+), 4 deletions(-) create mode 100644 .github/docker/Dockerfile diff --git a/.evergreen/run-big-endian-test.sh b/.evergreen/run-big-endian-test.sh index a1cc8240..0b67133d 100644 --- a/.evergreen/run-big-endian-test.sh +++ b/.evergreen/run-big-endian-test.sh @@ -1,5 +1,22 @@ -#!/usr/bin/env bash +#! /usr/bin/env bash -source $DRIVERS_TOOLS/.evergreen/init-node-and-npm-env.sh +# At the time of writing. This script is not used in CI. +# but can be used to locally iterate on big endian bugs. +# buildx requires an output, so I put docs which should be a no-op. -npx mocha test/s390x/big_endian.test.ts +set -o errexit +set -o nounset +set -o pipefail +set -o xtrace + +# If you get an error you may have an outdated buildkit version +# Try running this: +# docker buildx rm builder && docker buildx create --name builder --bootstrap --use + +docker buildx build \ + --progress=plain \ + --platform linux/s390x \ + --build-arg="NODE_ARCH=s390x" \ + -f ./.github/docker/Dockerfile \ + --output type=local,dest=./docs,platform-split=false \ + . diff --git a/.github/docker/Dockerfile b/.github/docker/Dockerfile new file mode 100644 index 00000000..3ceb2a50 --- /dev/null +++ b/.github/docker/Dockerfile @@ -0,0 +1,10 @@ +FROM node:22 AS build + +WORKDIR /bson +COPY . . + +RUN rm -rf node_modules && npm install && npm test + +FROM scratch + +COPY --from=build /bson/docs/ / diff --git a/src/binary.ts b/src/binary.ts index d3b496c3..206c5c64 100644 --- a/src/binary.ts +++ b/src/binary.ts @@ -4,6 +4,7 @@ import { BSONError } from './error'; import { BSON_BINARY_SUBTYPE_UUID_NEW } from './constants'; import { ByteUtils } from './utils/byte_utils'; import { BSONValue } from './bson_value'; +import { NumberUtils } from './utils/number_utils'; /** @public */ export type BinarySequence = Uint8Array | number[]; @@ -58,9 +59,18 @@ export class Binary extends BSONValue { static readonly SUBTYPE_COLUMN = 7; /** Sensitive BSON type */ static readonly SUBTYPE_SENSITIVE = 8; + /** Vector BSON type */ + static readonly SUBTYPE_VECTOR = 9; /** User BSON type */ static readonly SUBTYPE_USER_DEFINED = 128; + /** d_type of a Binary Vector (subtype: 9) */ + static readonly VECTOR_TYPE = Object.freeze({ + Int8: 0x03, + Float32: 0x27, + PackedBit: 0x10 + } as const); + /** * The bytes of the Binary value. * @@ -238,6 +248,11 @@ export class Binary extends BSONValue { /** @internal */ toExtendedJSON(options?: EJSONOptions): BinaryExtendedLegacy | BinaryExtended { options = options || {}; + + if (this.sub_type === Binary.SUBTYPE_VECTOR) { + Binary.validateVector(this); + } + const base64String = ByteUtils.toBase64(this.buffer); const subType = Number(this.sub_type).toString(16); @@ -310,6 +325,213 @@ export class Binary extends BSONValue { const subTypeArg = inspect(this.sub_type, options); return `Binary.createFromBase64(${base64Arg}, ${subTypeArg})`; } + + /** + * If this Binary represents a Int8 Vector, + * returns a copy of the bytes in a new Int8Array. + */ + public toInt8Array(): Int8Array { + if (this.sub_type !== Binary.SUBTYPE_VECTOR) { + throw new BSONError('Binary sub_type is not Vector'); + } + + if (this.buffer[0] !== Binary.VECTOR_TYPE.Int8) { + throw new BSONError('Binary d_type field is not Int8'); + } + + return new Int8Array( + this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position) + ); + } + + /** + * If this Binary represents a Float32 Vector, + * returns a copy of the bytes in a new Float32Array. + */ + public toFloat32Array(): Float32Array { + if (this.sub_type !== Binary.SUBTYPE_VECTOR) { + throw new BSONError('Binary sub_type is not Vector'); + } + + if (this.buffer[0] !== Binary.VECTOR_TYPE.Float32) { + throw new BSONError('Binary d_type field is not Float32'); + } + + const floatBytes = new Uint8Array( + this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position) + ); + if (NumberUtils.isBigEndian) { + for (let i = 0; i < floatBytes.byteLength; i += 4) { + const byte0 = floatBytes[i]; + const byte1 = floatBytes[i + 1]; + const byte2 = floatBytes[i + 2]; + const byte3 = floatBytes[i + 3]; + floatBytes[i] = byte3; + floatBytes[i + 1] = byte2; + floatBytes[i + 2] = byte1; + floatBytes[i + 3] = byte0; + } + } + return new Float32Array(floatBytes.buffer); + } + + /** + * If this Binary represents packed bit Vector, + * returns a copy of the bytes that are packed bits. + * + * Use `toBits` to get the unpacked bits. + */ + public toPackedBits(): Uint8Array { + if (this.sub_type !== Binary.SUBTYPE_VECTOR) { + throw new BSONError('Binary sub_type is not Vector'); + } + + if (this.buffer[0] !== Binary.VECTOR_TYPE.PackedBit) { + throw new BSONError('Binary d_type field is not packed bit'); + } + + return new Uint8Array( + this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position) + ); + } + + /** + * If this Binary represents a Packed bit Vector, + * returns a copy of the bit unpacked into a new Int8Array. + */ + public toBits(): Int8Array { + if (this.sub_type !== Binary.SUBTYPE_VECTOR) { + throw new BSONError('Binary sub_type is not Vector'); + } + + if (this.buffer[0] !== Binary.VECTOR_TYPE.PackedBit) { + throw new BSONError('Binary d_type field is not packed bit'); + } + + const byteCount = this.length() - 2; + const bitCount = byteCount * 8 - this.buffer[1]; + const bits = new Int8Array(bitCount); + + for (let bitOffset = 0; bitOffset < bits.length; bitOffset++) { + const byteOffset = (bitOffset / 8) | 0; + const byte = this.buffer[byteOffset + 2]; + const shift = 7 - (bitOffset % 8); + const bit = (byte >> shift) & 1; + bits[bitOffset] = bit; + } + + return bits; + } + + /** + * Constructs a Binary representing an Int8 Vector. + * @param array - The array to store as a view on the Binary class + */ + public static fromInt8Array(array: Int8Array): Binary { + const buffer = ByteUtils.allocate(array.byteLength + 2); + buffer[0] = Binary.VECTOR_TYPE.Int8; + buffer[1] = 0; + const intBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength); + buffer.set(intBytes, 2); + return new this(buffer, this.SUBTYPE_VECTOR); + } + + /** Constructs a Binary representing an Float32 Vector. */ + public static fromFloat32Array(array: Float32Array): Binary { + const binaryBytes = ByteUtils.allocate(array.byteLength + 2); + binaryBytes[0] = Binary.VECTOR_TYPE.Float32; + binaryBytes[1] = 0; + + const floatBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength); + binaryBytes.set(floatBytes, 2); + + if (NumberUtils.isBigEndian) { + for (let i = 2; i < binaryBytes.byteLength; i += 4) { + const byte0 = binaryBytes[i]; + const byte1 = binaryBytes[i + 1]; + const byte2 = binaryBytes[i + 2]; + const byte3 = binaryBytes[i + 3]; + binaryBytes[i] = byte3; + binaryBytes[i + 1] = byte2; + binaryBytes[i + 2] = byte1; + binaryBytes[i + 3] = byte0; + } + } + + return new this(binaryBytes, this.SUBTYPE_VECTOR); + } + + /** + * Constructs a Binary representing a packed bit Vector. + * + * Use `fromBits` to pack an array of 1s and 0s. + */ + public static fromPackedBits(array: Uint8Array, padding = 0): Binary { + const buffer = ByteUtils.allocate(array.byteLength + 2); + buffer[0] = Binary.VECTOR_TYPE.PackedBit; + buffer[1] = padding; + buffer.set(array, 2); + return new this(buffer, this.SUBTYPE_VECTOR); + } + + /** + * Constructs a Binary representing an Packed Bit Vector. + * @param array - The array of 1s and 0s to pack into the Binary instance + */ + public static fromBits(bits: ArrayLike): Binary { + const byteLength = Math.ceil(bits.length / 8); + const bytes = new Uint8Array(byteLength + 2); + bytes[0] = Binary.VECTOR_TYPE.PackedBit; + + const remainder = bits.length % 8; + bytes[1] = remainder === 0 ? 0 : 8 - remainder; + + for (let bitOffset = 0; bitOffset < bits.length; bitOffset++) { + const byteOffset = Math.floor(bitOffset / 8); + const bit = bits[bitOffset]; + + if (bit !== 0 && bit !== 1) { + throw new BSONError( + `Invalid bit value at ${bitOffset}: must be 0 or 1, found ${bits[bitOffset]}` + ); + } + + if (bit === 0) continue; + + const shift = 7 - (bitOffset % 8); + bytes[byteOffset + 2] |= bit << shift; + } + + return new this(bytes, Binary.SUBTYPE_VECTOR); + } + + /** @internal */ + static validateVector(vector: Binary): void { + if (vector.sub_type !== this.SUBTYPE_VECTOR) return; + + const size = vector.position; + const d_type = vector.buffer[0]; + const padding = vector.buffer[1]; + + if ( + (d_type === this.VECTOR_TYPE.Float32 || d_type === this.VECTOR_TYPE.Int8) && + padding !== 0 + ) { + throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors'); + } + + if (d_type === this.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) { + throw new BSONError( + 'Invalid Vector: padding must be zero for packed bit vectors that are empty' + ); + } + + if (d_type === this.VECTOR_TYPE.PackedBit && padding > 7) { + throw new BSONError( + `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}` + ); + } + } } /** @public */ diff --git a/src/parser/serializer.ts b/src/parser/serializer.ts index fbb47245..d304eded 100644 --- a/src/parser/serializer.ts +++ b/src/parser/serializer.ts @@ -495,6 +495,10 @@ function serializeBinary(buffer: Uint8Array, key: string, value: Binary, index: index += NumberUtils.setInt32LE(buffer, index, size); } + if (value.sub_type === Binary.SUBTYPE_VECTOR) { + Binary.validateVector(value); + } + if (size <= 16) { for (let i = 0; i < size; i++) buffer[index + i] = data[i]; } else { diff --git a/src/utils/number_utils.ts b/src/utils/number_utils.ts index 32f6f5cc..02f4dbeb 100644 --- a/src/utils/number_utils.ts +++ b/src/utils/number_utils.ts @@ -13,6 +13,8 @@ const isBigEndian = FLOAT_BYTES[7] === 0; * A collection of functions that get or set various numeric types and bit widths from a Uint8Array. */ export type NumberUtils = { + /** Is true if the current system is big endian. */ + isBigEndian: boolean; /** * Parses a signed int32 at offset. Throws a `RangeError` if value is negative. */ @@ -35,6 +37,8 @@ export type NumberUtils = { * @public */ export const NumberUtils: NumberUtils = { + isBigEndian, + getNonnegativeInt32LE(source: Uint8Array, offset: number): number { if (source[offset + 3] > 127) { throw new RangeError(`Size cannot be negative at offset: ${offset}`); diff --git a/test/node/binary.test.ts b/test/node/binary.test.ts index 1f94a619..8c09d3c5 100644 --- a/test/node/binary.test.ts +++ b/test/node/binary.test.ts @@ -1,6 +1,6 @@ import { expect } from 'chai'; import * as vm from 'node:vm'; -import { __isWeb__, Binary, BSON } from '../register-bson'; +import { __isWeb__, Binary, BSON, BSONError } from '../register-bson'; import * as util from 'node:util'; describe('class Binary', () => { @@ -249,4 +249,263 @@ describe('class Binary', () => { expect(roundTrippedBin.bin.toJSON()).to.equal(bin.toJSON()); }); }); + + describe('sub_type vector', () => { + describe('d_type constants', () => { + it('has Int8, Float32 and PackedBit', () => { + expect(Binary.VECTOR_TYPE).to.have.property('Int8', 0x03); + expect(Binary.VECTOR_TYPE).to.have.property('Float32', 0x27); + expect(Binary.VECTOR_TYPE).to.have.property('PackedBit', 0x10); + }); + }); + + describe('toInt8Array()', () => { + it('returns a copy of the bytes', function () { + const binary = Binary.fromInt8Array(new Int8Array([1, 2, 3])); + expect(binary.toInt8Array().buffer).to.not.equal(binary.buffer.buffer); + }); + + it('returns at the correct offset when ArrayBuffer is much larger than content', function () { + const space = new ArrayBuffer(400); + const view = new Uint8Array(space, 56, 4); // random view in a much larger buffer + const binary = new Binary(view, 9); + binary.buffer[0] = Binary.VECTOR_TYPE.Int8; + binary.buffer[1] = 0; + binary.buffer[2] = 255; + binary.buffer[3] = 255; + expect(binary.toInt8Array()).to.deep.equal(new Int8Array([-1, -1])); + }); + + it('returns Int8Array when sub_type is vector and d_type is INT8', () => { + const int8Array = new Int8Array([1, 2, 3]); + const binary = Binary.fromInt8Array(int8Array); + expect(binary.toInt8Array()).to.deep.equal(int8Array); + }); + + it('throws error when sub_type is not vector', () => { + const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY); + expect(() => binary.toInt8Array()).to.throw(BSONError, 'Binary sub_type is not Vector'); + }); + + it('throws error when d_type is not INT8', () => { + const binary = new Binary( + new Uint8Array([Binary.VECTOR_TYPE.Float32, 0, 1, 2, 3]), + Binary.SUBTYPE_VECTOR + ); + expect(() => binary.toInt8Array()).to.throw(BSONError, 'Binary d_type field is not Int8'); + }); + }); + + describe('toFloat32Array()', () => { + it('returns a copy of the bytes', function () { + const binary = Binary.fromFloat32Array(new Float32Array([1.1, 2.2, 3.3])); + expect(binary.toFloat32Array().buffer).to.not.equal(binary.buffer.buffer); + }); + + it('returns at the correct offset when ArrayBuffer is much larger than content', function () { + const space = new ArrayBuffer(400); + const view = new Uint8Array(space, 56, 6); // random view in a much larger buffer + const binary = new Binary(view, 9); + binary.buffer[0] = Binary.VECTOR_TYPE.Float32; + binary.buffer[1] = 0; + // For reference: + // [ 0, 0, 128, 191 ] is -1 in little endian + binary.buffer[2] = 0; + binary.buffer[3] = 0; + binary.buffer[4] = 128; + binary.buffer[5] = 191; + expect(binary.toFloat32Array()).to.deep.equal(new Float32Array([-1])); + }); + + it('returns Float32Array when sub_type is vector and d_type is FLOAT32', () => { + const float32Array = new Float32Array([1.1, 2.2, 3.3]); + const binary = Binary.fromFloat32Array(float32Array); + expect(binary.toFloat32Array()).to.deep.equal(float32Array); + }); + + it('throws error when sub_type is not vector', () => { + const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY); + expect(() => binary.toFloat32Array()).to.throw(BSONError, 'Binary sub_type is not Vector'); + }); + + it('throws error when d_type is not FLOAT32', () => { + const binary = new Binary( + new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]), + Binary.SUBTYPE_VECTOR + ); + expect(() => binary.toFloat32Array()).to.throw( + BSONError, + 'Binary d_type field is not Float32' + ); + }); + + it('transforms endianness correctly', () => { + // The expectation is that this test is run on LE and BE machines to + // demonstrate that on BE machines we get the same result + const float32Vector = new Uint8Array([ + ...[Binary.VECTOR_TYPE.Float32, 0], // d_type, padding + ...[0, 0, 128, 191], // -1 + ...[0, 0, 128, 191] // -1 + ]); + const binary = new Binary(float32Vector, Binary.SUBTYPE_VECTOR); + + // For reference: + // [ 0, 0, 128, 191 ] is -1 in little endian + // [ 191, 128, 0, 0 ] is -1 in big endian + // REGARDLESS of platform, BSON is ALWAYS little endian + expect(binary.toFloat32Array()).to.deep.equal(new Float32Array([-1, -1])); + }); + }); + + describe('toBits()', () => { + it('returns Int8Array of bits when sub_type is vector and d_type is PACKED_BIT', () => { + const bits = new Int8Array([1, 0, 1, 1, 0, 0, 1, 0]); + const binary = Binary.fromBits(bits); + expect(binary.toBits()).to.deep.equal(bits); + }); + + it('returns at the correct offset when ArrayBuffer is much larger than content', function () { + const space = new ArrayBuffer(400); + const view = new Uint8Array(space, 56, 3); // random view in a much larger buffer + const binary = new Binary(view, 9); + binary.buffer[0] = Binary.VECTOR_TYPE.PackedBit; + binary.buffer[1] = 4; + binary.buffer[2] = 0xf0; + expect(binary.toBits()).to.deep.equal(new Int8Array([1, 1, 1, 1])); + }); + + it('throws error when sub_type is not vector', () => { + const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY); + expect(() => binary.toBits()).to.throw(BSONError, 'Binary sub_type is not Vector'); + }); + + it('throws error when d_type is not PACKED_BIT', () => { + const binary = new Binary( + new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]), + Binary.SUBTYPE_VECTOR + ); + expect(() => binary.toBits()).to.throw(BSONError, 'Binary d_type field is not packed bit'); + }); + }); + + describe('toPackedBits()', () => { + it('returns Uint8Array of packed bits when sub_type is vector and d_type is PACKED_BIT', () => { + const bits = new Uint8Array([127, 8]); + const binary = Binary.fromPackedBits(bits, 3); + expect(binary.toPackedBits()).to.deep.equal(bits); + expect(binary.toBits()).to.deep.equal( + new Int8Array([0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1]) + ); + }); + + it('returns at the correct offset when ArrayBuffer is much larger than content', function () { + const space = new ArrayBuffer(400); + const view = new Uint8Array(space, 56, 3); // random view in a much larger buffer + const binary = new Binary(view, 9); + binary.buffer[0] = Binary.VECTOR_TYPE.PackedBit; + binary.buffer[1] = 4; + binary.buffer[2] = 0xf0; + expect(binary.toPackedBits()).to.deep.equal(new Uint8Array([0xf0])); + }); + + it('throws error when sub_type is not vector', () => { + const binary = new Binary(new Uint8Array([1, 2, 3]), Binary.SUBTYPE_BYTE_ARRAY); + expect(() => binary.toPackedBits()).to.throw(BSONError, 'Binary sub_type is not Vector'); + }); + + it('throws error when d_type is not PACKED_BIT', () => { + const binary = new Binary( + new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]), + Binary.SUBTYPE_VECTOR + ); + expect(() => binary.toPackedBits()).to.throw( + BSONError, + 'Binary d_type field is not packed bit' + ); + }); + }); + + describe('fromInt8Array()', () => { + it('creates Binary instance from Int8Array', () => { + const int8Array = new Int8Array([1, 2, 3]); + const binary = Binary.fromInt8Array(int8Array); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Int8); + expect(binary.toInt8Array()).to.deep.equal(int8Array); + }); + + it('creates empty Binary instance when Int8Array is empty', () => { + const binary = Binary.fromInt8Array(new Int8Array(0)); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Int8); + expect(binary.buffer[1]).to.equal(0); + expect(binary.toInt8Array()).to.deep.equal(new Int8Array(0)); + }); + }); + + describe('fromFloat32Array()', () => { + it('creates Binary instance from Float32Array', () => { + const float32Array = new Float32Array([1.1, 2.2, 3.3]); + const binary = Binary.fromFloat32Array(float32Array); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Float32); + expect(binary.toFloat32Array()).to.deep.equal(float32Array); + }); + + it('creates empty Binary instance when Float32Array is empty', () => { + const binary = Binary.fromFloat32Array(new Float32Array(0)); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Float32); + expect(binary.buffer[1]).to.equal(0); + expect(binary.toFloat32Array()).to.deep.equal(new Float32Array(0)); + }); + + it('transforms endianness correctly', () => { + // The expectation is that this test is run on LE and BE machines to + // demonstrate that on BE machines we get the same result + const float32Array = new Float32Array([-1, -1]); + const binary = Binary.fromFloat32Array(float32Array); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.Float32); + expect(binary.buffer[1]).to.equal(0); + + // For reference: + // [ 0, 0, 128, 191 ] is -1 in little endian + // [ 191, 128, 0, 0 ] is -1 in big endian + // REGARDLESS of platform, BSON is ALWAYS little endian + expect(Array.from(binary.buffer.subarray(2))).to.deep.equal([ + ...[0, 0, 128, 191], // -1 + ...[0, 0, 128, 191] // -1 + ]); + }); + }); + + describe('fromPackedBits()', () => { + it('creates Binary instance from packed bits', () => { + const bits = new Uint8Array([127, 8]); + const binary = Binary.fromPackedBits(bits, 3); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit); + expect(binary.buffer[1]).to.equal(3); + expect(binary.buffer.subarray(2)).to.deep.equal(bits); + }); + + it('creates empty Binary instance when bits are empty', () => { + const binary = Binary.fromBits(new Int8Array(0)); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit); + expect(binary.buffer[1]).to.equal(0); + expect(binary.toBits()).to.deep.equal(new Int8Array(0)); + }); + }); + + describe('fromBits()', () => { + it('creates Binary instance from bits', () => { + const bits = new Int8Array([1, 0, 1, 1, 0, 0, 1, 0]); + const binary = Binary.fromBits(bits); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit); + expect(binary.toBits()).to.deep.equal(bits); + }); + + it('creates empty Binary instance when bits are empty', () => { + const binary = Binary.fromBits(new Int8Array(0)); + expect(binary.buffer[0]).to.equal(Binary.VECTOR_TYPE.PackedBit); + expect(binary.buffer[1]).to.equal(0); + expect(binary.toBits()).to.deep.equal(new Int8Array(0)); + }); + }); + }); }); From 384dd929821cf41192636a3a37ef8c98d3d7f1f1 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 18 Nov 2024 12:00:03 -0500 Subject: [PATCH 03/13] test: check for error when bits are not 1 or 0 --- test/node/binary.test.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/node/binary.test.ts b/test/node/binary.test.ts index 8c09d3c5..6ba22632 100644 --- a/test/node/binary.test.ts +++ b/test/node/binary.test.ts @@ -506,6 +506,10 @@ describe('class Binary', () => { expect(binary.buffer[1]).to.equal(0); expect(binary.toBits()).to.deep.equal(new Int8Array(0)); }); + + it('throws when values are not 1 or 0', () => { + expect(() => Binary.fromBits([1, 0, 2])).to.throw(BSONError, /must be 0 or 1/); + }); }); }); }); From 2edc855aa4f932de9f2331c593e4e08f3a245b6f Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 18 Nov 2024 13:00:16 -0500 Subject: [PATCH 04/13] docs: mention errors thrown --- src/binary.ts | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/binary.ts b/src/binary.ts index 206c5c64..a6a38621 100644 --- a/src/binary.ts +++ b/src/binary.ts @@ -327,8 +327,10 @@ export class Binary extends BSONValue { } /** - * If this Binary represents a Int8 Vector, + * If this Binary represents a Int8 Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.Int8`), * returns a copy of the bytes in a new Int8Array. + * + * If the Binary is not a Vector, or the d_type is not Int8, an error is thrown. */ public toInt8Array(): Int8Array { if (this.sub_type !== Binary.SUBTYPE_VECTOR) { @@ -345,8 +347,10 @@ export class Binary extends BSONValue { } /** - * If this Binary represents a Float32 Vector, + * If this Binary represents a Float32 Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.Float32`), * returns a copy of the bytes in a new Float32Array. + * + * If the Binary is not a Vector, or the d_type is not Float32, an error is thrown. */ public toFloat32Array(): Float32Array { if (this.sub_type !== Binary.SUBTYPE_VECTOR) { @@ -376,10 +380,12 @@ export class Binary extends BSONValue { } /** - * If this Binary represents packed bit Vector, + * If this Binary represents packed bit Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.PackedBit`), * returns a copy of the bytes that are packed bits. * * Use `toBits` to get the unpacked bits. + * + * If the Binary is not a Vector, or the d_type is not PackedBit, an error is thrown. */ public toPackedBits(): Uint8Array { if (this.sub_type !== Binary.SUBTYPE_VECTOR) { @@ -396,8 +402,12 @@ export class Binary extends BSONValue { } /** - * If this Binary represents a Packed bit Vector, + * If this Binary represents a Packed bit Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.PackedBit`), * returns a copy of the bit unpacked into a new Int8Array. + * + * Use `toPackedBits` to get the bits still in packed form. + * + * If the Binary is not a Vector, or the d_type is not PackedBit, an error is thrown. */ public toBits(): Int8Array { if (this.sub_type !== Binary.SUBTYPE_VECTOR) { From efa2ea4a69255d742dbc660fbca76b8dbc3d32d7 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 18 Nov 2024 13:01:56 -0500 Subject: [PATCH 05/13] perf: remove Math --- src/binary.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/binary.ts b/src/binary.ts index a6a38621..76c84b02 100644 --- a/src/binary.ts +++ b/src/binary.ts @@ -489,7 +489,7 @@ export class Binary extends BSONValue { * @param array - The array of 1s and 0s to pack into the Binary instance */ public static fromBits(bits: ArrayLike): Binary { - const byteLength = Math.ceil(bits.length / 8); + const byteLength = (bits.length + 7) >>> 3; // ceil(bits.length / 8) const bytes = new Uint8Array(byteLength + 2); bytes[0] = Binary.VECTOR_TYPE.PackedBit; @@ -497,7 +497,7 @@ export class Binary extends BSONValue { bytes[1] = remainder === 0 ? 0 : 8 - remainder; for (let bitOffset = 0; bitOffset < bits.length; bitOffset++) { - const byteOffset = Math.floor(bitOffset / 8); + const byteOffset = bitOffset >>> 3; // floor(bitOffset / 8) const bit = bits[bitOffset]; if (bit !== 0 && bit !== 1) { From 1b73b0bd062472e2165684ebc25ed0052d46cee2 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 18 Nov 2024 13:08:04 -0500 Subject: [PATCH 06/13] docs: capture d_type and padding validation in comments --- src/binary.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/binary.ts b/src/binary.ts index 76c84b02..cb19d16a 100644 --- a/src/binary.ts +++ b/src/binary.ts @@ -520,8 +520,15 @@ export class Binary extends BSONValue { if (vector.sub_type !== this.SUBTYPE_VECTOR) return; const size = vector.position; + + // NOTE: Validation is only applied to **KNOWN** vector types + // If a new d_type is introduced, a future version of the library will need to add validation const d_type = vector.buffer[0]; - const padding = vector.buffer[1]; + + // NOTE: We do not enable noUncheckedIndexedAccess so TS believes this is always number + // a Binary vector may be empty, in which case the padding is undefined + // this possible value is tolerable for our validation checks + const padding: number | undefined = vector.buffer[1]; if ( (d_type === this.VECTOR_TYPE.Float32 || d_type === this.VECTOR_TYPE.Int8) && From 1b3680906a4796d0595687374f34e0abfa264592 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 18 Nov 2024 13:09:16 -0500 Subject: [PATCH 07/13] chore: use "datatype" --- src/binary.ts | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/binary.ts b/src/binary.ts index cb19d16a..c234a655 100644 --- a/src/binary.ts +++ b/src/binary.ts @@ -64,7 +64,7 @@ export class Binary extends BSONValue { /** User BSON type */ static readonly SUBTYPE_USER_DEFINED = 128; - /** d_type of a Binary Vector (subtype: 9) */ + /** datatype of a Binary Vector (subtype: 9) */ static readonly VECTOR_TYPE = Object.freeze({ Int8: 0x03, Float32: 0x27, @@ -330,7 +330,7 @@ export class Binary extends BSONValue { * If this Binary represents a Int8 Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.Int8`), * returns a copy of the bytes in a new Int8Array. * - * If the Binary is not a Vector, or the d_type is not Int8, an error is thrown. + * If the Binary is not a Vector, or the datatype is not Int8, an error is thrown. */ public toInt8Array(): Int8Array { if (this.sub_type !== Binary.SUBTYPE_VECTOR) { @@ -338,7 +338,7 @@ export class Binary extends BSONValue { } if (this.buffer[0] !== Binary.VECTOR_TYPE.Int8) { - throw new BSONError('Binary d_type field is not Int8'); + throw new BSONError('Binary datatype field is not Int8'); } return new Int8Array( @@ -350,7 +350,7 @@ export class Binary extends BSONValue { * If this Binary represents a Float32 Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.Float32`), * returns a copy of the bytes in a new Float32Array. * - * If the Binary is not a Vector, or the d_type is not Float32, an error is thrown. + * If the Binary is not a Vector, or the datatype is not Float32, an error is thrown. */ public toFloat32Array(): Float32Array { if (this.sub_type !== Binary.SUBTYPE_VECTOR) { @@ -358,7 +358,7 @@ export class Binary extends BSONValue { } if (this.buffer[0] !== Binary.VECTOR_TYPE.Float32) { - throw new BSONError('Binary d_type field is not Float32'); + throw new BSONError('Binary datatype field is not Float32'); } const floatBytes = new Uint8Array( @@ -385,7 +385,7 @@ export class Binary extends BSONValue { * * Use `toBits` to get the unpacked bits. * - * If the Binary is not a Vector, or the d_type is not PackedBit, an error is thrown. + * If the Binary is not a Vector, or the datatype is not PackedBit, an error is thrown. */ public toPackedBits(): Uint8Array { if (this.sub_type !== Binary.SUBTYPE_VECTOR) { @@ -393,7 +393,7 @@ export class Binary extends BSONValue { } if (this.buffer[0] !== Binary.VECTOR_TYPE.PackedBit) { - throw new BSONError('Binary d_type field is not packed bit'); + throw new BSONError('Binary datatype field is not packed bit'); } return new Uint8Array( @@ -407,7 +407,7 @@ export class Binary extends BSONValue { * * Use `toPackedBits` to get the bits still in packed form. * - * If the Binary is not a Vector, or the d_type is not PackedBit, an error is thrown. + * If the Binary is not a Vector, or the datatype is not PackedBit, an error is thrown. */ public toBits(): Int8Array { if (this.sub_type !== Binary.SUBTYPE_VECTOR) { @@ -415,7 +415,7 @@ export class Binary extends BSONValue { } if (this.buffer[0] !== Binary.VECTOR_TYPE.PackedBit) { - throw new BSONError('Binary d_type field is not packed bit'); + throw new BSONError('Binary datatype field is not packed bit'); } const byteCount = this.length() - 2; @@ -522,8 +522,8 @@ export class Binary extends BSONValue { const size = vector.position; // NOTE: Validation is only applied to **KNOWN** vector types - // If a new d_type is introduced, a future version of the library will need to add validation - const d_type = vector.buffer[0]; + // If a new datatype is introduced, a future version of the library will need to add validation + const datatype = vector.buffer[0]; // NOTE: We do not enable noUncheckedIndexedAccess so TS believes this is always number // a Binary vector may be empty, in which case the padding is undefined @@ -531,19 +531,19 @@ export class Binary extends BSONValue { const padding: number | undefined = vector.buffer[1]; if ( - (d_type === this.VECTOR_TYPE.Float32 || d_type === this.VECTOR_TYPE.Int8) && + (datatype === this.VECTOR_TYPE.Float32 || datatype === this.VECTOR_TYPE.Int8) && padding !== 0 ) { throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors'); } - if (d_type === this.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) { + if (datatype === this.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) { throw new BSONError( 'Invalid Vector: padding must be zero for packed bit vectors that are empty' ); } - if (d_type === this.VECTOR_TYPE.PackedBit && padding > 7) { + if (datatype === this.VECTOR_TYPE.PackedBit && padding > 7) { throw new BSONError( `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}` ); From 6a9e098e1443a14d292bbc2164f0083153863736 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 18 Nov 2024 13:17:42 -0500 Subject: [PATCH 08/13] chore: reduce diff --- .evergreen/run-big-endian-test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.evergreen/run-big-endian-test.sh b/.evergreen/run-big-endian-test.sh index 0b67133d..be4fbfab 100644 --- a/.evergreen/run-big-endian-test.sh +++ b/.evergreen/run-big-endian-test.sh @@ -1,4 +1,4 @@ -#! /usr/bin/env bash +#!/usr/bin/env bash # At the time of writing. This script is not used in CI. # but can be used to locally iterate on big endian bugs. From 1dc36adc693cae95fc084da48ed74f469edf0899 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 18 Nov 2024 13:27:27 -0500 Subject: [PATCH 09/13] refactor: moved validation to free function --- src/binary.ts | 55 ++++++++++++++++++++-------------------- src/parser/serializer.ts | 4 +-- 2 files changed, 29 insertions(+), 30 deletions(-) diff --git a/src/binary.ts b/src/binary.ts index c234a655..09bb11e1 100644 --- a/src/binary.ts +++ b/src/binary.ts @@ -250,7 +250,7 @@ export class Binary extends BSONValue { options = options || {}; if (this.sub_type === Binary.SUBTYPE_VECTOR) { - Binary.validateVector(this); + validateBinaryVector(this); } const base64String = ByteUtils.toBase64(this.buffer); @@ -514,40 +514,39 @@ export class Binary extends BSONValue { return new this(bytes, Binary.SUBTYPE_VECTOR); } +} - /** @internal */ - static validateVector(vector: Binary): void { - if (vector.sub_type !== this.SUBTYPE_VECTOR) return; +export function validateBinaryVector(vector: Binary): void { + if (vector.sub_type !== Binary.SUBTYPE_VECTOR) return; - const size = vector.position; + const size = vector.position; - // NOTE: Validation is only applied to **KNOWN** vector types - // If a new datatype is introduced, a future version of the library will need to add validation - const datatype = vector.buffer[0]; + // NOTE: Validation is only applied to **KNOWN** vector types + // If a new datatype is introduced, a future version of the library will need to add validation + const datatype = vector.buffer[0]; - // NOTE: We do not enable noUncheckedIndexedAccess so TS believes this is always number - // a Binary vector may be empty, in which case the padding is undefined - // this possible value is tolerable for our validation checks - const padding: number | undefined = vector.buffer[1]; + // NOTE: We do not enable noUncheckedIndexedAccess so TS believes this is always number + // a Binary vector may be empty, in which case the padding is undefined + // this possible value is tolerable for our validation checks + const padding: number | undefined = vector.buffer[1]; - if ( - (datatype === this.VECTOR_TYPE.Float32 || datatype === this.VECTOR_TYPE.Int8) && - padding !== 0 - ) { - throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors'); - } + if ( + (datatype === Binary.VECTOR_TYPE.Float32 || datatype === Binary.VECTOR_TYPE.Int8) && + padding !== 0 + ) { + throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors'); + } - if (datatype === this.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) { - throw new BSONError( - 'Invalid Vector: padding must be zero for packed bit vectors that are empty' - ); - } + if (datatype === Binary.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) { + throw new BSONError( + 'Invalid Vector: padding must be zero for packed bit vectors that are empty' + ); + } - if (datatype === this.VECTOR_TYPE.PackedBit && padding > 7) { - throw new BSONError( - `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}` - ); - } + if (datatype === Binary.VECTOR_TYPE.PackedBit && padding > 7) { + throw new BSONError( + `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}` + ); } } diff --git a/src/parser/serializer.ts b/src/parser/serializer.ts index d304eded..787bfa8a 100644 --- a/src/parser/serializer.ts +++ b/src/parser/serializer.ts @@ -1,4 +1,4 @@ -import { Binary } from '../binary'; +import { Binary, validateBinaryVector } from '../binary'; import type { BSONSymbol, DBRef, Document, MaxKey } from '../bson'; import type { Code } from '../code'; import * as constants from '../constants'; @@ -496,7 +496,7 @@ function serializeBinary(buffer: Uint8Array, key: string, value: Binary, index: } if (value.sub_type === Binary.SUBTYPE_VECTOR) { - Binary.validateVector(value); + validateBinaryVector(value); } if (size <= 16) { From e8d6062d9101fc07f0d546cd163330f82d52dab5 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 18 Nov 2024 13:28:57 -0500 Subject: [PATCH 10/13] test: d_type -> datatype --- test/node/binary.test.ts | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/test/node/binary.test.ts b/test/node/binary.test.ts index 6ba22632..20ab1cf0 100644 --- a/test/node/binary.test.ts +++ b/test/node/binary.test.ts @@ -251,7 +251,7 @@ describe('class Binary', () => { }); describe('sub_type vector', () => { - describe('d_type constants', () => { + describe('datatype constants', () => { it('has Int8, Float32 and PackedBit', () => { expect(Binary.VECTOR_TYPE).to.have.property('Int8', 0x03); expect(Binary.VECTOR_TYPE).to.have.property('Float32', 0x27); @@ -276,7 +276,7 @@ describe('class Binary', () => { expect(binary.toInt8Array()).to.deep.equal(new Int8Array([-1, -1])); }); - it('returns Int8Array when sub_type is vector and d_type is INT8', () => { + it('returns Int8Array when sub_type is vector and datatype is INT8', () => { const int8Array = new Int8Array([1, 2, 3]); const binary = Binary.fromInt8Array(int8Array); expect(binary.toInt8Array()).to.deep.equal(int8Array); @@ -287,12 +287,12 @@ describe('class Binary', () => { expect(() => binary.toInt8Array()).to.throw(BSONError, 'Binary sub_type is not Vector'); }); - it('throws error when d_type is not INT8', () => { + it('throws error when datatype is not INT8', () => { const binary = new Binary( new Uint8Array([Binary.VECTOR_TYPE.Float32, 0, 1, 2, 3]), Binary.SUBTYPE_VECTOR ); - expect(() => binary.toInt8Array()).to.throw(BSONError, 'Binary d_type field is not Int8'); + expect(() => binary.toInt8Array()).to.throw(BSONError, 'Binary datatype field is not Int8'); }); }); @@ -317,7 +317,7 @@ describe('class Binary', () => { expect(binary.toFloat32Array()).to.deep.equal(new Float32Array([-1])); }); - it('returns Float32Array when sub_type is vector and d_type is FLOAT32', () => { + it('returns Float32Array when sub_type is vector and datatype is FLOAT32', () => { const float32Array = new Float32Array([1.1, 2.2, 3.3]); const binary = Binary.fromFloat32Array(float32Array); expect(binary.toFloat32Array()).to.deep.equal(float32Array); @@ -328,14 +328,14 @@ describe('class Binary', () => { expect(() => binary.toFloat32Array()).to.throw(BSONError, 'Binary sub_type is not Vector'); }); - it('throws error when d_type is not FLOAT32', () => { + it('throws error when datatype is not FLOAT32', () => { const binary = new Binary( new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]), Binary.SUBTYPE_VECTOR ); expect(() => binary.toFloat32Array()).to.throw( BSONError, - 'Binary d_type field is not Float32' + 'Binary datatype field is not Float32' ); }); @@ -343,7 +343,7 @@ describe('class Binary', () => { // The expectation is that this test is run on LE and BE machines to // demonstrate that on BE machines we get the same result const float32Vector = new Uint8Array([ - ...[Binary.VECTOR_TYPE.Float32, 0], // d_type, padding + ...[Binary.VECTOR_TYPE.Float32, 0], // datatype, padding ...[0, 0, 128, 191], // -1 ...[0, 0, 128, 191] // -1 ]); @@ -358,7 +358,7 @@ describe('class Binary', () => { }); describe('toBits()', () => { - it('returns Int8Array of bits when sub_type is vector and d_type is PACKED_BIT', () => { + it('returns Int8Array of bits when sub_type is vector and datatype is PACKED_BIT', () => { const bits = new Int8Array([1, 0, 1, 1, 0, 0, 1, 0]); const binary = Binary.fromBits(bits); expect(binary.toBits()).to.deep.equal(bits); @@ -379,17 +379,20 @@ describe('class Binary', () => { expect(() => binary.toBits()).to.throw(BSONError, 'Binary sub_type is not Vector'); }); - it('throws error when d_type is not PACKED_BIT', () => { + it('throws error when datatype is not PACKED_BIT', () => { const binary = new Binary( new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]), Binary.SUBTYPE_VECTOR ); - expect(() => binary.toBits()).to.throw(BSONError, 'Binary d_type field is not packed bit'); + expect(() => binary.toBits()).to.throw( + BSONError, + 'Binary datatype field is not packed bit' + ); }); }); describe('toPackedBits()', () => { - it('returns Uint8Array of packed bits when sub_type is vector and d_type is PACKED_BIT', () => { + it('returns Uint8Array of packed bits when sub_type is vector and datatype is PACKED_BIT', () => { const bits = new Uint8Array([127, 8]); const binary = Binary.fromPackedBits(bits, 3); expect(binary.toPackedBits()).to.deep.equal(bits); @@ -413,14 +416,14 @@ describe('class Binary', () => { expect(() => binary.toPackedBits()).to.throw(BSONError, 'Binary sub_type is not Vector'); }); - it('throws error when d_type is not PACKED_BIT', () => { + it('throws error when datatype is not PACKED_BIT', () => { const binary = new Binary( new Uint8Array([Binary.VECTOR_TYPE.Int8, 0, 1, 2, 3]), Binary.SUBTYPE_VECTOR ); expect(() => binary.toPackedBits()).to.throw( BSONError, - 'Binary d_type field is not packed bit' + 'Binary datatype field is not packed bit' ); }); }); From a0c63c17c2daca9b1ac90b821563365850f6f575 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 18 Nov 2024 14:27:36 -0500 Subject: [PATCH 11/13] refactor: move swapping to ByteUtils --- src/binary.ts | 28 ++++------------------------ src/utils/byte_utils.ts | 2 ++ src/utils/node_byte_utils.ts | 7 ++++++- src/utils/web_byte_utils.ts | 21 ++++++++++++++++++++- test/node/byte_utils.test.ts | 27 ++++++++++++++++++++++++--- 5 files changed, 56 insertions(+), 29 deletions(-) diff --git a/src/binary.ts b/src/binary.ts index 09bb11e1..1fe09805 100644 --- a/src/binary.ts +++ b/src/binary.ts @@ -364,18 +364,9 @@ export class Binary extends BSONValue { const floatBytes = new Uint8Array( this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position) ); - if (NumberUtils.isBigEndian) { - for (let i = 0; i < floatBytes.byteLength; i += 4) { - const byte0 = floatBytes[i]; - const byte1 = floatBytes[i + 1]; - const byte2 = floatBytes[i + 2]; - const byte3 = floatBytes[i + 3]; - floatBytes[i] = byte3; - floatBytes[i + 1] = byte2; - floatBytes[i + 2] = byte1; - floatBytes[i + 3] = byte0; - } - } + + if (NumberUtils.isBigEndian) ByteUtils.swap32(floatBytes); + return new Float32Array(floatBytes.buffer); } @@ -455,18 +446,7 @@ export class Binary extends BSONValue { const floatBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength); binaryBytes.set(floatBytes, 2); - if (NumberUtils.isBigEndian) { - for (let i = 2; i < binaryBytes.byteLength; i += 4) { - const byte0 = binaryBytes[i]; - const byte1 = binaryBytes[i + 1]; - const byte2 = binaryBytes[i + 2]; - const byte3 = binaryBytes[i + 3]; - binaryBytes[i] = byte3; - binaryBytes[i + 1] = byte2; - binaryBytes[i + 2] = byte1; - binaryBytes[i + 3] = byte0; - } - } + if (NumberUtils.isBigEndian) ByteUtils.swap32(new Uint8Array(binaryBytes.buffer, 2)); return new this(binaryBytes, this.SUBTYPE_VECTOR); } diff --git a/src/utils/byte_utils.ts b/src/utils/byte_utils.ts index f3da53fd..05e30515 100644 --- a/src/utils/byte_utils.ts +++ b/src/utils/byte_utils.ts @@ -39,6 +39,8 @@ export type ByteUtils = { encodeUTF8Into: (destination: Uint8Array, source: string, byteOffset: number) => number; /** Generate a Uint8Array filled with random bytes with byteLength */ randomBytes: (byteLength: number) => Uint8Array; + /** Interprets `buffer` as an array of 32-bit values and swaps the byte order in-place. */ + swap32: (buffer: Uint8Array) => Uint8Array; }; declare const Buffer: { new (): unknown; prototype?: { _isBuffer?: boolean } } | undefined; diff --git a/src/utils/node_byte_utils.ts b/src/utils/node_byte_utils.ts index 7836345f..b47e723d 100644 --- a/src/utils/node_byte_utils.ts +++ b/src/utils/node_byte_utils.ts @@ -9,6 +9,7 @@ type NodeJsBuffer = ArrayBufferView & copy(target: Uint8Array, targetStart: number, sourceStart: number, sourceEnd: number): number; toString: (this: Uint8Array, encoding: NodeJsEncoding, start?: number, end?: number) => string; equals: (this: Uint8Array, other: Uint8Array) => boolean; + swap32: (this: NodeJsBuffer) => NodeJsBuffer; }; type NodeJsBufferConstructor = Omit & { alloc: (size: number) => NodeJsBuffer; @@ -159,5 +160,9 @@ export const nodeJsByteUtils = { return nodeJsByteUtils.toLocalBufferType(buffer).write(source, byteOffset, undefined, 'utf8'); }, - randomBytes: nodejsRandomBytes + randomBytes: nodejsRandomBytes, + + swap32(buffer: Uint8Array): NodeJsBuffer { + return nodeJsByteUtils.toLocalBufferType(buffer).swap32(); + } }; diff --git a/src/utils/web_byte_utils.ts b/src/utils/web_byte_utils.ts index 9d9c343f..336d37ed 100644 --- a/src/utils/web_byte_utils.ts +++ b/src/utils/web_byte_utils.ts @@ -193,5 +193,24 @@ export const webByteUtils = { return bytes.byteLength; }, - randomBytes: webRandomBytes + randomBytes: webRandomBytes, + + swap32(buffer: Uint8Array): Uint8Array { + if (buffer.length % 4 !== 0) { + throw new RangeError('Buffer size must be a multiple of 32-bits'); + } + + for (let i = 0; i < buffer.length; i += 4) { + const byte0 = buffer[i]; + const byte1 = buffer[i + 1]; + const byte2 = buffer[i + 2]; + const byte3 = buffer[i + 3]; + buffer[i] = byte3; + buffer[i + 1] = byte2; + buffer[i + 2] = byte1; + buffer[i + 3] = byte0; + } + + return buffer; + } }; diff --git a/test/node/byte_utils.test.ts b/test/node/byte_utils.test.ts index 67a4721f..df1fed0c 100644 --- a/test/node/byte_utils.test.ts +++ b/test/node/byte_utils.test.ts @@ -12,7 +12,7 @@ import { utf8WebPlatformSpecTests } from './data/utf8_wpt_error_cases'; type ByteUtilTest = { name: string; - inputs: Parameters; + inputs: Parameters | (() => Parameters); expectation: (result: { web: boolean; output: ReturnType | null; @@ -500,6 +500,23 @@ const randomBytesTests: ByteUtilTest<'randomBytes'>[] = [ } } ]; +const swap32Tests: ByteUtilTest<'swap32'>[] = [ + { + name: 'swaps byte order in-place', + inputs: () => [Buffer.from([1, 2, 3, 4, 5, 6, 7, 8])], + expectation({ output, error }) { + expect(error).to.be.null; + expect(output).to.deep.equal(Buffer.from([4, 3, 2, 1, 8, 7, 6, 5])); + } + }, + { + name: 'throws if buffer is not a multiple of 4 bytes', + inputs: [Buffer.from([1, 2, 3])], + expectation({ error }) { + expect(error).to.be.instanceOf(RangeError); + } + } +]; const utils = new Map([ ['nodeJsByteUtils', nodeJsByteUtils], @@ -520,7 +537,8 @@ const table = new Map[]>([ ['encodeUTF8Into', fromUTF8Tests], ['toUTF8', toUTF8Tests], ['utf8ByteLength', utf8ByteLengthTests], - ['randomBytes', randomBytesTests] + ['randomBytes', randomBytesTests], + ['swap32', swap32Tests] ]); describe('ByteUtils', () => { @@ -790,7 +808,10 @@ describe('ByteUtils', () => { let error = null; try { - output = byteUtils[utility].call(null, ...test.inputs); + output = byteUtils[utility].call( + null, + ...(typeof test.inputs === 'function' ? test.inputs() : test.inputs) + ); } catch (thrownError) { error = thrownError; } From e78955416929a85bce9f83c37adfb0ab22ba1aee Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 18 Nov 2024 17:46:54 -0500 Subject: [PATCH 12/13] chore: move script out of .evg --- {.evergreen => etc}/run-big-endian-test.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {.evergreen => etc}/run-big-endian-test.sh (100%) diff --git a/.evergreen/run-big-endian-test.sh b/etc/run-big-endian-test.sh similarity index 100% rename from .evergreen/run-big-endian-test.sh rename to etc/run-big-endian-test.sh From 8c9fb3a31896984cef1b725d558f102595004f86 Mon Sep 17 00:00:00 2001 From: Neal Beeken Date: Mon, 18 Nov 2024 17:52:19 -0500 Subject: [PATCH 13/13] chore: move dockerfile --- {.github/docker => etc}/Dockerfile | 0 etc/run-big-endian-test.sh | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename {.github/docker => etc}/Dockerfile (100%) diff --git a/.github/docker/Dockerfile b/etc/Dockerfile similarity index 100% rename from .github/docker/Dockerfile rename to etc/Dockerfile diff --git a/etc/run-big-endian-test.sh b/etc/run-big-endian-test.sh index be4fbfab..775cc6ee 100644 --- a/etc/run-big-endian-test.sh +++ b/etc/run-big-endian-test.sh @@ -17,6 +17,6 @@ docker buildx build \ --progress=plain \ --platform linux/s390x \ --build-arg="NODE_ARCH=s390x" \ - -f ./.github/docker/Dockerfile \ + -f ./etc/Dockerfile \ --output type=local,dest=./docs,platform-split=false \ .