diff --git a/test/node/bson_binary_vector.spec.test.ts b/test/node/bson_binary_vector.spec.test.ts
new file mode 100644
index 00000000..aa40fb36
--- /dev/null
+++ b/test/node/bson_binary_vector.spec.test.ts
@@ -0,0 +1,280 @@
+import * as fs from 'fs';
+import * as path from 'path';
+import { BSON, BSONError, Binary } from '../register-bson';
+import { expect } from 'chai';
+
+const { toHex, fromHex } = BSON.onDemand.ByteUtils;
+
+const FLOAT = new Float64Array(1);
+const FLOAT_BYTES = new Uint8Array(FLOAT.buffer, 0, 8);
+
+FLOAT[0] = -1;
+// Little endian [0, 0, 0, 0, 0, 0, 240, 191]
+// Big endian [191, 240, 0, 0, 0, 0, 0, 0]
+const isBigEndian = FLOAT_BYTES[7] === 0;
+
+type VectorHexType = '0x03' | '0x27' | '0x10';
+type VectorTest = {
+  description: string;
+  vector: (number | string)[];
+  valid: boolean;
+  dtype_hex: VectorHexType;
+  padding?: number;
+  canonical_bson?: string;
+};
+type VectorSuite = { description: string; test_key: string; tests: VectorTest[] };
+
+/**
+ * Test-side stand-in for vector validation; the invalid-case tests below call it right after
+ * serialize/stringify (see the NODE-6537 TODO).
+ *
+ * Returns without checking anything when the binary's sub_type is not 9.
+ *
+ * @throws BSONError when the padding byte is not allowed for the dtype byte.
+ */
+function validateVector(vector: Binary): void {
+  const VECTOR_TYPE = Object.freeze({
+    Int8: 0x03,
+    Float32: 0x27,
+    PackedBit: 0x10
+  } as const);
+
+  if (vector.sub_type !== 9) return;
+
+  const size = vector.position;
+  const d_type = vector.buffer[0] ?? 0;
+  const padding = vector.buffer[1] ?? 0;
+
+  if ((d_type === VECTOR_TYPE.Float32 || d_type === VECTOR_TYPE.Int8) && padding !== 0) {
+    throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors');
+  }
+
+  // A size of 2 leaves room for the dtype and padding bytes only, i.e. no vector data.
+  if (d_type === VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) {
+    throw new BSONError(
+      'Invalid Vector: padding must be zero for packed bit vectors that are empty'
+    );
+  }
+
+  if (d_type === VECTOR_TYPE.PackedBit && padding > 7) {
+    throw new BSONError(
+      `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}`
+    );
+  }
+}
+
+/** Converts a float32 spec value to a number; infinities are spelled 'inf' and '-inf'. */
+function fixFloats(f: string | number): number {
+  if (typeof f === 'number') {
+    return f;
+  }
+  if (f === 'inf') {
+    return Infinity;
+  }
+  if (f === '-inf') {
+    return -Infinity;
+  }
+  throw new Error(`test format error: unknown float value: ${f}`);
+}
+
+/**
+ * Checks an int8 spec value and returns it unchanged.
+ *
+ * @throws Error prefixed with `unsupported_error` when the value is not an integer within
+ * -128..127; the invalid-case tests skip when they see that prefix.
+ */
+function fixInt8s(f: number | string): number {
+  if (typeof f !== 'number') throw new Error('test format error: unexpected test data');
+
+  if (f < -128 || f > 127 || !Number.isSafeInteger(f)) {
+    // Javascript Int8Array only supports integers from -128 to 127. It truncates fractional
+    // input without raising, so "INT8 with float inputs" must be rejected here.
+    throw new Error(`unsupported_error: int8 out of range: ${f}`);
+  }
+  return f;
+}
+
+/**
+ * Checks a packed_bit spec value (one byte) and returns it unchanged.
+ *
+ * @throws Error prefixed with `unsupported_error` when the value is not an integer in 0..255.
+ */
+function fixBits(f: number | string): number {
+  if (typeof f !== 'number') throw new Error('test format error: unexpected test data');
+
+  if (f > 255 || f < 0 || !Number.isSafeInteger(f)) {
+    // Javascript Uint8Array only supports values from 0 to 255
+    throw new Error(`unsupported_error: bit out of range: ${f}`);
+  }
+  return f;
+}
+
+/**
+ * Builds a subtype 9 Binary from spec test input.
+ *
+ * Layout: byte 0 is the dtype, byte 1 is the padding (0 when omitted), and the vector data
+ * follows; float32 values are always written little endian.
+ *
+ * @throws Error when a vector element is rejected by fixInt8s, fixBits, or fixFloats.
+ */
+function make(vector: (number | string)[], dtype_hex: VectorHexType, padding?: number): Binary {
+  let binary: Binary;
+  switch (dtype_hex) {
+    case '0x10': /* packed_bit */
+    case '0x03': /* int8 */ {
+      // Packed bits are unsigned bytes (0..255) while int8 elements are signed (-128..127).
+      const array =
+        dtype_hex === '0x03'
+          ? new Int8Array(vector.map(fixInt8s))
+          : new Uint8Array(vector.map(fixBits));
+      const buffer = new Uint8Array(array.byteLength + 2);
+      buffer.set(new Uint8Array(array.buffer), 2);
+      binary = new Binary(buffer, 9);
+      break;
+    }
+
+    case '0x27': /* float32 */ {
+      const array = new Float32Array(vector.map(fixFloats));
+      const buffer = new Uint8Array(array.byteLength + 2);
+      if (isBigEndian) {
+        // Reverse each 4-byte element so the payload is little endian.
+        for (let i = 0; i < array.length; i++) {
+          const bytes = new Uint8Array(array.buffer, i * 4, 4);
+          bytes.reverse();
+          buffer.set(bytes, i * 4 + 2);
+        }
+      } else {
+        buffer.set(new Uint8Array(array.buffer), 2);
+      }
+      binary = new Binary(buffer, 9);
+      break;
+    }
+
+    default:
+      throw new Error(`Unknown dtype_hex: ${dtype_hex}`);
+  }
+
+  binary.buffer[0] = +dtype_hex;
+  binary.buffer[1] = padding ?? 0;
+
+  return binary;
+}
+
+/**
+ * Expected error message per invalid test description. `false` marks cases this runner cannot
+ * build (make() raises an `unsupported_error`), which are skipped.
+ */
+const invalidTestExpectedError = new Map<string, string | false>([
+  ['FLOAT32 with padding', 'Invalid Vector: padding must be zero for int8 and float32 vectors'],
+  ['INT8 with padding', 'Invalid Vector: padding must be zero for int8 and float32 vectors'],
+  // The spec file lists this test twice; one entry covers both.
+  [
+    'Padding specified with no vector data PACKED_BIT',
+    'Invalid Vector: padding must be zero for packed bit vectors that are empty'
+  ],
+  [
+    'Exceeding maximum padding PACKED_BIT',
+    'Invalid Vector: padding must be a value between 0 and 7'
+  ],
+  ['Negative padding PACKED_BIT', 'Invalid Vector: padding must be a value between 0 and 7'],
+  // skipped
+  ['Overflow Vector PACKED_BIT', false],
+  ['Underflow Vector PACKED_BIT', false],
+  ['Overflow Vector INT8', false],
+  ['Underflow Vector INT8', false],
+  ['INT8 with float inputs', false],
+  // duplicate test! but also skipped.
+  ['Vector with float values PACKED_BIT', false]
+]);
+
+describe('BSON Binary Vector spec tests', () => {
+  const specDirectory = path.join(__dirname, 'specs/bson-binary-vector');
+  const tests: Record<string, VectorSuite> = Object.create(null);
+
+  for (const file of fs.readdirSync(specDirectory)) {
+    tests[path.basename(file, '.json')] = JSON.parse(
+      fs.readFileSync(path.join(specDirectory, file), 'utf8')
+    );
+  }
+
+  for (const [suiteName, suite] of Object.entries(tests)) {
+    describe(suiteName, function () {
+      const valid = suite.tests.filter(t => t.valid);
+      const invalid = suite.tests.filter(t => !t.valid);
+      describe('valid', function () {
+        /**
+         * 1. encode a document from the numeric values, dtype, and padding, along with the "test_key", and assert this matches the canonical_bson string.
+         * 2. decode the canonical_bson into its binary form, and then assert that the numeric values, dtype, and padding all match those provided in the JSON.
+         *
+         * > Note: For floating point number types, exact numerical matches may not be possible.
+         * > Drivers that natively support the floating-point type being tested (e.g., when testing float32 vector values in a driver that natively supports float32),
+         * > MUST assert that the input float array is the same after encoding and decoding.
+         */
+        for (const test of valid) {
+          it(`encode ${test.description}`, function () {
+            const bin = make(test.vector, test.dtype_hex, test.padding);
+
+            const buffer = BSON.serialize({ [suite.test_key]: bin });
+            expect(toHex(buffer)).to.equal(test.canonical_bson!.toLowerCase());
+          });
+
+          it(`decode ${test.description}`, function () {
+            const canonical_bson = fromHex(test.canonical_bson!.toLowerCase());
+            const doc = BSON.deserialize(canonical_bson);
+
+            expect(doc[suite.test_key].sub_type).to.equal(0x09);
+            expect(doc[suite.test_key].buffer[0]).to.equal(+test.dtype_hex);
+            expect(doc[suite.test_key].buffer[1]).to.equal(test.padding);
+          });
+        }
+      });
+
+      describe('invalid', function () {
+        /**
+         * To prove correct in an invalid case (valid:false),
+         * one MUST raise an exception when attempting to encode
+         * a document from the numeric values, dtype, and padding.
+         */
+        const encoders: [string, (bin: Binary) => void][] = [
+          ['bson', bin => BSON.serialize({ bin })],
+          ['extended json', bin => BSON.EJSON.stringify({ bin })]
+        ];
+
+        for (const test of invalid) {
+          const expectedErrorMessage = invalidTestExpectedError.get(test.description);
+
+          for (const [format, encode] of encoders) {
+            it(`${format}: ${test.description}`, function () {
+              let thrownError: unknown;
+              try {
+                const bin = make(test.vector, test.dtype_hex, test.padding);
+                encode(bin);
+                // TODO(NODE-6537): The following validation MUST be a part of serialize/stringify
+                validateVector(bin);
+              } catch (error) {
+                thrownError = error;
+              }
+
+              const message = thrownError instanceof Error ? thrownError.message : undefined;
+
+              if (message?.startsWith('unsupported_error')) {
+                expect(
+                  expectedErrorMessage,
+                  'We expect a certain error message but got an unsupported error'
+                ).to.be.false;
+                this.skip();
+              }
+
+              expect(thrownError).to.be.instanceOf(BSONError);
+              if (typeof expectedErrorMessage !== 'string') {
+                // An unregistered description would otherwise match any message at all.
+                throw new Error(`test format error: no expected error for: ${test.description}`);
+              }
+              expect(message).to.include(expectedErrorMessage);
+            });
+          }
+        }
+      });
+    });
+  }
+});
diff --git a/test/node/specs/bson-binary-vector/float32.json b/test/node/specs/bson-binary-vector/float32.json
new file mode 100644
index 00000000..872c4353
--- /dev/null
+++ b/test/node/specs/bson-binary-vector/float32.json
@@ -0,0 +1,51 @@
+{
+  "description": "Tests of Binary subtype 9, Vectors, with dtype FLOAT32",
+  "test_key": "vector",
+  "tests": [
+    {
+      "description": "Simple Vector FLOAT32",
+      "valid": true,
+      "vector": [127.0, 7.0],
+      "dtype_hex": "0x27",
+      "dtype_alias": "FLOAT32",
+      "padding": 0,
+      "canonical_bson": "1C00000005766563746F72000A0000000927000000FE420000E04000"
+    },
+    {
+      "description": "Vector with decimals and negative value FLOAT32",
+      "valid": true,
+      "vector": [127.7, -7.7],
+      "dtype_hex": "0x27",
+      "dtype_alias": "FLOAT32",
+      "padding": 0,
+      "canonical_bson": "1C00000005766563746F72000A0000000927006666FF426666F6C000"
+    },
+    {
+      "description": "Empty Vector FLOAT32",
+      "valid": true,
+      "vector": [],
+      "dtype_hex": "0x27",
+      "dtype_alias": "FLOAT32",
+      "padding": 0,
+      "canonical_bson": "1400000005766563746F72000200000009270000"
+    },
+    {
+      "description": "Infinity Vector FLOAT32",
+      "valid": true,
+      "vector": ["-inf", 0.0, "inf"],
+      "dtype_hex": "0x27",
+      "dtype_alias": "FLOAT32",
+      "padding": 0,
+      "canonical_bson": "2000000005766563746F72000E000000092700000080FF000000000000807F00"
+    },
+    {
+      "description": "FLOAT32 with padding",
+      "valid": false,
+      "vector": [127.0, 7.0],
+      "dtype_hex": "0x27",
+      "dtype_alias": "FLOAT32",
+      "padding": 3
+    }
+  ]
+}
+
diff --git a/test/node/specs/bson-binary-vector/int8.json b/test/node/specs/bson-binary-vector/int8.json
new file mode 100644
index 00000000..7529721e
--- /dev/null
+++ b/test/node/specs/bson-binary-vector/int8.json
@@ -0,0 +1,57 @@
+{
+  "description": "Tests of Binary subtype 9, Vectors, with dtype INT8",
+  "test_key": "vector",
+  "tests": [
+    {
+      "description": "Simple Vector INT8",
+      "valid": true,
+      "vector": [127, 7],
+      "dtype_hex": "0x03",
+      "dtype_alias": "INT8",
+      "padding": 0,
+      "canonical_bson": "1600000005766563746F7200040000000903007F0700"
+    },
+    {
+      "description": "Empty Vector INT8",
+      "valid": true,
+      "vector": [],
+      "dtype_hex": "0x03",
+      "dtype_alias": "INT8",
+      "padding": 0,
+      "canonical_bson": "1400000005766563746F72000200000009030000"
+    },
+    {
+      "description": "Overflow Vector INT8",
+      "valid": false,
+      "vector": [128],
+      "dtype_hex": "0x03",
+      "dtype_alias": "INT8",
+      "padding": 0
+    },
+    {
+      "description": "Underflow Vector INT8",
+      "valid": false,
+      "vector": [-129],
+      "dtype_hex": "0x03",
+      "dtype_alias": "INT8",
+      "padding": 0
+    },
+    {
+      "description": "INT8 with padding",
+      "valid": false,
+      "vector": [127, 7],
+      "dtype_hex": "0x03",
+      "dtype_alias": "INT8",
+      "padding": 3
+    },
+    {
+      "description": "INT8 with float inputs",
+      "valid": false,
+      "vector": [127.77, 7.77],
+      "dtype_hex": "0x03",
+      "dtype_alias": "INT8",
+      "padding": 0
+    }
+  ]
+}
+
diff --git a/test/node/specs/bson-binary-vector/packed_bit.json b/test/node/specs/bson-binary-vector/packed_bit.json
new file mode 100644
index 00000000..035776e8
--- /dev/null
+++ b/test/node/specs/bson-binary-vector/packed_bit.json
@@ -0,0 +1,98 @@
+{
+  "description": "Tests of Binary subtype 9, Vectors, with dtype PACKED_BIT",
+  "test_key": "vector",
+  "tests": [
+    {
+      "description": "Padding specified with no vector data PACKED_BIT",
+      "valid": false,
+      "vector": [],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 1
+    },
+    {
+      "description": "Simple Vector PACKED_BIT",
+      "valid": true,
+      "vector": [127, 7],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 0,
+      "canonical_bson": "1600000005766563746F7200040000000910007F0700"
+    },
+    {
+      "description": "Empty Vector PACKED_BIT",
+      "valid": true,
+      "vector": [],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 0,
+      "canonical_bson": "1400000005766563746F72000200000009100000"
+    },
+    {
+      "description": "PACKED_BIT with padding",
+      "valid": true,
+      "vector": [127, 7],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 3,
+      "canonical_bson": "1600000005766563746F7200040000000910037F0700"
+    },
+    {
+      "description": "Overflow Vector PACKED_BIT",
+      "valid": false,
+      "vector": [256],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 0
+    },
+    {
+      "description": "Underflow Vector PACKED_BIT",
+      "valid": false,
+      "vector": [-1],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 0
+    },
+    {
+      "description": "Vector with float values PACKED_BIT",
+      "valid": false,
+      "vector": [127.5],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 0
+    },
+    {
+      "description": "Padding specified with no vector data PACKED_BIT",
+      "valid": false,
+      "vector": [],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 1
+    },
+    {
+      "description": "Exceeding maximum padding PACKED_BIT",
+      "valid": false,
+      "vector": [1],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 8
+    },
+    {
+      "description": "Negative padding PACKED_BIT",
+      "valid": false,
+      "vector": [1],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": -1
+    },
+    {
+      "description": "Vector with float values PACKED_BIT",
+      "valid": false,
+      "vector": [127.5],
+      "dtype_hex": "0x10",
+      "dtype_alias": "PACKED_BIT",
+      "padding": 0
+    }
+  ]
+}
+
diff --git a/test/node/specs/bson-corpus/binary.json b/test/node/specs/bson-corpus/binary.json
index 20aaef74..0e0056f3 100644
--- a/test/node/specs/bson-corpus/binary.json
+++ b/test/node/specs/bson-corpus/binary.json
@@ -74,6 +74,36 @@
             "description": "$type query operator (conflicts with legacy $binary form with $type field)",
             "canonical_bson": "180000000378001000000010247479706500020000000000",
             "canonical_extjson": "{\"x\" : { \"$type\" : {\"$numberInt\": \"2\"}}}"
+        },
+        {
+            "description": "subtype 0x09 Vector FLOAT32",
+            "canonical_bson": "170000000578000A0000000927000000FE420000E04000",
+            "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"JwAAAP5CAADgQA==\", \"subType\": \"09\"}}}"
+        },
+        {
+            "description": "subtype 0x09 Vector INT8",
+            "canonical_bson": "11000000057800040000000903007F0700",
+            "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"AwB/Bw==\", \"subType\": \"09\"}}}"
+        },
+        {
+            "description": "subtype 0x09 Vector PACKED_BIT",
+            "canonical_bson": "11000000057800040000000910007F0700",
+            "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"EAB/Bw==\", \"subType\": \"09\"}}}"
+        },
+        {
+            "description": "subtype 0x09 Vector (Zero-length) FLOAT32",
+            "canonical_bson": "0F0000000578000200000009270000",
+            "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"JwA=\", \"subType\": \"09\"}}}"
+        },
+        {
+            "description": "subtype 0x09 Vector (Zero-length) INT8",
+            "canonical_bson": "0F0000000578000200000009030000",
+            "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"AwA=\", \"subType\": \"09\"}}}"
+        },
+        {
+            "description": "subtype 0x09 Vector (Zero-length) PACKED_BIT",
+            "canonical_bson": "0F0000000578000200000009100000",
+            "canonical_extjson": "{\"x\": {\"$binary\": {\"base64\": \"EAA=\", \"subType\": \"09\"}}}"
         }
     ],
     "decodeErrors": [