From 2622ff14fb6dd28c90ec02eab2f653ac0bcd9876 Mon Sep 17 00:00:00 2001
From: Jayson Jacobs
Date: Thu, 18 Jan 2024 20:46:24 -0700
Subject: [PATCH] allow typeLength to come from opts.column when decoding FIXED_LEN_BYTE_ARRAY (#108)

Problem
=======
typeLength is present in column options but decoding is throwing an error.

`thrown: "missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)"`

options object for reference:
```
{
  type: 'FIXED_LEN_BYTE_ARRAY',
  rLevelMax: 0,
  dLevelMax: 1,
  compression: 'SNAPPY',
  column: {
    name: 'BLOCK_NUMBER',
    primitiveType: 'FIXED_LEN_BYTE_ARRAY',
    originalType: 'DECIMAL',
    path: [ 'BLOCK_NUMBER' ],
    repetitionType: 'OPTIONAL',
    encoding: 'PLAIN',
    statistics: undefined,
    compression: 'UNCOMPRESSED',
    precision: 38,
    scale: 0,
    typeLength: 16,
    rLevelMax: 0,
    dLevelMax: 1
  },
  num_values: { buffer: <Buffer ...>, offset: 0 }
}
```

using `parquet-tools schema` here is the schema for this column:
```
optional fixed_len_byte_array(16) BLOCK_NUMBER (DECIMAL(38,0))
```

The parquet file is a direct export from snowflake and the data type of the
column is `NUMBER(38,0)`.

Solution
========
I traced through the code to find where the decode was erroring and added the
ability to take the `typeLength` from `column` in the column options when it is
not present at the top level.

Change summary:
---------------
see above

Steps to Verify:
----------------
decode a parquet file with this type of field.

---------

Co-authored-by: Wil Wade
Co-authored-by: Wil Wade
---
 lib/codec/plain.ts                   | 9 +++++----
 test/reference-test/read-all.test.ts | 2 --
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/lib/codec/plain.ts b/lib/codec/plain.ts
index cc191a17..22871e08 100644
--- a/lib/codec/plain.ts
+++ b/lib/codec/plain.ts
@@ -264,16 +264,17 @@ function decodeValues_FIXED_LEN_BYTE_ARRAY(
   opts: Options
 ) {
   let values = [];
-
-  if (!opts.typeLength) {
+  const typeLength =
+    opts.typeLength ?? (opts.column ? opts.column.typeLength : undefined);
+  if (!typeLength) {
     throw "missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)";
   }
 
   for (let i = 0; i < count; ++i) {
     values.push(
-      cursor.buffer.slice(cursor.offset, cursor.offset + opts.typeLength)
+      cursor.buffer.slice(cursor.offset, cursor.offset + typeLength)
     );
-    cursor.offset += opts.typeLength;
+    cursor.offset += typeLength;
   }
 
   return values;
diff --git a/test/reference-test/read-all.test.ts b/test/reference-test/read-all.test.ts
index ee33f46f..492e4055 100644
--- a/test/reference-test/read-all.test.ts
+++ b/test/reference-test/read-all.test.ts
@@ -24,8 +24,6 @@ const unsupported = [
   'delta_encoding_optional_column.parquet', // DELTA_BINARY_PACKED unsupported
   'delta_encoding_required_column.parquet', // DELTA_BINARY_PACKED unsupported
   'delta_length_byte_array.parquet', // ZSTD unsupported, DELTA_BINARY_PACKED unsupported
-  'float16_nonzeros_and_nans.parquet', // missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)
-  'float16_zeros_and_nans.parquet', // missing option: typeLength (required for FIXED_LEN_BYTE_ARRAY)
   'large_string_map.brotli.parquet', // BUG?
 ];
 