Skip to content

Commit

Permalink
fix schema handling in BatchAggregators
Browse files Browse the repository at this point in the history
  • Loading branch information
ibgreen committed Oct 17, 2024
1 parent 8da8e0e commit 5c14a22
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 27 deletions.
2 changes: 1 addition & 1 deletion modules/csv/test/csv-arrow-loader.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ test('CSVArrowLoader#loadInBatches(incidents.csv)', async (t) => {
// t.comment(`BATCH: ${batch.length}`);
batchCount++;
}
t.equal(batchCount, 5, 'Correct number of batches received');
t.equal(batchCount, 1, 'Correct number of batches received');

t.end();
});
1 change: 1 addition & 0 deletions modules/csv/test/csv-loader.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ test('CSVLoader#loadInBatches(sample.csv, object-rows)', async (t) => {
// `BATCH ${batch.count}: ${batch.length} ${JSON.stringify(batch.data).slice(0, 200)}`
// );
t.equal(batch.length, 2, 'Got correct batch size');
debugger
t.deepEqual(batch.data[0], {column1: 'A', column2: 'B', column3: 1}, 'Got correct first row');
}
batchCount++;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors

import type {Schema, ColumnarTableBatch, ArrowTableBatch} from '@loaders.gl/schema';
import type {Schema, ColumnarTableBatch, ArrowTableBatch, TypedArray} from '@loaders.gl/schema';
import {getArrayTypeFromDataType} from '../../schema/data-type';
import {TableBatchAggregator} from './table-batch-aggregator';

type ColumnarTableBatchOptions = {};

const DEFAULT_ROW_COUNT = 100;
Expand All @@ -13,7 +13,7 @@ export class ColumnarTableBatchAggregator implements TableBatchAggregator {
schema: Schema;
length: number = 0;
allocated: number = 0;
columns: {[columnName: string]: any[]} = {};
columns: Record<string, TypedArray | Array<any>> = {};

constructor(schema: Schema, options: ColumnarTableBatchOptions) {
this.schema = schema;
Expand Down Expand Up @@ -46,24 +46,11 @@ export class ColumnarTableBatchAggregator implements TableBatchAggregator {

getBatch(): ColumnarTableBatch | ArrowTableBatch | null {
this._pruneColumns();
const columns = Array.isArray(this.schema) ? this.columns : {};

// The schema is an array when there are no headers,
// and an object when there are headers.
// The columns should match the schema's format.
if (!Array.isArray(this.schema)) {
for (const fieldName in this.schema) {
const field = this.schema[fieldName];
columns[field.name] = this.columns[field.index];
}
}

this.columns = {};

const batch: ColumnarTableBatch = {
shape: 'columnar-table',
batchType: 'data',
data: columns,
data: this.columns,
schema: this.schema,
length: this.length
};
Expand All @@ -82,23 +69,22 @@ export class ColumnarTableBatchAggregator implements TableBatchAggregator {
this.allocated = this.allocated > 0 ? (this.allocated *= 2) : DEFAULT_ROW_COUNT;
this.columns = {};

for (const fieldName in this.schema) {
const field = this.schema[fieldName];
const ArrayType = field.type || Float32Array;
const oldColumn = this.columns[field.index];
for (const field of this.schema.fields) {
const ArrayType = getArrayTypeFromDataType(field.type, field.nullable);
const oldColumn = this.columns[field.name];

if (oldColumn && ArrayBuffer.isView(oldColumn)) {
// Copy the old data to the new array
const typedArray = new ArrayType(this.allocated);
typedArray.set(oldColumn);

Check failure on line 79 in modules/schema-utils/src/lib/table/batch-builder/columnar-table-batch-aggregator.ts

View workflow job for this annotation

GitHub Actions / test

Property 'set' does not exist on type 'any[] | Int8Array | Uint8Array | Int16Array | Uint16Array | Int32Array | Uint32Array | Float32Array | Float64Array'.
this.columns[field.index] = typedArray;
this.columns[field.name] = typedArray;
} else if (oldColumn) {
// Plain array
oldColumn.length = this.allocated;
this.columns[field.index] = oldColumn;
this.columns[field.name] = oldColumn;
} else {
// Create new
this.columns[field.index] = new ArrayType(this.allocated);
this.columns[field.name] = new ArrayType(this.allocated);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ export class RowTableBatchAggregator implements TableBatchAggregator {
// object if there are headers
if (schema) {
this._headers = [];
for (const key in schema) {
this._headers[schema[key].index] = schema[key].name;
for (let i = 0; i < schema.fields.length; i++) {
this._headers[i] = schema.fields[i].name;
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import type {
import {convertTable} from './convert-table';
import {convertArrowToSchema} from '../../schema/convert-arrow-schema';
import {makeArrowRecordBatchIterator} from '../batches/make-arrow-batch-iterator';

/**
* Convert a loaders.gl Table to an Apache Arrow Table
* @param mesh
Expand Down

0 comments on commit 5c14a22

Please sign in to comment.