From 7270a9c306584f68754937e245385ac5dfc36e99 Mon Sep 17 00:00:00 2001 From: Ib Green Date: Sun, 24 Sep 2023 20:39:24 -0400 Subject: [PATCH] lnt --- modules/arrow/src/arrow-loader.ts | 2 +- modules/arrow/src/index.ts | 7 ++++- modules/arrow/src/lib/arrow-table-batch.ts | 4 +-- modules/arrow/src/lib/arrow-table.ts | 26 +++++++++++++++++++ modules/arrow/src/lib/convert-table.ts | 3 ++- .../arrow/src/lib/parse-arrow-in-batches.ts | 2 +- modules/arrow/src/lib/parse-arrow-sync.ts | 3 ++- modules/schema/package.json | 3 +-- .../lib/table/arrow-api/arrow-like-table.ts | 3 ++- .../lib/table/simple-table/table-accessors.ts | 24 ++++++++++------- modules/schema/src/types/category-table.ts | 14 +++++----- modules/zip/package.json | 3 ++- 12 files changed, 66 insertions(+), 28 deletions(-) create mode 100644 modules/arrow/src/lib/arrow-table.ts diff --git a/modules/arrow/src/arrow-loader.ts b/modules/arrow/src/arrow-loader.ts index 6821e82394..9d349730da 100644 --- a/modules/arrow/src/arrow-loader.ts +++ b/modules/arrow/src/arrow-loader.ts @@ -1,6 +1,6 @@ // loaders.gl, MIT license import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils'; -import type {ArrowTable} from '@loaders.gl/schema'; +import type {ArrowTable} from './lib/arrow-table'; // __VERSION__ is injected by babel-plugin-version-inline // @ts-ignore TS2304: Cannot find name '__VERSION__'. diff --git a/modules/arrow/src/index.ts b/modules/arrow/src/index.ts index 0c90ddc69f..a640b1cba5 100644 --- a/modules/arrow/src/index.ts +++ b/modules/arrow/src/index.ts @@ -1,6 +1,10 @@ +// loaders.gl, MIT + import type {LoaderWithParser} from '@loaders.gl/loader-utils'; import type {ArrowLoaderOptions} from './arrow-loader'; -import {ArrowTable, ArrowTableBatch, ColumnarTable, ObjectRowTable} from '@loaders.gl/schema'; +import type {ArrowTableBatch, ColumnarTable, ObjectRowTable} from '@loaders.gl/schema'; +import type {ArrowTable} from './lib/arrow-table'; + import {TableBatchBuilder} from '@loaders.gl/schema'; import {ArrowLoader as ArrowWorkerLoader} from './arrow-loader'; import parseSync from './lib/parse-arrow-sync'; @@ -12,6 +16,7 @@ import {ArrowTableBatchAggregator} from './lib/arrow-table-batch'; TableBatchBuilder.ArrowBatch = ArrowTableBatchAggregator; // Types +export type {ArrowTable, ArrowTableBatch} from './lib/arrow-table'; export {VECTOR_TYPES} from './types'; // Arrow writer diff --git a/modules/arrow/src/lib/arrow-table-batch.ts b/modules/arrow/src/lib/arrow-table-batch.ts index f7bb20621b..9126b73996 100644 --- a/modules/arrow/src/lib/arrow-table-batch.ts +++ b/modules/arrow/src/lib/arrow-table-batch.ts @@ -1,4 +1,5 @@ -import type {ArrowTableBatch} from '@loaders.gl/schema'; +import {ColumnarTableBatchAggregator} from '@loaders.gl/schema'; +import type {ArrowTableBatch} from './arrow-table'; import { Table as ApacheArrowTable, Schema, @@ -10,7 +11,6 @@ import { Vector, Float32 } from 'apache-arrow'; -import {ColumnarTableBatchAggregator} from '@loaders.gl/schema'; export class ArrowTableBatchAggregator extends ColumnarTableBatchAggregator { arrowSchema: Schema | null; diff --git a/modules/arrow/src/lib/arrow-table.ts b/modules/arrow/src/lib/arrow-table.ts new file mode 100644 index 0000000000..eb0a9a79a3 --- /dev/null +++ b/modules/arrow/src/lib/arrow-table.ts @@ -0,0 +1,26 @@ +// loaders.gl, MIT license + +import type {Batch, Schema} from '@loaders.gl/schema'; +import type {Table as ApacheArrowTable} from 'apache-arrow'; + +/** + * A table organized as an Apache Arrow table + * @note This is a variant of the type from loaders.gl/schema + */ +export type ArrowTable = { + shape: 'arrow-table'; + schema?: Schema; + data: ApacheArrowTable; +}; + +/** + * Batch for a table organized as an Apache Arrow table + * @note This is a variant of the type from loaders.gl/schema + */ +export type ArrowTableBatch = Batch & { + shape: 'arrow-table'; + schemaType?: 'explicit' | 'deduced'; + schema?: Schema; + data: ApacheArrowTable; // ApacheRecordBatch; + length: number; +}; diff --git a/modules/arrow/src/lib/convert-table.ts b/modules/arrow/src/lib/convert-table.ts index bc0edef531..5e933d8e63 100644 --- a/modules/arrow/src/lib/convert-table.ts +++ b/modules/arrow/src/lib/convert-table.ts @@ -1,6 +1,7 @@ // loaders.gl, MIT license -import type {ColumnarTable, ObjectRowTable, ArrowTable} from '@loaders.gl/schema'; +import type {ColumnarTable, ObjectRowTable} from '@loaders.gl/schema'; import type {Table as ApacheArrowTable} from 'apache-arrow'; +import type {ArrowTable} from './arrow-table'; /** * Wrap an apache arrow table in a loaders.gl table wrapper. diff --git a/modules/arrow/src/lib/parse-arrow-in-batches.ts b/modules/arrow/src/lib/parse-arrow-in-batches.ts index 8d8235c979..4a0d0526ee 100644 --- a/modules/arrow/src/lib/parse-arrow-in-batches.ts +++ b/modules/arrow/src/lib/parse-arrow-in-batches.ts @@ -1,5 +1,5 @@ // TODO - this import defeats the sophisticated typescript checking in ArrowJS -import {ArrowTableBatch} from '@loaders.gl/schema'; +import type {ArrowTableBatch} from './arrow-table'; import {RecordBatchReader, Table as ApacheArrowTable} from 'apache-arrow'; // import {isIterable} from '@loaders.gl/core'; diff --git a/modules/arrow/src/lib/parse-arrow-sync.ts b/modules/arrow/src/lib/parse-arrow-sync.ts index be33241473..1ede98bbe7 100644 --- a/modules/arrow/src/lib/parse-arrow-sync.ts +++ b/modules/arrow/src/lib/parse-arrow-sync.ts @@ -1,4 +1,5 @@ -import type {ArrowTable, ColumnarTable, ObjectRowTable} from '@loaders.gl/schema'; +import type {ColumnarTable, ObjectRowTable} from '@loaders.gl/schema'; +import type {ArrowTable} from './arrow-table'; import {convertTable} from '@loaders.gl/schema'; import {tableFromIPC} from 'apache-arrow'; import type {ArrowLoaderOptions} from '../arrow-loader'; diff --git a/modules/schema/package.json b/modules/schema/package.json index b3bbafb146..64ff7ea139 100644 --- a/modules/schema/package.json +++ b/modules/schema/package.json @@ -32,8 +32,7 @@ "build-bundle": "esbuild src/bundle.ts --bundle --outfile=dist/dist.min.js" }, "dependencies": { - "@types/geojson": "^7946.0.7", - "apache-arrow": "13.0.0" + "@types/geojson": "^7946.0.7" }, "gitHead": "c95a4ff72512668a93d9041ce8636bac09333fd5" } diff --git a/modules/schema/src/lib/table/arrow-api/arrow-like-table.ts b/modules/schema/src/lib/table/arrow-api/arrow-like-table.ts index b7c761f1ac..39cc0e3771 100644 --- a/modules/schema/src/lib/table/arrow-api/arrow-like-table.ts +++ b/modules/schema/src/lib/table/arrow-api/arrow-like-table.ts @@ -34,7 +34,8 @@ class ArrowLikeVector { toArray(): ArrayLike { switch (this.table.shape) { case 'arrow-table': - return this.table.data.getChild(this.columnName)?.toArray(); + const arrowTable = this.table.data as any; + return arrowTable.getChild(this.columnName)?.toArray(); case 'columnar-table': return this.table.data[this.columnName]; default: diff --git a/modules/schema/src/lib/table/simple-table/table-accessors.ts b/modules/schema/src/lib/table/simple-table/table-accessors.ts index b7da53b51d..380391bc00 100644 --- a/modules/schema/src/lib/table/simple-table/table-accessors.ts +++ b/modules/schema/src/lib/table/simple-table/table-accessors.ts @@ -17,7 +17,8 @@ export function getTableLength(table: Table): number { return table.features.length; case 'arrow-table': - return table.data.numRows; + const arrowTable = table.data as any; + return arrowTable.numRows; case 'columnar-table': for (const column of Object.values(table.data)) { @@ -50,7 +51,8 @@ export function getTableNumCols(table: Table): number { case 'columnar-table': return Object.keys(table.data).length; case 'arrow-table': - return table.data.numCols; + const arrowTable = table.data as any; + return arrowTable.numCols; default: throw new Error('table'); } @@ -74,10 +76,11 @@ export function getTableCell(table: Table, rowIndex: number, columnName: string) return column[rowIndex]; case 'arrow-table': - const arrowColumnIndex = table.data.schema.fields.findIndex( + const arrowTable = table.data as any; + const arrowColumnIndex = arrowTable.schema.fields.findIndex( (field) => field.name === columnName ); - return table.data.getChildAt(arrowColumnIndex)?.get(rowIndex); + return arrowTable.getChildAt(arrowColumnIndex)?.get(rowIndex); default: throw new Error('todo'); @@ -104,7 +107,8 @@ export function getTableCellAt(table: Table, rowIndex: number, columnIndex: numb return column[rowIndex]; case 'arrow-table': - return table.data.getChildAt(columnIndex)?.get(rowIndex); + const arrowTable = table.data as any; + return arrowTable.getChildAt(columnIndex)?.get(rowIndex); default: throw new Error('todo'); @@ -201,9 +205,10 @@ export function getTableRowAsObject( } case 'arrow-table': + const arrowTable = table.data as any; const objectRow: {[columnName: string]: unknown} = target || {}; - const row = table.data.get(rowIndex); - const schema = table.data.schema; + const row = arrowTable.get(rowIndex); + const schema = arrowTable.schema; for (let i = 0; i < schema.fields.length; i++) { objectRow[schema.fields[i].name] = row?.[schema.fields[i].name]; } @@ -272,9 +277,10 @@ export function getTableRowAsArray( } case 'arrow-table': + const arrowTable = table.data as any; const arrayRow: unknown[] = target || []; - const row = table.data.get(rowIndex); - const schema = table.data.schema; + const row = arrowTable.get(rowIndex); + const schema = arrowTable.schema; for (let i = 0; i < schema.fields.length; i++) { arrayRow[i] = row?.[schema.fields[i].name]; } diff --git a/modules/schema/src/types/category-table.ts b/modules/schema/src/types/category-table.ts index 410bc8e2f8..d5d72c2e69 100644 --- a/modules/schema/src/types/category-table.ts +++ b/modules/schema/src/types/category-table.ts @@ -1,15 +1,14 @@ // loaders.gl, MIT license -import type {Table as ApacheArrowTable} from 'apache-arrow'; import type {Schema} from './schema'; import type {Batch} from './batch'; import type {Feature} from './category-gis'; -// Idea was to just import types, but it seems -// Seems this triggers more bundling and build issues than it is worth... -// import type {Table as ApacheArrowTable, RecordBatch} from 'apache-arrow'; -// type ApacheArrowTable = any; -// type RecordBatch = any; +// Avoid a big dependency, apparently even a type import can pull in a lot of code +// import type {Table as ApacheArrowTable} from 'apache-arrow'; + +type ApacheArrowTable = unknown; +type ApacheRecordBatch = unknown; /** A general table */ export type Table = @@ -122,7 +121,6 @@ export type ArrowTableBatch = Batch & { shape: 'arrow-table'; schemaType?: 'explicit' | 'deduced'; schema?: Schema; - data: ApacheArrowTable; - // recordBatch: RecordBatch; + data: ApacheRecordBatch; length: number; }; diff --git a/modules/zip/package.json b/modules/zip/package.json index 5f09043f18..b85e3d5136 100644 --- a/modules/zip/package.json +++ b/modules/zip/package.json @@ -32,7 +32,8 @@ "dependencies": { "@loaders.gl/compression": "4.0.0-alpha.25", "@loaders.gl/loader-utils": "4.0.0-alpha.25", - "jszip": "^3.1.5" + "jszip": "^3.1.5", + "md5": "^2.3.0" }, "gitHead": "c95a4ff72512668a93d9041ce8636bac09333fd5" }