Skip to content

Commit

Permalink
chore(arrow): table conversion (#3108)
Browse files Browse the repository at this point in the history
  • Loading branch information
ibgreen authored Oct 2, 2024
1 parent 5510655 commit 6a642b4
Show file tree
Hide file tree
Showing 44 changed files with 552 additions and 457 deletions.
4 changes: 4 additions & 0 deletions modules/arrow/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@
"types": "./dist/index.d.ts",
"import": "./dist/index.js",
"require": "./dist/index.cjs"
},
"./exports/*": {
"types": "./dist/exports/*.d.ts",
"import": "./dist/exports/*.js"
}
},
"sideEffects": false,
Expand Down
63 changes: 6 additions & 57 deletions modules/arrow/src/arrow-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,59 +2,12 @@
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors

import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
import type {
ArrayRowTable,
ArrowTableBatch,
ColumnarTable,
ObjectRowTable
} from '@loaders.gl/schema';
import type {ArrowTable} from './lib/arrow-table';
import {parseArrowSync} from './parsers/parse-arrow-sync';
import {parseArrowInBatches} from './parsers/parse-arrow-in-batches';
import type {LoaderWithParser} from '@loaders.gl/loader-utils';
import type {Table, ArrowTableBatch} from '@loaders.gl/schema';
import {parseArrowSync, parseArrowInBatches} from './lib/parsers/parse-arrow';

// __VERSION__ is injected by babel-plugin-version-inline
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';

/**
 * Options accepted by the Arrow loaders.
 * Combines the generic `LoaderOptions` with an optional, Arrow-specific `arrow` section.
 */
export type ArrowLoaderOptions = LoaderOptions & {
/** Arrow-specific settings; the whole section may be omitted */
arrow?: {
/** Shape of returned data (required whenever the `arrow` section is supplied) */
shape: 'arrow-table' | 'columnar-table' | 'array-row-table' | 'object-row-table';
/** Debounce time between batches (prevent excessive numbers of small batches) */
batchDebounceMs?: number;
/** Override the URL to the worker bundle (by default loads from unpkg.com) */
workerUrl?: string;
};
};

/**
 * ArrowJS table loader description without a parser.
 * Holds loader metadata and default options only; no parse functions are defined on this object.
 */
export const ArrowWorkerLoader = {
// Type-only placeholders: the runtime value is null, the casts only carry the data/batch types
dataType: null as unknown as ArrowTable,
batchType: null as never,

name: 'Apache Arrow',
id: 'arrow',
module: 'arrow',
version: VERSION,
// NOTE(review): the worker flag is commented out - presumably worker loading is not enabled yet; confirm
// worker: true,
category: 'table',
extensions: ['arrow', 'feather'],
mimeTypes: [
'application/vnd.apache.arrow.file',
'application/vnd.apache.arrow.stream',
'application/octet-stream'
],
binary: true,
tests: ['ARROW'],
options: {
arrow: {
// Value of `arrow.shape` declared in this loader's default options
shape: 'columnar-table'
}
}
} as const satisfies Loader<ArrowTable, never, ArrowLoaderOptions>;
import type {ArrowLoaderOptions} from './exports/arrow-loader';
import {ArrowWorkerLoader} from './exports/arrow-loader';

/** ArrowJS table loader */
export const ArrowLoader = {
Expand All @@ -64,8 +17,4 @@ export const ArrowLoader = {
parseSync: (arraybuffer: ArrayBuffer, options?: ArrowLoaderOptions) =>
parseArrowSync(arraybuffer, options?.arrow),
parseInBatches: parseArrowInBatches
} as const satisfies LoaderWithParser<
ArrowTable | ColumnarTable | ObjectRowTable | ArrayRowTable,
ArrowTableBatch,
ArrowLoaderOptions
>;
} as const satisfies LoaderWithParser<Table, ArrowTableBatch, ArrowLoaderOptions>;
2 changes: 1 addition & 1 deletion modules/arrow/src/arrow-writer.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
// import type {} from '@loaders.gl/loader-utils';

import type {WriterWithEncoder, WriterOptions} from '@loaders.gl/loader-utils';
import {ColumnarTable, encodeArrowSync} from './lib/encode-arrow';
import {ColumnarTable, encodeArrowSync} from './lib/encoders/encode-arrow';

// __VERSION__ is injected by babel-plugin-version-inline
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
Expand Down
49 changes: 49 additions & 0 deletions modules/arrow/src/exports/arrow-loader.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// loaders.gl
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors

import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';
import type {ArrowTable} from '../schema/arrow-table-type';

// __VERSION__ is injected by babel-plugin-version-inline
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest';

/**
 * Options accepted by the Arrow loaders.
 * Combines the generic `LoaderOptions` with an optional, Arrow-specific `arrow` section.
 */
export type ArrowLoaderOptions = LoaderOptions & {
/** Arrow-specific settings; the whole section may be omitted */
arrow?: {
/** Shape of returned data (required whenever the `arrow` section is supplied) */
shape: 'arrow-table' | 'columnar-table' | 'array-row-table' | 'object-row-table';
/** Debounce time between batches (prevent excessive numbers of small batches) */
batchDebounceMs?: number;
/** Override the URL to the worker bundle (by default loads from unpkg.com) */
workerUrl?: string;
};
};

/**
 * ArrowJS table loader description without a parser.
 * Holds loader metadata and default options only; no parse functions are defined on this object.
 */
export const ArrowWorkerLoader = {
// Type-only placeholders: the runtime value is null, the casts only carry the data/batch types
dataType: null as unknown as ArrowTable,
batchType: null as never,

name: 'Apache Arrow',
id: 'arrow',
module: 'arrow',
version: VERSION,
// NOTE(review): the worker flag is commented out - presumably worker loading is not enabled yet; confirm
// worker: true,
category: 'table',
extensions: ['arrow', 'feather'],
mimeTypes: [
'application/vnd.apache.arrow.file',
'application/vnd.apache.arrow.stream',
'application/octet-stream'
],
binary: true,
tests: ['ARROW'],
options: {
arrow: {
// Value of `arrow.shape` declared in this loader's default options
shape: 'columnar-table'
}
}
} as const satisfies Loader<ArrowTable, never, ArrowLoaderOptions>;
24 changes: 24 additions & 0 deletions modules/arrow/src/exports/geoarrow-loader.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
// loaders.gl
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors

import type {Loader, LoaderOptions} from '@loaders.gl/loader-utils';
import {ArrowWorkerLoader} from './arrow-loader';
import type {BinaryGeometry} from '@loaders.gl/schema';
import type {ArrowTable, ArrowTableBatch} from '../schema/arrow-table-type';

/** Options accepted by the GeoArrow loaders: generic `LoaderOptions` plus an optional `arrow` section */
export type GeoArrowLoaderOptions = LoaderOptions & {
/** Arrow-specific settings; the whole section may be omitted */
arrow?: {
/** Requested shape of the returned data (optional) */
shape?: 'arrow-table' | 'binary-geometry';
};
};

/**
 * GeoArrow loader description without a parser.
 * Copies every field of `ArrowWorkerLoader` and replaces `options` so that `arrow.shape` is 'arrow-table'.
 */
export const GeoArrowWorkerLoader = {
...ArrowWorkerLoader,
// Declared after the spread, so this `options` object replaces the one inherited from ArrowWorkerLoader
options: {
arrow: {
shape: 'arrow-table'
}
}
} as const satisfies Loader<ArrowTable | BinaryGeometry, ArrowTableBatch, GeoArrowLoaderOptions>;
28 changes: 6 additions & 22 deletions modules/arrow/src/geoarrow-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,12 @@
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors

import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
import {ArrowWorkerLoader} from './arrow-loader';
import type {GeoJSONTable, GeoJSONTableBatch, BinaryGeometry} from '@loaders.gl/schema';
import type {ArrowTable, ArrowTableBatch} from './lib/arrow-table';
import {parseGeoArrowSync} from './parsers/parse-geoarrow-sync';
import {parseGeoArrowInBatches} from './parsers/parse-geoarrow-in-batches';

/** Options accepted by the GeoArrow loaders: generic `LoaderOptions` plus an optional `arrow` section */
export type GeoArrowLoaderOptions = LoaderOptions & {
/** Arrow-specific settings; the whole section may be omitted */
arrow?: {
/** Requested shape of the returned data (optional) */
shape?: 'arrow-table' | 'binary-geometry';
};
};

/**
 * GeoArrow loader description without a parser.
 * Copies every field of `ArrowWorkerLoader` and replaces `options` so that `arrow.shape` is 'arrow-table'.
 */
export const GeoArrowWorkerLoader = {
...ArrowWorkerLoader,
// Declared after the spread, so this `options` object replaces the one inherited from ArrowWorkerLoader
options: {
arrow: {
shape: 'arrow-table'
}
}
} as const satisfies Loader<ArrowTable | BinaryGeometry, never, GeoArrowLoaderOptions>;
import type {LoaderWithParser} from '@loaders.gl/loader-utils';
import type {GeoJSONTable, GeoJSONTableBatch} from '@loaders.gl/schema';
import type {ArrowTable, ArrowTableBatch} from './schema/arrow-table-type';
import {parseGeoArrowSync, parseGeoArrowInBatches} from './lib/parsers/parse-geoarrow';
import type {GeoArrowLoaderOptions} from './exports/geoarrow-loader';
import {GeoArrowWorkerLoader} from './exports/geoarrow-loader';

/**
* GeoArrowLoader loads an Apache Arrow table, parses GeoArrow type extension data
Expand Down
2 changes: 1 addition & 1 deletion modules/arrow/src/geoarrow-writer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import type {WriterWithEncoder, WriterOptions} from '@loaders.gl/loader-utils';
import {GeoJSONTable, BinaryGeometry} from '@loaders.gl/schema';
import {encodeGeoArrowSync} from './lib/encode-geoarrow';
import {encodeGeoArrowSync} from './lib/encoders/encode-geoarrow';

// __VERSION__ is injected by babel-plugin-version-inline
// @ts-ignore TS2304: Cannot find name '__VERSION__'.
Expand Down
46 changes: 23 additions & 23 deletions modules/arrow/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,35 +2,33 @@
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors

import type {ArrowLoaderOptions} from './arrow-loader';
import {TableBatchBuilder} from '@loaders.gl/schema';
import {ArrowTableBatchAggregator} from './lib/arrow-table-batch';
// Types
export type {ArrowTable, ArrowTableBatch} from './schema/arrow-table-type';
export {VECTOR_TYPES} from './lib/types';
import {ArrowTableBatchAggregator} from './schema/arrow-table-batch';

// Make the ArrowBatch type available
import {TableBatchBuilder} from '@loaders.gl/schema';
TableBatchBuilder.ArrowBatch = ArrowTableBatchAggregator;

// TYPES

export {getArrowType} from './schema/arrow-type-utils';

// SCHEMA

// Types
export type {ArrowTable, ArrowTableBatch} from './lib/arrow-table';
export {VECTOR_TYPES} from './types';

// Arrow loader / Writer

export type {ArrowLoaderOptions};
export {ArrowLoader, ArrowWorkerLoader} from './arrow-loader';
export type {ArrowLoaderOptions} from './exports/arrow-loader';
export {ArrowWorkerLoader} from './exports/arrow-loader';
export {ArrowLoader} from './arrow-loader';

export {ArrowWriter} from './arrow-writer';

// Geoarrow loader
export {GeoArrowLoader, GeoArrowWorkerLoader} from './geoarrow-loader';
export {GeoArrowWorkerLoader} from './exports/geoarrow-loader';
export {GeoArrowLoader} from './geoarrow-loader';

// Schema utils
export {
convertArrowToSchema,
convertSchemaToArrow,

// DETAILED FUNCTIONS
serializeArrowSchema,
deserializeArrowSchema,
serializeArrowMetadata,
Expand All @@ -39,7 +37,11 @@ export {
deserializeArrowField,
serializeArrowType,
deserializeArrowType
} from './schema/convert-arrow-schema';
} from './lib/tables/convert-arrow-schema';

// Table utils
export {convertArrowToTable} from './lib/tables/convert-arrow-to-table';
export {convertTableToArrow} from './lib/tables/convert-table-to-arrow';

// EXPERIMENTAL

Expand All @@ -51,19 +53,17 @@ export type {GeoArrowEncoding} from '@loaders.gl/gis';
export type {
BinaryDataFromGeoArrow,
BinaryGeometriesFromArrowOptions
} from './geoarrow/convert-geoarrow-to-binary-geometry';
} from './lib/geoarrow/convert-geoarrow-to-binary-geometry';
export {
getBinaryGeometryTemplate,
getBinaryGeometriesFromArrow,
getTriangleIndices,
getMeanCentersFromBinaryGeometries
} from './geoarrow/convert-geoarrow-to-binary-geometry';

export {updateBoundsFromGeoArrowSamples} from './geoarrow/get-arrow-bounds';
} from './lib/geoarrow/convert-geoarrow-to-binary-geometry';

export {parseGeometryFromArrow} from './geoarrow/convert-geoarrow-to-geojson-geometry';
export {updateBoundsFromGeoArrowSamples} from './lib/geoarrow/get-arrow-bounds';

export {convertArrowToGeoJSONTable} from './tables/convert-arrow-to-geojson-table';
export {parseGeometryFromArrow} from './lib/geoarrow/convert-geoarrow-to-geojson-geometry';

// EXPERIMENTAL WORKER
export {hardClone} from './workers/hard-clone';
Expand Down
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,19 @@
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors

import type {ArrowTableBatch} from '../lib/arrow-table';
import * as arrow from 'apache-arrow';
import {ArrowLoaderOptions} from '../arrow-loader';
// import {isIterable} from '@loaders.gl/core';
import type {Table} from '@loaders.gl/schema';
import type {ArrowTableBatch} from '../../schema/arrow-table-type';
import {ArrowLoaderOptions} from '../../exports/arrow-loader';
import {convertArrowToTable} from '../tables/convert-arrow-to-table';

/**
 * Parses an Arrow IPC buffer into a loaders.gl table.
 *
 * @param arrayBuffer - Binary Arrow IPC data
 * @param options - Optional settings
 * @param options.shape - Desired table shape; defaults to `arrow-table` when omitted
 * @returns The parsed Arrow data converted to the requested shape
 */
export function parseArrowSync(
  arrayBuffer: ArrayBuffer,
  options?: {shape?: Table['shape']}
): Table {
  // Fall back to the default only when no shape was supplied (null / undefined)
  const shape = options?.shape ?? 'arrow-table';
  // tableFromIPC is handed the bytes as a single-chunk array
  const arrowTable = arrow.tableFromIPC([new Uint8Array(arrayBuffer)]);
  return convertArrowToTable(arrowTable, shape);
}

/**
*/
export function parseArrowInBatches(
asyncIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>,
options?: ArrowLoaderOptions
Expand Down
32 changes: 32 additions & 0 deletions modules/arrow/src/lib/parsers/parse-geoarrow.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// loaders.gl
// SPDX-License-Identifier: MIT
// Copyright (c) vis.gl contributors

import type {GeoJSONTable, GeoJSONTableBatch} from '@loaders.gl/schema';
import type {ArrowTable, ArrowTableBatch} from '../../schema/arrow-table-type';
import {parseArrowSync, parseArrowInBatches} from './parse-arrow';
import {convertArrowToTable} from '../tables/convert-arrow-to-table';

/**
 * Parses a GeoArrow-encoded Arrow IPC buffer.
 *
 * The buffer is always parsed to an Arrow table first. If `options.shape` is
 * `'geojson-table'` that table is converted to a GeoJSON table; for every other
 * value (including `'binary-geometry'` and an omitted shape) the Arrow table is
 * returned as is.
 *
 * @param arrayBuffer - Binary Arrow IPC data
 * @param options - Optional settings
 * @param options.shape - Requested output shape
 * @returns An Arrow table, or a GeoJSON table when `'geojson-table'` is requested
 */
export function parseGeoArrowSync(
  arrayBuffer: ArrayBuffer,
  options?: {shape?: 'arrow-table' | 'geojson-table' | 'binary-geometry'}
): ArrowTable | GeoJSONTable {
  // NOTE: 'binary-geometry' is accepted by the option type but is not converted here;
  // it falls through to the default branch and yields the Arrow table.
  const table = parseArrowSync(arrayBuffer, {shape: 'arrow-table'}) as ArrowTable;
  switch (options?.shape) {
    case 'geojson-table':
      return convertArrowToTable(table.data, 'geojson-table');
    default:
      return table;
  }
}

/**
 * Parses GeoArrow data in batches by delegating directly to `parseArrowInBatches`.
 * No GeoArrow-specific conversion is applied inside this function.
 *
 * @param asyncIterator - Sync or async iterable of ArrayBuffer chunks
 * @returns The async iterable produced by `parseArrowInBatches`
 */
export function parseGeoArrowInBatches(
asyncIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>
): AsyncIterable<ArrowTableBatch | GeoJSONTableBatch> {
// | BinaryGeometry
return parseArrowInBatches(asyncIterator);
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,16 @@
import type {DataType, Field, Schema, SchemaMetadata} from '@loaders.gl/schema';
import * as arrow from 'apache-arrow';

/**
 * Convert Apache Arrow Schema (class instance) to a serialized Schema (plain data).
 * Thin alias that delegates to `serializeArrowSchema`.
 */
export function convertArrowToSchema(arrowSchema: arrow.Schema): Schema {
return serializeArrowSchema(arrowSchema);
}

/**
 * Convert a serialized Schema (plain data) to an Apache Arrow Schema (class instance).
 * Thin alias that delegates to `deserializeArrowSchema`.
 */
export function convertSchemaToArrow(schema: Schema): arrow.Schema {
return deserializeArrowSchema(schema);
}

/** Convert Apache Arrow Schema (class instance) to a serialized Schema (plain data) */
export function serializeArrowSchema(arrowSchema: arrow.Schema): Schema {
return {
Expand Down
Loading

0 comments on commit 6a642b4

Please sign in to comment.