From 87ec194ae067e04934cda197ddf6e224016e4e6e Mon Sep 17 00:00:00 2001 From: Ib Green Date: Fri, 18 Oct 2024 11:04:01 -0400 Subject: [PATCH] feat(loader-utils): New Format type (#3141) --- docs/arrowjs/api-reference/builder.md | 16 ++++++++++++ docs/arrowjs/developer-guide/builders.md | 26 +++++++++++++++++++ modules/bson/src/bson-format.ts | 15 +++++++++++ modules/bson/src/bson-loader.ts | 3 +++ modules/bson/src/bson-writer.ts | 2 ++ modules/draco/src/draco-writer.ts | 1 + modules/gis/src/index.ts | 3 +-- modules/images/src/image-writer.ts | 1 + modules/loader-utils/src/format-types.ts | 24 +++++++++++++++++ modules/loader-utils/src/index.ts | 4 +++ modules/loader-utils/src/loader-types.ts | 20 ++++++-------- modules/loader-utils/src/source-types.ts | 3 ++- modules/loader-utils/src/writer-types.ts | 22 +++++++--------- .../schema/src/categories/category-table.ts | 10 +++---- modules/schema/src/types/batch.ts | 4 ++- .../textures/src/compressed-texture-writer.ts | 1 + modules/textures/src/ktx2-basis-writer.ts | 1 + modules/wkt/src/twkb-writer.ts | 1 + modules/wkt/src/wkb-writer.ts | 1 + modules/wkt/src/wkt-writer.ts | 1 + 20 files changed, 126 insertions(+), 33 deletions(-) create mode 100644 modules/bson/src/bson-format.ts create mode 100644 modules/loader-utils/src/format-types.ts diff --git a/docs/arrowjs/api-reference/builder.md b/docs/arrowjs/api-reference/builder.md index 002f698c05..f772639565 100644 --- a/docs/arrowjs/api-reference/builder.md +++ b/docs/arrowjs/api-reference/builder.md @@ -77,3 +77,19 @@ type BuilderOptions { `makeBuilder()` returns `Builder` which is a base class for the various that Arrow JS builder subclasses that construct Arrow Vectors from JavaScript values. + +### append() + +### flush() + +```ts +builder.flush(): Data +``` + +### finish() + +```ts +builder.finish(); +``` + +When the builder is no longer needed, the application should call `builder.finish()`. 
diff --git a/docs/arrowjs/developer-guide/builders.md b/docs/arrowjs/developer-guide/builders.md index 359cda6363..78551191ba 100644 --- a/docs/arrowjs/developer-guide/builders.md +++ b/docs/arrowjs/developer-guide/builders.md @@ -23,3 +23,29 @@ const utf8Vector = utf8Builder.finish().toVector(); console.log(utf8Vector.toJSON()); // > ["hello", null, "world", null] ``` + +One way to build a table with multiple columns is to create an Arrow `Struct` type from the fields in the table's schema, +and then create a `Data` object from that `Struct` type and the per-column data flushed from the builders: + +```ts +function buildTable(arrowSchema: arrow.Schema, data: any[][]) { + const arrowBuilders = arrowSchema.fields.map((field) => arrow.makeBuilder({type: field.type, nullValues: [null]})); + + // Append the application data row by row, one value per column builder + for (const row of data) { + for (let i = 0; i < arrowBuilders.length; i++) { + arrowBuilders[i].append(row[i]); + } + } + + const arrowDatas = arrowBuilders.map((builder) => builder.flush()); + const structField = new arrow.Struct(arrowSchema.fields); + const arrowStructData = new arrow.Data(structField, 0, data.length, 0, undefined, arrowDatas); + const arrowRecordBatch = new arrow.RecordBatch(arrowSchema, arrowStructData); + const arrowTable = new arrow.Table([arrowRecordBatch]); + + arrowBuilders.forEach((builder) => builder.finish()); + + return arrowTable; +} +``` diff --git a/modules/bson/src/bson-format.ts b/modules/bson/src/bson-format.ts new file mode 100644 index 0000000000..e0d24f3d23 --- /dev/null +++ b/modules/bson/src/bson-format.ts @@ -0,0 +1,15 @@ +// loaders.gl +// SPDX-License-Identifier: MIT +// Copyright (c) vis.gl contributors + +import type {Format} from '@loaders.gl/loader-utils'; + +export const BSONFormat = { + name: 'BSON', + id: 'bson', + module: 'bson', + extensions: ['bson'], + mimeTypes: ['application/bson'], + category: 'json', + binary: true +} as const satisfies Format; diff --git a/modules/bson/src/bson-loader.ts 
b/modules/bson/src/bson-loader.ts index 9a2b58cc7f..9ffc0cfd59 100644 --- a/modules/bson/src/bson-loader.ts +++ b/modules/bson/src/bson-loader.ts @@ -1,9 +1,11 @@ // loaders.gl // SPDX-License-Identifier: MIT // Copyright (c) vis.gl contributors + import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils'; import type {ParseBSONOptions} from './lib/parsers/parse-bson'; import {parseBSONSync} from './lib/parsers/parse-bson'; +import {BSONFormat} from './bson-format'; // __VERSION__ is injected by babel-plugin-version-inline // @ts-ignore TS2304: Cannot find name '__VERSION__'. @@ -18,6 +20,7 @@ export type BSONLoaderOptions = LoaderOptions & { }; export const BSONLoader = { + ...BSONFormat, dataType: null as unknown as Record, batchType: null as never, name: 'BSON', diff --git a/modules/bson/src/bson-writer.ts b/modules/bson/src/bson-writer.ts index 23232d408f..7aa88e21b0 100644 --- a/modules/bson/src/bson-writer.ts +++ b/modules/bson/src/bson-writer.ts @@ -5,6 +5,7 @@ import type {WriterWithEncoder, WriterOptions} from '@loaders.gl/loader-utils'; import type {EncodeBSONOptions} from './lib/encoders/encode-bson'; import {encodeBSONSync} from './lib/encoders/encode-bson'; +import {BSONFormat} from './bson-format'; // __VERSION__ is injected by babel-plugin-version-inline // @ts-ignore TS2304: Cannot find name '__VERSION__'. 
@@ -15,6 +16,7 @@ export type BSONWriterOptions = WriterOptions & { } export const BSONWriter = { + ...BSONFormat, name: 'BSON', id: 'bson', module: 'bson', diff --git a/modules/draco/src/draco-writer.ts b/modules/draco/src/draco-writer.ts index 90261f0245..37f65eb356 100644 --- a/modules/draco/src/draco-writer.ts +++ b/modules/draco/src/draco-writer.ts @@ -35,6 +35,7 @@ export const DracoWriter = { module: 'draco', version: VERSION, extensions: ['drc'], + mimeTypes: ['application/octet-stream'], options: { draco: DEFAULT_DRACO_WRITER_OPTIONS }, diff --git a/modules/gis/src/index.ts b/modules/gis/src/index.ts index 4e2f0f7398..e7003fd35d 100644 --- a/modules/gis/src/index.ts +++ b/modules/gis/src/index.ts @@ -73,7 +73,7 @@ export {parseWKTCRS} from './lib//wkt-crs/parse-wkt-crs'; export type {EncodeWKTCRSOptions} from './lib//wkt-crs/encode-wkt-crs'; export {encodeWKTCRS} from './lib//wkt-crs/encode-wkt-crs'; -// Arrow Geometries +// GEOARROW export type { BinaryDataFromGeoArrow, BinaryGeometriesFromArrowOptions @@ -87,7 +87,6 @@ export { getMeanCentersFromBinaryGeometries } from './lib/feature-collection-converters/convert-geoarrow-to-binary-features'; -// GEOARROW export {convertGeoArrowGeometryToGeoJSON} from './lib/geometry-converters/convert-geoarrow-to-geojson'; export {getGeometryColumnsFromSchema} from './lib/geoarrow/geoarrow-metadata'; export {updateBoundsFromGeoArrowSamples} from './lib/geoarrow/get-arrow-bounds'; diff --git a/modules/images/src/image-writer.ts b/modules/images/src/image-writer.ts index eecb76cf5b..dce8f4e9ed 100644 --- a/modules/images/src/image-writer.ts +++ b/modules/images/src/image-writer.ts @@ -21,6 +21,7 @@ export const ImageWriter = { module: 'images', version: VERSION, extensions: ['jpeg'], + mimeTypes: [], options: { image: { mimeType: 'image/png', diff --git a/modules/loader-utils/src/format-types.ts b/modules/loader-utils/src/format-types.ts new file mode 100644 index 0000000000..9d6f0426a5 --- /dev/null +++ 
b/modules/loader-utils/src/format-types.ts @@ -0,0 +1,24 @@ +/** + * Describes a data format (name, id, extensions, MIME types) independently of the loaders, writers and sources that handle it + */ +export type Format = { + /** Human readable name */ + name: string; + /** Unique lower-case id string for this format. Used for e.g. LoaderOptions */ + id: string; + /** loaders.gl module that contains the implementation of this format */ + module: string; + /** Which category does this format belong to */ + category?: string; + /** File extensions that are potential matches with this format. */ + extensions: string[]; + /** MIMETypes that indicate a match with this format. @note Some MIMETypes are generic and shared by many formats */ + mimeTypes: string[]; + /** Is this a binary format */ + binary?: boolean; + /** Is this a text format */ + text?: boolean; + + /** Test some initial bytes of content to see if this format might be a match */ + tests?: (((ArrayBuffer: ArrayBuffer) => boolean) | ArrayBuffer | string)[]; +}; diff --git a/modules/loader-utils/src/index.ts b/modules/loader-utils/src/index.ts index 7e5cc7c4b4..660b9573c2 100644 --- a/modules/loader-utils/src/index.ts +++ b/modules/loader-utils/src/index.ts @@ -20,6 +20,10 @@ export type { FetchLike } from './types'; +// formats + +export type {Format} from './format-types'; + // loaders export type { diff --git a/modules/loader-utils/src/loader-types.ts b/modules/loader-utils/src/loader-types.ts index bedf2b86cb..9294b7724c 100644 --- a/modules/loader-utils/src/loader-types.ts +++ b/modules/loader-utils/src/loader-types.ts @@ -2,10 +2,8 @@ // SPDX-License-Identifier: MIT // Copyright (c) vis.gl contributors -import { - FetchLike, - TransformBatches /* , DataType, SyncDataType, BatchableDataType */ -} from './types'; +import type {Format} from './format-types'; +import {FetchLike, TransformBatches} from './types'; import {ReadableFile} from './lib/files/file'; // LOADERS @@ -113,7 +111,7 @@ type PreloadOptions = { /** * A worker loader definition that 
can be used with `@loaders.gl/core` functions */ -export type Loader = { +export type Loader = Format & { /** The result type of this loader */ dataType?: DataT; /** The batched result type of this loader */ @@ -123,6 +121,11 @@ export type Loader = options: LoaderOptionsT; /** Deprecated Options */ deprecatedOptions?: Record>; + /** Version should be injected by build tools */ + version: string; + /** A boolean, or a URL */ + worker?: string | boolean; + // end Worker /** Human readable name */ name: string; @@ -130,19 +133,12 @@ export type Loader = id: string; /** module is used to generate worker threads, need to be the module directory name */ module: string; - /** Version should be injected by build tools */ - version: string; - /** A boolean, or a URL */ - worker?: string | boolean; - // end Worker - /** Which category does this loader belong to */ category?: string; /** File extensions that are potential matches with this loader. */ extensions: string[]; /** MIMETypes that indicate a match with this loader. 
@note Some MIMETypes are generic and supported by many loaders */ mimeTypes: string[]; - /** Is the input of this loader binary */ binary?: boolean; /** Is the input of this loader text */ diff --git a/modules/loader-utils/src/source-types.ts b/modules/loader-utils/src/source-types.ts index 467a172bac..9ba285340b 100644 --- a/modules/loader-utils/src/source-types.ts +++ b/modules/loader-utils/src/source-types.ts @@ -2,6 +2,7 @@ // SPDX-License-Identifier: MIT // Copyright (c) vis.gl contributors +import type {Format} from './format-types'; import type {DataSource, DataSourceOptions} from './lib/sources/data-source'; /** @@ -15,7 +16,7 @@ export interface Source< unknown, DataSourceOptions > -> { +> extends Format { /** Type of source created by this service */ dataSource?: DataSourceT; /** Type of options used when creating sources */ diff --git a/modules/loader-utils/src/writer-types.ts b/modules/loader-utils/src/writer-types.ts index 98bd584c5b..b2cb8c5774 100644 --- a/modules/loader-utils/src/writer-types.ts +++ b/modules/loader-utils/src/writer-types.ts @@ -2,6 +2,8 @@ // SPDX-License-Identifier: MIT // Copyright (c) vis.gl contributors +import type {Format} from './format-types'; + // WRITERS /** Options for writers */ @@ -24,11 +26,18 @@ export type WriterOptions = { * A writer definition that can be used with `@loaders.gl/core` functions */ // eslint-disable-next-line @typescript-eslint/no-unused-vars -export type Writer = { +export type Writer = Format & { /** The result type of this loader */ dataType?: DataT; /** The batched result type of this loader */ batchType?: BatchT; + /** Version should be injected by build tools */ + version: string; + /** A boolean, or a URL */ + worker?: string | boolean; + // end Worker + options: WriterOptionsT; + deprecatedOptions?: Record; /** Human readable name */ name: string; @@ -36,27 +45,16 @@ export type Writer; }; /** diff --git a/modules/schema/src/categories/category-table.ts 
b/modules/schema/src/categories/category-table.ts index 91094df8b8..cd9add87f4 100644 --- a/modules/schema/src/categories/category-table.ts +++ b/modules/schema/src/categories/category-table.ts @@ -79,7 +79,7 @@ export type TableBatch = | ArrowTableBatch; /** Batch for a table organized as an array of rows, each row is an array of values */ -export type ArrayRowTableBatch = Batch & { +export type ArrayRowTableBatch = Batch & { shape: 'array-row-table'; schema?: Schema; schemaType?: 'explicit' | 'deduced'; @@ -88,7 +88,7 @@ export type ArrayRowTableBatch = Batch & { }; /** Batch for a table organized as an array of rows, each row is an object mapping columns to values */ -export type ObjectRowTableBatch = Batch & { +export type ObjectRowTableBatch = Batch & { shape: 'object-row-table'; schema?: Schema; schemaType?: 'explicit' | 'deduced'; @@ -97,7 +97,7 @@ export type ObjectRowTableBatch = Batch & { }; /** Batch for a table organized as an array of rows, each row is an array of values */ -export type GeoJSONTableBatch = Batch & { +export type GeoJSONTableBatch = Batch & { shape: 'geojson-table'; schema?: Schema; schemaType?: 'explicit' | 'deduced'; @@ -107,7 +107,7 @@ export type GeoJSONTableBatch = Batch & { }; /** Batch for a table organized as a map of columns, each column is an array of value */ -export type ColumnarTableBatch = Batch & { +export type ColumnarTableBatch = Batch & { shape: 'columnar-table'; schemaType?: 'explicit' | 'deduced'; schema?: Schema; @@ -116,7 +116,7 @@ export type ColumnarTableBatch = Batch & { }; /** Batch that wraps an Apache Arrow RecordBatch */ -export type ArrowTableBatch = Batch & { +export type ArrowTableBatch = Batch & { shape: 'arrow-table'; schemaType?: 'explicit' | 'deduced'; schema?: Schema; diff --git a/modules/schema/src/types/batch.ts b/modules/schema/src/types/batch.ts index 0dba4b1405..dd47f8026f 100644 --- a/modules/schema/src/types/batch.ts +++ b/modules/schema/src/types/batch.ts @@ -12,9 +12,11 @@ type 
ApacheRecordBatch = unknown; * @see parseInBatches() * @see loadInBatches() */ -export type Batch = { +export type Batch = { /** A batch can contain metadata, data, or in case of unstructured data (JSON) */ batchType: 'data' | 'metadata' | 'partial-result' | 'final-result'; + /** Metadata for this batch if batchType === 'metadata' */ + metadata?: MetadataT; /** A string identifying the shape of data in this batch (table, etc) */ shape: string; /** Schema of the data in this batch */ diff --git a/modules/textures/src/compressed-texture-writer.ts b/modules/textures/src/compressed-texture-writer.ts index e75b620168..a8d34ed77d 100644 --- a/modules/textures/src/compressed-texture-writer.ts +++ b/modules/textures/src/compressed-texture-writer.ts @@ -31,6 +31,7 @@ export const CompressedTextureWriter = { version: VERSION, extensions: ['dds'], + mimeTypes: ['image/vnd-ms.dds', 'image/x-dds', 'application/octet-stream'], options: { texture: { diff --git a/modules/textures/src/ktx2-basis-writer.ts b/modules/textures/src/ktx2-basis-writer.ts index 6200206775..445e1109b7 100644 --- a/modules/textures/src/ktx2-basis-writer.ts +++ b/modules/textures/src/ktx2-basis-writer.ts @@ -28,6 +28,7 @@ export const KTX2BasisWriter = { version: VERSION, extensions: ['ktx2'], + mimeTypes: ['image/ktx2'], options: { ['ktx2-basis-writer']: { useSRGB: false, diff --git a/modules/wkt/src/twkb-writer.ts b/modules/wkt/src/twkb-writer.ts index d173283cfb..e05177a10a 100644 --- a/modules/wkt/src/twkb-writer.ts +++ b/modules/wkt/src/twkb-writer.ts @@ -23,6 +23,7 @@ export const TWKBWriter = { module: 'wkt', version: VERSION, extensions: ['twkb'], + mimeTypes: ['application/text'], encode: async (geometry: Geometry, options?: TWKBWriterOptions) => convertGeometryToTWKB(geometry, options?.twkb), encodeSync: (geometry: Geometry, options?: TWKBWriterOptions) => diff --git a/modules/wkt/src/wkb-writer.ts b/modules/wkt/src/wkb-writer.ts index e3e9d804ec..bfc9dbd072 100644 --- 
a/modules/wkt/src/wkb-writer.ts +++ b/modules/wkt/src/wkb-writer.ts @@ -29,6 +29,7 @@ export const WKBWriter = { module: 'wkt', version: VERSION, extensions: ['wkb'], + mimeTypes: ['application/wkb', 'application/octet-stream'], options: { wkb: { hasZ: false, diff --git a/modules/wkt/src/wkt-writer.ts b/modules/wkt/src/wkt-writer.ts index e45252ed41..a40a1db19e 100644 --- a/modules/wkt/src/wkt-writer.ts +++ b/modules/wkt/src/wkt-writer.ts @@ -20,6 +20,7 @@ export const WKTWriter = { module: 'wkt', version: VERSION, extensions: ['wkt'], + mimeTypes: ['application/wkt', 'text/plain'], text: true, encode: async (geometry: Geometry) => convertGeometryToWKTSync(geometry), encodeSync: convertGeometryToWKTSync,