-
Notifications
You must be signed in to change notification settings - Fork 196
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(shapefile): DBFArrowLoader (#3142)
- Loading branch information
Showing
10 changed files
with
665 additions
and
3 deletions.
There are no files selected for viewing
53 changes: 53 additions & 0 deletions
53
modules/gis/src/lib/table-converters/make-arrow-batch-iterator.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
// loaders.gl | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) vis.gl contributors | ||
|
||
import * as arrow from 'apache-arrow'; | ||
import type {Table} from '@loaders.gl/schema'; | ||
import { | ||
convertSchemaToArrow, | ||
getTableLength, | ||
getTableNumCols, | ||
getTableCellAt | ||
} from '@loaders.gl/schema-utils'; | ||
|
||
/**
 * Converts a table into a sequence of Arrow record batches.
 *
 * Cells are read row by row with `getTableCellAt` and appended to one Arrow builder per
 * schema field. A record batch is yielded every time `batchSize` complete rows have been
 * accumulated, followed by one final (possibly smaller) batch holding the remaining rows.
 *
 * @param table - Table to convert. Its `schema` must be set.
 * @param options - `batchSize` is the maximum number of rows per yielded batch.
 *   When it is omitted (or 0) all rows are emitted in a single batch.
 * @returns An iterator over the generated record batches. A table without rows yields nothing.
 */
export function* makeTableToArrowBatchesIterator(
  table: Table,
  options?: {batchSize?: number}
): IterableIterator<arrow.RecordBatch> {
  const arrowSchema = convertSchemaToArrow(table.schema!);

  const length = getTableLength(table);
  const numColumns = getTableNumCols(table);
  // `||` (not `??`) on purpose: a batch size of 0 also falls back to "everything in one batch"
  const batchSize = options?.batchSize || length;

  const builders = arrowSchema.fields.map((arrowField) => arrow.makeBuilder(arrowField));
  const structField = new arrow.Struct(arrowSchema.fields);

  /** Drains all column builders into a record batch holding `numRows` rows */
  const flushRecordBatch = (numRows: number): arrow.RecordBatch => {
    const datas = builders.map((builder) => builder.flush());
    const structData = new arrow.Data(structField, 0, numRows, 0, undefined, datas);
    return new arrow.RecordBatch(arrowSchema, structData);
  };

  // Number of complete rows appended since the last flush
  let batchLength = 0;
  for (let rowIndex = 0; rowIndex < length; rowIndex++) {
    for (let columnIndex = 0; columnIndex < numColumns; ++columnIndex) {
      builders[columnIndex].append(getTableCellAt(table, rowIndex, columnIndex));
    }

    // Count and flush only once the whole row has been appended. Doing this per cell would
    // split rows across batches and report a batch length that disagrees with the columns.
    batchLength++;
    if (batchLength >= batchSize) {
      yield flushRecordBatch(batchLength);
      batchLength = 0;
    }
  }

  // Emit the rows left over after the last full batch
  if (batchLength > 0) {
    yield flushRecordBatch(batchLength);
    batchLength = 0;
  }

  builders.forEach((builder) => builder.finish());
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
112 changes: 112 additions & 0 deletions
112
modules/schema-utils/src/lib/table/batch-builder/arrow-table-builder.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
// loaders.gl | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) vis.gl contributors | ||
|
||
import type {Schema, ArrowTable, ArrowTableBatch} from '@loaders.gl/schema'; | ||
import * as arrow from 'apache-arrow'; | ||
import {convertSchemaToArrow} from '@loaders.gl/schema-utils'; | ||
|
||
/**
 * Builds an Arrow table, or a sequence of Arrow table batches, from rows added one at a time.
 *
 * One Arrow builder is created per schema field. Rows are appended with `addObjectRow` or
 * `addArrayRow`; the rows accumulated since the last extraction are retrieved with
 * `flushBatch`, `finishBatch` or `finishTable`.
 */
export class ArrowTableBuilder {
  /** Schema that the added rows conform to */
  schema: Schema;
  /** Arrow version of `schema`, produced by `convertSchemaToArrow` */
  arrowSchema: arrow.Schema;
  /** One builder per schema field, in field order */
  arrowBuilders: arrow.Builder[];
  /** Total number of rows added so far (not reset when a batch is extracted) */
  length: number;

  /**
   * @param schema - Schema describing the columns of the rows that will be added
   */
  constructor(schema: Schema) {
    this.schema = schema;
    this.arrowSchema = convertSchemaToArrow(schema);
    this.arrowBuilders = this.arrowSchema.fields.map((field) =>
      arrow.makeBuilder({type: field.type, nullValues: [null]})
    );
    this.length = 0;
  }

  /**
   * Appends a row given as an object keyed by column name.
   * @param row - Values are looked up by the name of each schema field
   */
  addObjectRow(row: {[key: string]: any}) {
    for (let i = 0; i < this.arrowBuilders.length; i++) {
      const columnName = this.schema.fields[i].name;
      this.arrowBuilders[i].append(row[columnName]);
    }
    this.length++;
  }

  /**
   * Appends a row given as an array of values.
   * @param row - Values in schema field order
   */
  addArrayRow(row: any[]) {
    for (let i = 0; i < this.arrowBuilders.length; i++) {
      this.arrowBuilders[i].append(row[i]);
    }
    this.length++;
  }

  /**
   * Makes sure that a first batch with schema is sent even if no rows
   * @returns An empty batch when no rows have been added yet, otherwise `null`
   */
  firstBatch(): ArrowTableBatch | null {
    // Decide BEFORE extracting a record batch: extraction drains the builders, so draining
    // first and then returning null would silently drop rows that were already added.
    // If there is data, a batch will be sent later.
    if (this.length > 0) {
      return null;
    }
    return this._makeArrowTableBatch(this._getArrowRecordBatch());
  }

  /**
   * Flush the rows accumulated since the last extraction
   * @returns A batch with those rows, or `null` when there are none
   */
  flushBatch(): ArrowTableBatch | null {
    const arrowRecordBatch = this._getArrowRecordBatch();
    // "No data" is about rows; the column count is fixed by the schema
    if (arrowRecordBatch.numRows === 0) {
      return null;
    }
    return this._makeArrowTableBatch(arrowRecordBatch);
  }

  /**
   * Get a last batch if any data is left, and finish the builders
   * @returns A batch with the remaining rows, or `null` when there are none
   */
  finishBatch(): ArrowTableBatch | null {
    const arrowRecordBatch = this._getArrowRecordBatch();
    this.arrowBuilders.forEach((builder) => builder.finish());
    if (arrowRecordBatch.numRows === 0) {
      return null;
    }
    return this._makeArrowTableBatch(arrowRecordBatch);
  }

  /**
   * Return a table with the rows currently held by the builders, and finish the builders.
   * Rows already extracted through `flushBatch` are not included.
   */
  finishTable(): ArrowTable {
    const arrowRecordBatch = this._getArrowRecordBatch();
    this.arrowBuilders.forEach((builder) => builder.finish());
    return {
      shape: 'arrow-table',
      schema: this.schema,
      data: new arrow.Table(arrowRecordBatch)
    };
  }

  /** Extract a record batch flushing the currently accumulated data in the builders */
  _getArrowRecordBatch(): arrow.RecordBatch {
    const {arrowBuilders, arrowSchema} = this;
    const arrowDatas = arrowBuilders.map((builder) => builder.flush());
    // A schema without fields has no column to read the row count from
    const length = arrowDatas[0]?.length ?? 0;
    const structField = new arrow.Struct(arrowSchema.fields);
    const arrowStructData = new arrow.Data(structField, 0, length, 0, undefined, arrowDatas);
    return new arrow.RecordBatch(arrowSchema, arrowStructData);
  }

  /** Wraps a record batch in the `ArrowTableBatch` shape returned by the batch methods */
  _makeArrowTableBatch(arrowRecordBatch: arrow.RecordBatch): ArrowTableBatch {
    return {
      shape: 'arrow-table',
      batchType: 'data',
      length: arrowRecordBatch.numRows,
      schema: this.schema,
      data: new arrow.Table(arrowRecordBatch)
    };
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
// loaders.gl | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) vis.gl contributors | ||
|
||
import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils'; | ||
import type {ArrowTable, ArrowTableBatch} from '@loaders.gl/schema'; | ||
import {parseDBF, parseDBFInBatches} from './lib/parsers/parse-dbf-to-arrow'; | ||
import {DBFFormat} from './dbf-format'; | ||
|
||
// Version string published on the loader objects; falls back to 'latest' when __VERSION__ is not defined | ||
// __VERSION__ is injected by babel-plugin-version-inline | ||
// @ts-ignore TS2304: Cannot find name '__VERSION__'. | ||
const VERSION = typeof __VERSION__ !== 'undefined' ? __VERSION__ : 'latest'; | ||
|
||
// Options for the DBF Arrow loaders: the generic LoaderOptions plus an optional `dbf` section | ||
export type DBFLoaderOptions = LoaderOptions & { | ||
dbf?: { | ||
// The loader's default options set this to 'latin1'. | ||
// NOTE(review): presumably the text encoding used to decode DBF field values - confirm against the parser | ||
encoding?: string; | ||
/** Override the URL to the worker bundle (by default loads from unpkg.com) */ | ||
workerUrl?: string; | ||
}; | ||
}; | ||
|
||
/** | ||
* DBFLoader - DBF files are used to contain non-geometry columns in Shapefiles | ||
* | ||
* Loader descriptor without parser functions: it spreads the DBFFormat fields and adds | ||
* the version, the `worker` flag and the default `dbf` options. Typed to produce | ||
* ArrowTable data and ArrowTableBatch batches. | ||
*/ | ||
export const DBFArrowWorkerLoader = { | ||
...DBFFormat, | ||
// dataType and batchType are null at runtime; the casts only carry type information | ||
dataType: null as unknown as ArrowTable, | ||
batchType: null as unknown as ArrowTableBatch, | ||
version: VERSION, | ||
worker: true, | ||
options: { | ||
dbf: { | ||
encoding: 'latin1' | ||
} | ||
} | ||
} as const satisfies Loader<ArrowTable, ArrowTableBatch, DBFLoaderOptions>; | ||
|
||
/** | ||
* DBF file loader | ||
* | ||
* Extends DBFArrowWorkerLoader with parser functions: `parse` and `parseSync` delegate to | ||
* `parseDBF`, and `parseInBatches` delegates to `parseDBFInBatches`. | ||
*/ | ||
export const DBFArrowLoader = { | ||
...DBFArrowWorkerLoader, | ||
parse: async (arrayBuffer, options) => parseDBF(arrayBuffer, options), | ||
parseSync: parseDBF, | ||
parseInBatches(arrayBufferIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>, options) { | ||
return parseDBFInBatches(arrayBufferIterator, options); | ||
} | ||
} as const satisfies LoaderWithParser<ArrowTable, ArrowTableBatch, DBFLoaderOptions>; | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
// loaders.gl | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) vis.gl contributors | ||
|
||
import type {Format} from '@loaders.gl/loader-utils'; | ||
|
||
/** | ||
* Information about the DBF format | ||
* | ||
* Static descriptor (name, id, owning module, category, file extensions and MIME types), | ||
* checked against the `Format` type while keeping its literal types via `as const`. | ||
*/ | ||
export const DBFFormat = { | ||
name: 'DBF', | ||
id: 'dbf', | ||
module: 'shapefile', | ||
category: 'table', | ||
extensions: ['dbf'], | ||
mimeTypes: ['application/x-dbf'] | ||
} as const satisfies Format; | ||
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.