-
Notifications
You must be signed in to change notification settings - Fork 196
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
22 changed files
with
406 additions
and
105 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
// loaders.gl | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) vis.gl contributors | ||
|
||
import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils'; | ||
import type {ArrowTable, ArrowTableBatch} from '@loaders.gl/schema'; | ||
import {convertTable, convertBatches} from '@loaders.gl/schema-utils'; | ||
|
||
import type {CSVLoaderOptions} from './csv-loader'; | ||
import {CSVLoader} from './csv-loader'; | ||
|
||
/**
 * Options accepted by `CSVArrowLoader`.
 *
 * Same as the `csv` options of `CSVLoaderOptions`, minus `shape`.
 */
export type CSVArrowLoaderOptions = LoaderOptions & {
  /**
   * CSV parsing options without `shape`.
   *
   * `NonNullable` is required: `CSVLoaderOptions['csv']` may include `undefined`,
   * and `keyof (T | undefined)` is `never`, which would make `Omit` collapse to `{}`
   * and silently accept any object here.
   */
  csv?: Omit<NonNullable<CSVLoaderOptions['csv']>, 'shape'>;
};
|
||
/**
 * Loader object that parses CSV and returns Arrow tables / Arrow table batches.
 *
 * All fields are inherited from `CSVLoader` via spread; only the type markers and
 * the parser entry points are overridden so that results are converted to the
 * 'arrow-table' shape.
 */
export const CSVArrowLoader = {
  ...CSVLoader,

  // Type-only markers: the values are never read, only their static types are used
  dataType: null as unknown as ArrowTable,
  batchType: null as unknown as ArrowTableBatch,

  // All three entry points take CSVArrowLoaderOptions, matching the `satisfies` clause below
  parse: async (arrayBuffer: ArrayBuffer, options?: CSVArrowLoaderOptions) =>
    parseCSVToArrow(new TextDecoder().decode(arrayBuffer), options),
  parseText: (text: string, options?: CSVArrowLoaderOptions) => parseCSVToArrow(text, options),
  parseInBatches: parseCSVToArrowBatches
} as const satisfies LoaderWithParser<ArrowTable, ArrowTableBatch, CSVArrowLoaderOptions>;
|
||
/**
 * Parses CSV text with `CSVLoader.parseText` and converts the result to an Arrow table.
 * @param csvText CSV content as a string
 * @param options Loader options, forwarded unchanged to `CSVLoader.parseText`
 *   (no loader defaults are merged in here)
 * @returns The parsed table converted to the 'arrow-table' shape
 */
async function parseCSVToArrow(csvText: string, options?: CSVLoaderOptions): Promise<ArrowTable> {
  return convertTable(await CSVLoader.parseText(csvText, options), 'arrow-table');
}
|
||
/**
 * Parses a stream of CSV chunks with `CSVLoader.parseInBatches` and converts
 * every resulting batch to the 'arrow-table' shape.
 * @param asyncIterator Sync or async iterable of binary CSV chunks
 * @param options Loader options, forwarded unchanged to `CSVLoader.parseInBatches`
 * @returns Async iterable of Arrow table batches
 */
function parseCSVToArrowBatches(
  asyncIterator: AsyncIterable<ArrayBuffer> | Iterable<ArrayBuffer>,
  options?: CSVArrowLoaderOptions
): AsyncIterable<ArrowTableBatch> {
  return convertBatches(CSVLoader.parseInBatches(asyncIterator, options), 'arrow-table');
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
83 changes: 83 additions & 0 deletions
83
modules/schema-utils/src/lib/table/batches/convert-batches.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
// loaders.gl | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) vis.gl contributors | ||
|
||
import type { | ||
TableBatch, | ||
ArrayRowTableBatch, | ||
ObjectRowTableBatch, | ||
ColumnarTableBatch, | ||
ArrowTableBatch | ||
} from '@loaders.gl/schema'; | ||
import {convertTable} from '../tables/convert-table'; | ||
|
||
export function convertBatch(batch: TableBatch, shape: 'object-row-table'): ObjectRowTableBatch;
export function convertBatch(batch: TableBatch, shape: 'array-row-table'): ArrayRowTableBatch;
export function convertBatch(batch: TableBatch, shape: 'columnar-table'): ColumnarTableBatch;
export function convertBatch(batch: TableBatch, shape: 'arrow-table'): ArrowTableBatch;

/**
 * Convert a table batch to a different shape.
 *
 * The batch's own fields are kept and the fields returned by `convertTable`
 * for the requested shape are spread on top of them.
 *
 * @param batch The batch to convert
 * @param shape The target shape
 * @returns A new batch object in the requested shape
 * @throws Error (with the shape as message) if `shape` is not a supported value
 */
export function convertBatch(
  batch: TableBatch,
  shape: 'object-row-table' | 'array-row-table' | 'columnar-table' | 'arrow-table'
): TableBatch {
  // Dispatch on the REQUESTED shape. Switching on `batch.shape` would always
  // "convert" the batch to the shape it already has and ignore the argument.
  switch (shape) {
    case 'object-row-table':
      return {...batch, ...convertTable(batch, 'object-row-table')};
    case 'array-row-table':
      return {...batch, ...convertTable(batch, 'array-row-table')};
    case 'columnar-table':
      return {...batch, ...convertTable(batch, 'columnar-table')};
    case 'arrow-table':
      return {...batch, ...convertTable(batch, 'arrow-table')};
    default:
      throw new Error(shape);
  }
}
|
||
export function convertBatches(
  batches: Iterable<TableBatch> | AsyncIterable<TableBatch>,
  shape: 'object-row-table'
): AsyncIterableIterator<ObjectRowTableBatch>;
export function convertBatches(
  batches: Iterable<TableBatch> | AsyncIterable<TableBatch>,
  shape: 'array-row-table'
): AsyncIterableIterator<ArrayRowTableBatch>;
export function convertBatches(
  batches: Iterable<TableBatch> | AsyncIterable<TableBatch>,
  shape: 'columnar-table'
): AsyncIterableIterator<ColumnarTableBatch>;
export function convertBatches(
  batches: Iterable<TableBatch> | AsyncIterable<TableBatch>,
  shape: 'arrow-table'
): AsyncIterableIterator<ArrowTableBatch>;

/**
 * Convert a sequence of batches to a different shape, one batch at a time.
 * @param batches Sync or async iterable of table batches
 * @param shape The target shape, passed to `convertBatch` for every batch
 * @returns Async iterator yielding each input batch after conversion
 * @throws Error (with the shape as message) when a batch is pulled and `shape` is unsupported
 */
export async function* convertBatches(
  batches: Iterable<TableBatch> | AsyncIterable<TableBatch>,
  shape: 'object-row-table' | 'array-row-table' | 'columnar-table' | 'arrow-table'
): AsyncIterableIterator<TableBatch> {
  for await (const batch of batches) {
    // Each branch narrows `shape` to a literal so the matching overload of convertBatch is selected
    if (shape === 'object-row-table') {
      yield convertBatch(batch, shape);
    } else if (shape === 'array-row-table') {
      yield convertBatch(batch, shape);
    } else if (shape === 'columnar-table') {
      yield convertBatch(batch, shape);
    } else if (shape === 'arrow-table') {
      yield convertBatch(batch, shape);
    } else {
      throw new Error(shape);
    }
  }
}
75 changes: 75 additions & 0 deletions
75
modules/schema-utils/src/lib/table/batches/make-arrow-batch-iterator.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
// loaders.gl | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) vis.gl contributors | ||
|
||
import * as arrow from 'apache-arrow'; | ||
import type {Table, ArrowTableBatch} from '@loaders.gl/schema'; | ||
|
||
import {convertSchemaToArrow} from '../../schema/convert-arrow-schema'; | ||
import {getTableLength, getTableNumCols, getTableCellAt} from '../tables/table-accessors'; | ||
|
||
/**
 * Returns an iterator that yields a single table as a sequence of ArrowTable batches.
 *
 * Each `arrow.RecordBatch` produced by `makeArrowRecordBatchIterator` is wrapped in
 * its own `arrow.Table` and emitted as a batch of shape 'arrow-table'.
 * @param table The table to split into batches
 * @param options Forwarded unchanged to `makeArrowRecordBatchIterator`
 * @note Every yielded batch carries `table.schema` as its schema.
 */
export function* makeArrowTableBatchIterator(
  table: Table,
  options?: {batchSize?: number}
): IterableIterator<ArrowTableBatch> {
  const recordBatches = makeArrowRecordBatchIterator(table, options);
  for (const recordBatch of recordBatches) {
    const data = new arrow.Table([recordBatch]);
    const tableBatch: ArrowTableBatch = {
      // The record batch's own enumerable properties come first; the fields below override them
      ...recordBatch,
      shape: 'arrow-table',
      schema: table.schema,
      batchType: 'data',
      length: data.numRows,
      data
    };
    yield tableBatch;
  }
}
|
||
/**
 * Returns an iterator that yields a single table as a sequence of arrow.RecordBatch batches.
 *
 * Rows are appended to one Arrow builder per column; a record batch is emitted
 * every `batchSize` rows, plus one final batch for any remaining rows.
 *
 * @param table The table to convert. Must have a `schema`.
 * @param options.batchSize Maximum number of rows per record batch.
 *   When omitted (or 0) all rows are emitted in a single batch.
 * @throws Error if `table.schema` is missing
 * @note All record batches share the Arrow schema derived from `table.schema`.
 */
export function* makeArrowRecordBatchIterator(
  table: Table,
  options?: {batchSize?: number}
): IterableIterator<arrow.RecordBatch> {
  if (!table.schema) {
    throw new Error('makeArrowRecordBatchIterator: table.schema is required');
  }
  const arrowSchema = convertSchemaToArrow(table.schema);

  const length = getTableLength(table);
  const numColumns = getTableNumCols(table);
  // `||` is intentional: a batchSize of 0 is not meaningful and falls back to "one batch"
  const batchSize = options?.batchSize || length;

  const builders = arrowSchema.fields.map((arrowField) => arrow.makeBuilder(arrowField));
  const structField = new arrow.Struct(arrowSchema.fields);

  /** Flushes all column builders into one record batch containing `rowCount` rows */
  const flushRecordBatch = (rowCount: number): arrow.RecordBatch => {
    const datas = builders.map((builder) => builder.flush());
    const structData = new arrow.Data(structField, 0, rowCount, 0, undefined, datas);
    return new arrow.RecordBatch(arrowSchema, structData);
  };

  let batchLength = 0;
  for (let rowIndex = 0; rowIndex < length; rowIndex++) {
    for (let columnIndex = 0; columnIndex < numColumns; ++columnIndex) {
      const value = getTableCellAt(table, rowIndex, columnIndex);
      builders[columnIndex].append(value);
    }

    // Count ROWS, not cells, and only flush on a row boundary so that every
    // column builder holds the same number of values when the batch is emitted.
    batchLength++;
    if (batchLength >= batchSize) {
      yield flushRecordBatch(batchLength);
      batchLength = 0;
    }
  }

  // Emit the trailing partial batch, if any
  if (batchLength > 0) {
    yield flushRecordBatch(batchLength);
    batchLength = 0;
  }

  for (const builder of builders) {
    builder.finish();
  }
}
25 changes: 25 additions & 0 deletions
25
modules/schema-utils/src/lib/table/batches/make-table-batch-iterator.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
// loaders.gl | ||
// SPDX-License-Identifier: MIT | ||
// Copyright (c) vis.gl contributors | ||
|
||
import type {TableBatch, Table} from '@loaders.gl/schema'; | ||
import {getTableLength} from '../tables/table-accessors'; | ||
|
||
/**
 * Returns an iterator that yields the contents of a table as a sequence of batches.
 * @todo Currently only a single batch (the whole table) is yielded.
 * @param table The table to iterate over
 * @returns Iterator yielding the batch built by `makeBatchFromTable`
 */
export function* makeTableBatchIterator(table: Table): IterableIterator<TableBatch> {
  const singleBatch = makeBatchFromTable(table);
  yield singleBatch;
}
|
||
/**
 * Returns a table packaged as a single table batch.
 * @param table The table to wrap
 * @returns A copy of the table's fields plus `length` (from `getTableLength`)
 *   and `batchType: 'data'`
 */
export function makeBatchFromTable(table: Table): TableBatch {
  const length = getTableLength(table);
  return {...table, length, batchType: 'data'};
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.