Skip to content

Commit

Permalink
fix
Browse files Browse the repository at this point in the history
  • Loading branch information
ibgreen committed Oct 16, 2024
1 parent 451975d commit bbad322
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 16 deletions.
42 changes: 35 additions & 7 deletions modules/csv/src/csv-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
// Copyright (c) vis.gl contributors

import type {LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils';
import type {ArrayRowTable, ObjectRowTable, TableBatch} from '@loaders.gl/schema';
import type {Schema, ArrayRowTable, ObjectRowTable, TableBatch} from '@loaders.gl/schema';

import {
AsyncQueue,
deduceTableSchema,
TableBatchBuilder,
convertToArrayRow,
convertToObjectRow
Expand Down Expand Up @@ -89,7 +90,7 @@ async function parseCSV(
csvText: string,
options?: CSVLoaderOptions
): Promise<ObjectRowTable | ArrayRowTable> {
// Apps can call the parse method directly, we so apply default options here
// Apps can call the parse method directly, so we apply default options here
const csvOptions = {...CSVLoader.options.csv, ...options?.csv};

const firstRow = readFirstRow(csvText);
Expand All @@ -115,20 +116,25 @@ async function parseCSV(
const headerRow = result.meta.fields || generateHeader(csvOptions.columnPrefix, firstRow.length);

const shape = csvOptions.shape || DEFAULT_CSV_SHAPE;
let table: ArrayRowTable | ObjectRowTable;
switch (shape) {
case 'object-row-table':
return {
table = {
shape: 'object-row-table',
data: rows.map((row) => (Array.isArray(row) ? convertToObjectRow(row, headerRow) : row))
};
break;
case 'array-row-table':
return {
table = {
shape: 'array-row-table',
data: rows.map((row) => (Array.isArray(row) ? row : convertToArrayRow(row, headerRow)))
};
break;
default:
throw new Error(shape);
}
table.schema = deduceTableSchema(table!);
return table;
}

// TODO - support batch size 0 = no batching/single batch?
Expand All @@ -151,7 +157,7 @@ function parseCSVInBatches(
let isFirstRow: boolean = true;
let headerRow: string[] | null = null;
let tableBatchBuilder: TableBatchBuilder | null = null;
let schema: ObjectSchema | null = null;
let schema: Schema | null = null;

const config = {
// dynamicTyping: true, // Convert numbers and boolean values in rows from strings,
Expand Down Expand Up @@ -199,7 +205,7 @@ function parseCSVInBatches(
if (!headerRow) {
headerRow = generateHeader(csvOptions.columnPrefix, row.length);
}
schema = deduceSchema(row, headerRow);
schema = deduceCSVSchema(row, headerRow);
}

if (csvOptions.optimizeMemoryUsage) {
Expand Down Expand Up @@ -314,7 +320,29 @@ function generateHeader(columnPrefix: string, count: number = 0): string[] {
return headers;
}

function deduceSchema(row, headerRow): ObjectSchema {
/**
 * Builds a table schema by inspecting the JavaScript type of each value in one row.
 *
 * Type mapping: `number` -> `float64`, `boolean` -> `bool`, anything else
 * (strings, null, undefined, objects) -> `utf8`. Every field is marked nullable.
 *
 * @param row Values of a single parsed row; only `length` and indexed access are used.
 * @param headerRow Column names, or a falsy value when no header is available.
 * @returns A schema with one field per column, tagged with `'loaders.gl': 'CSVLoader'` metadata.
 */
function deduceCSVSchema(row, headerRow): Schema {
  const fields: Schema['fields'] = [];

  for (let columnIndex = 0; columnIndex < row.length; columnIndex++) {
    // A missing (or empty) header entry falls back to the column index as the name
    const name = String((headerRow && headerRow[columnIndex]) || columnIndex);

    const valueType = typeof row[columnIndex];
    const type = valueType === 'number' ? 'float64' : valueType === 'boolean' ? 'bool' : 'utf8';

    fields.push({name, type, nullable: true});
  }

  return {fields, metadata: {'loaders.gl': 'CSVLoader'}};
}

function deduceObjectSchema(row, headerRow): ObjectSchema {

Check failure on line 345 in modules/csv/src/csv-loader.ts

View workflow job for this annotation

GitHub Actions / test

'deduceObjectSchema' is declared but its value is never read.
const schema: ObjectSchema = headerRow ? {} : [];
for (let i = 0; i < row.length; i++) {
const columnName = (headerRow && headerRow[i]) || i;
Expand Down
26 changes: 18 additions & 8 deletions modules/csv/test/csv-arrow-loader.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,10 @@ import * as arrow from 'apache-arrow';
// Small CSV Sample Files
const CSV_NUMBERS_100_URL = '@loaders.gl/csv/test/data/numbers-100.csv';
const CSV_NUMBERS_10000_URL = '@loaders.gl/csv/test/data/numbers-10000.csv';
const CSV_INCIDENTS_URL_QUOTES = '@loaders.gl/csv/test/data/sf_incidents-small.csv';

test('CSVArrowLoader#loadInBatches(numbers-100.csv, arrow)', async (t) => {
test('CSVArrowLoader#loadInBatches(numbers-100.csv)', async (t) => {
const iterator = await loadInBatches(CSV_NUMBERS_100_URL, CSVArrowLoader, {
csv: {
shape: 'arrow-table'
},
batchSize: 40
});

Expand All @@ -32,11 +30,8 @@ test('CSVArrowLoader#loadInBatches(numbers-100.csv, arrow)', async (t) => {
t.end();
});

test('CSVArrowLoader#loadInBatches(numbers-10000.csv, arrow)', async (t) => {
test('CSVArrowLoader#loadInBatches(numbers-10000.csv)', async (t) => {
const iterator = await loadInBatches(CSV_NUMBERS_10000_URL, CSVArrowLoader, {
csv: {
shape: 'arrow-table'
},
batchSize: 2000
});
t.ok(isIterator(iterator) || isAsyncIterable(iterator), 'loadInBatches returned iterator');
Expand All @@ -51,3 +46,18 @@ test('CSVArrowLoader#loadInBatches(numbers-10000.csv, arrow)', async (t) => {

t.end();
});

// Loads the SF incidents sample through CSVArrowLoader without passing any options and
// verifies that each emitted batch carries an Arrow table.
test('CSVArrowLoader#loadInBatches(incidents.csv)', async (t) => {
  const iterator = await loadInBatches(CSV_INCIDENTS_URL_QUOTES, CSVArrowLoader);
  t.ok(isIterator(iterator) || isAsyncIterable(iterator), 'loadInBatches returned iterator');

  let batchCount = 0;
  for await (const batch of iterator) {
    // The check is against arrow.Table, so the message names that type
    t.ok(batch.data instanceof arrow.Table, 'returns arrow Table');
    batchCount++;
  }
  // NOTE(review): 5 presumably follows from the loader's default batch size and the
  // fixture's row count - confirm if either changes
  t.equal(batchCount, 5, 'Correct number of batches received');

  t.end();
});
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ export function convertBatch(
batch: TableBatch,
shape: 'object-row-table' | 'array-row-table' | 'columnar-table' | 'arrow-table'
): TableBatch {
switch (batch.shape) {
switch (shape) {
case 'object-row-table':
return {...batch, ...convertTable(batch, 'object-row-table')};
case 'array-row-table':
Expand Down

0 comments on commit bbad322

Please sign in to comment.