From 73bf97f8611d1d47e7e6abeb22f00b38a847151e Mon Sep 17 00:00:00 2001 From: Ib Green Date: Thu, 16 Dec 2021 06:55:30 -0800 Subject: [PATCH] feat(wkt): Add TWKB parser --- docs/modules/shapefile/formats/shapefile.md | 2 +- docs/modules/wkt/formats/twkb.md | 21 + docs/modules/wkt/formats/wkb.md | 88 ++-- docs/modules/wkt/formats/wkt-crs.md | 22 +- docs/modules/wkt/formats/wkt.md | 24 +- modules/wkt/src/index.ts | 3 + modules/wkt/src/lib/encode-twkb.ts | 304 +++++++++++++ modules/wkt/src/lib/encode-wkb.ts | 6 +- modules/wkt/src/lib/parse-twkb.ts | 361 ++++++++++++++++ modules/wkt/src/lib/parse-wkb-header.ts | 14 + modules/wkt/src/lib/parse-wkb.ts | 16 +- modules/wkt/src/lib/parse-wkt.ts | 262 ++++++------ modules/wkt/src/lib/utils/binary-reader.ts | 72 ++++ modules/wkt/src/twkb-loader.ts | 42 ++ modules/wkt/src/twkb-writer.ts | 25 ++ modules/wkt/src/wkb-writer.ts | 6 +- modules/wkt/test/index.ts | 3 + modules/wkt/test/twkb-loader.spec.ts | 40 ++ modules/wkt/test/twkb-writer.spec.ts | 63 +++ modules/wkt/test/wip/wkt-loader.spec.ts | 450 -------------------- 20 files changed, 1174 insertions(+), 650 deletions(-) create mode 100644 docs/modules/wkt/formats/twkb.md create mode 100644 modules/wkt/src/lib/encode-twkb.ts create mode 100644 modules/wkt/src/lib/parse-twkb.ts create mode 100644 modules/wkt/src/lib/utils/binary-reader.ts create mode 100644 modules/wkt/src/twkb-loader.ts create mode 100644 modules/wkt/src/twkb-writer.ts create mode 100644 modules/wkt/test/twkb-loader.spec.ts create mode 100644 modules/wkt/test/twkb-writer.spec.ts delete mode 100644 modules/wkt/test/wip/wkt-loader.spec.ts diff --git a/docs/modules/shapefile/formats/shapefile.md b/docs/modules/shapefile/formats/shapefile.md index f1b793d757..829299ffd2 100644 --- a/docs/modules/shapefile/formats/shapefile.md +++ b/docs/modules/shapefile/formats/shapefile.md @@ -1,7 +1,7 @@ # Shapefile -- *[`@loaders.gl/shapefile`](/docs/modules/shapefile/formats/shapefile)* +- 
*[`@loaders.gl/shapefile`](/docs/modules/shapefile)* - *https://www.clicketyclick.dk/databases/xbase/format/data_types.html* - *http://www.dbase.com/Knowledgebase/INT/db7_file_fmt.htm* - *http://webhelp.esri.com/arcgisdesktop/9.3/index.cfm?TopicName=Geoprocessing_considerations_for_shapefile_output* diff --git a/docs/modules/wkt/formats/twkb.md b/docs/modules/wkt/formats/twkb.md new file mode 100644 index 0000000000..49aef3f9e5 --- /dev/null +++ b/docs/modules/wkt/formats/twkb.md @@ -0,0 +1,21 @@ +# TWKB + +- *[`@loaders.gl/wkt`](/docs/modules/wkt)* +- *[TWKB specification](https://github.com/TWKB/Specification/blob/master/twkb.md)* + +TWKB is a format for serializing vector geometry data into a binary byte buffer, similar to [WKB](./wkb) but with an emphasis on minimizing the size of the buffer. + +## Memory Layout + +WKB uses IEEE doubles as the coordinate storage format, so for data with lots of spatially adjacent coordinates (typical for GIS data) it wastes precision, i.e. space on redundant coordinate information: + +- TWKB only stores the absolute position once, and stores all other positions as delta values relative to the preceding position. +- TWKB only uses as much address space as is necessary for any given value. Practically this means that "variable length integers" or "varints" are used throughout the specification for storing values in any situation where numbers greater than 128 might be encountered. + +## Ecosystem Support + +- PostGIS offers a function to return geometries in TWKB format: [ST_AsTWKB](https://postgis.net/docs/ST_AsTWKB.html). + +## Versions / History + +Unknown. 
diff --git a/docs/modules/wkt/formats/wkb.md b/docs/modules/wkt/formats/wkb.md index 25abe93fa8..1c00b576f4 100644 --- a/docs/modules/wkt/formats/wkb.md +++ b/docs/modules/wkt/formats/wkb.md @@ -2,7 +2,9 @@ ![ogc-logo](../../../images/logos/ogc-logo-60.png) -Well-known binary +- *[`@loaders.gl/wkt`](/docs/modules/wkt)* + +Well-Known Binary (WKB) is a binary version of Well-known Text ## Overview @@ -20,14 +22,22 @@ Well-known binary (WKB) representations are typically shown in hexadecimal strin - GML Geometry -| Format | Support | Description | -| ------ | ------- | -------------------------------------------------- | -| EWKT | ❌ | WKT that starts with a spatial reference id (SRID) | -| TWKB | ❌ | WKB variant reduces binary size ~2x. | +| Format | Support | Description | +| ------ | ------- | ------------------------------------ | +| WKB | ❌ | | +| TWKB | ❌ | WKB variant reduces binary size ~2x. | TWKB uses varints, precision truncation and zigzag point encoding to reduce binary size ~2x (however compressed size reduction is less) -## Encoding +## Version History + +TBA. + +## Ecosystem Support + +- PostGIS offers a function to return geometries in TWKB format: [ST_AsTWKB](https://postgis.net/docs/ST_AsTWKB.html). 
+ +## Format Details The first byte indicates the byte order for the data: @@ -36,36 +46,38 @@ The first byte indicates the byte order for the data: The next 4 bytes are a 32-bit unsigned integer for the geometry type, as described below: -*Note that some implementations, including loaders.gl, only handle the core GeoJSON geometry equivalents (points, line strings, polygons and to a varying degrees geometry collections of the same).* - -| Type | 2D | Z | M | ZM | -| ------------------ | ---- | ---- | ---- | ---- | -| Geometry | 0000 | 1000 | 2000 | 3000 | -| Point | 0001 | 1001 | 2001 | 3001 | -| LineString | 0002 | 1002 | 2002 | 3002 | -| Polygon | 0003 | 1003 | 2003 | 3003 | -| MultiPoint | 0004 | 1004 | 2004 | 3004 | -| MultiLineString | 0005 | 1005 | 2005 | 3005 | -| MultiPolygon | 0006 | 1006 | 2006 | 3006 | -| GeometryCollection | 0007 | 1007 | 2007 | 3007 | -| | -| CircularString | 0008 | 1008 | 2008 | 3008 | -| CompoundCurve | 0009 | 1009 | 2009 | 3009 | -| CurvePolygon | 0010 | 1010 | 2010 | 3010 | -| MultiCurve | 0011 | 1011 | 2011 | 3011 | -| MultiSurface | 0012 | 1012 | 2012 | 3012 | -| Curve | 0013 | 1013 | 2013 | 3013 | -| Surface | 0014 | 1014 | 2014 | 3014 | -| PolyhedralSurface | 0015 | 1015 | 2015 | 3015 | -| TIN | 0016 | 1016 | 2016 | 3016 | -| Triangle | 0017 | 1017 | 2017 | 3017 | -| Circle | 0018 | 1018 | 2018 | 3018 | -| GeodesicString | 0019 | 1019 | 2019 | 3019 | -| EllipticalCurve | 0020 | 1020 | 2020 | 3020 | -| NurbsCurve | 0021 | 1021 | 2021 | 3021 | -| Clothoid | 0022 | 1022 | 2022 | 3022 | -| SpiralCurve | 0023 | 1023 | 2023 | 3023 | -| CompoundSurface | 0024 | 1024 | 2024 | 3024 | -| BrepSolid | | 1025 | | | -| AffinePlacement | 102 | 1102 | | | +| Type | Supported | 2D | Z | M | ZM | +| -------------------- | --------- | ---- | ---- | ---- | ---- | +| `Geometry` | ✅ | 0000 | 1000 | 2000 | 3000 | +| `Point` | ✅ | 0001 | 1001 | 2001 | 3001 | +| `LineString` | ✅ | 0002 | 1002 | 2002 | 3002 | +| `Polygon` | ✅ | 0003 | 1003 | 2003 | 3003 | +| 
`MultiPoint` | ✅ | 0004 | 1004 | 2004 | 3004 | +| `MultiLineString` | ✅ | 0005 | 1005 | 2005 | 3005 | +| `MultiPolygon` | ✅ | 0006 | 1006 | 2006 | 3006 | +| `GeometryCollection` | ✅ \* | 0007 | 1007 | 2007 | 3007 | +| | | | | | | +| `CircularString` | ❌ | 0008 | 1008 | 2008 | 3008 | +| `CompoundCurve` | ❌ | 0009 | 1009 | 2009 | 3009 | +| `CurvePolygon` | ❌ | 0010 | 1010 | 2010 | 3010 | +| `MultiCurve` | ❌ | 0011 | 1011 | 2011 | 3011 | +| `MultiSurface` | ❌ | 0012 | 1012 | 2012 | 3012 | +| `Curve` | ❌ | 0013 | 1013 | 2013 | 3013 | +| `Surface` | ❌ | 0014 | 1014 | 2014 | 3014 | +| `PolyhedralSurface` | ❌ | 0015 | 1015 | 2015 | 3015 | +| `TIN` | ❌ | 0016 | 1016 | 2016 | 3016 | +| `Triangle` | ❌ | 0017 | 1017 | 2017 | 3017 | +| `Circle` | ❌ | 0018 | 1018 | 2018 | 3018 | +| `GeodesicString` | ❌ | 0019 | 1019 | 2019 | 3019 | +| `EllipticalCurve` | ❌ | 0020 | 1020 | 2020 | 3020 | +| `NurbsCurve` | ❌ | 0021 | 1021 | 2021 | 3021 | +| `Clothoid` | ❌ | 0022 | 1022 | 2022 | 3022 | +| `SpiralCurve` | ❌ | 0023 | 1023 | 2023 | 3023 | +| `CompoundSurface` | ❌ | 0024 | 1024 | 2024 | 3024 | +| `BrepSolid` | ❌ | | 1025 | | | +| `AffinePlacement` | ❌ | 102 | 1102 | | | + +Remarks: +- *Many implementations, including loaders.gl, only handle the core GeoJSON geometry equivalents (points, line strings, polygons and to a varying degrees geometry collections of the same).* +- *`GeometryCollection`* can be difficult for some clients to handle. 
diff --git a/docs/modules/wkt/formats/wkt-crs.md b/docs/modules/wkt/formats/wkt-crs.md index ee44606b30..4ef4b8ce67 100644 --- a/docs/modules/wkt/formats/wkt-crs.md +++ b/docs/modules/wkt/formats/wkt-crs.md @@ -2,16 +2,20 @@ ![ogc-logo](../../../images/logos/ogc-logo-60.png) -- OGC Standard: https://www.ogc.org/standards/wkt-crs -- Wikipedia Page: https://en.wikipedia.org/wiki/Well-known_text_representation_of_coordinate_reference_systems +- *[`@loaders.gl/wkt`](/docs/modules/wkt)* +- *[OGC Standard](https://www.ogc.org/standards/wkt-crs)* +- *[Wikipedia Page](https://en.wikipedia.org/wiki/Well-known_text_representation_of_coordinate_reference_systems)* Well-known text representation of coordinate reference systems (WKT or WKT-CRS) is a text markup language for representing spatial reference systems and transformations between spatial reference systems. The formats were originally defined by the Open Geospatial Consortium (OGC) and described in their Simple Feature Access and Well-known text representation of coordinate reference systems specifications. The current standard definition is ISO 19162:2019. -# Version History +## Version History + +| Name | Year | Description | ISO | +| --------------------- | ------ | ------------------------------------------------------------ | ---------------- | +| WKT | (1999) | As initially defined by the Open Geospatial Consortium (OGC) | | +| "WKT 1" | (2001) | WKT was extended in 2001. Sometimes known as "WKT 1". | ISO 19125-1:2004 | +| "WKT 2" / "WKT-CRS 1" | (2015) | Addresses new requirements and inconsistencies in WKT 1. | ISO 19162:2015 | +| "WKT-CRS 2" | (2018) | A newer revision. | ISO 19162:2019 | + +## Ecosystem Support + 
-- "WKT 2" / "WKT-CRS 1" (2015) - The updated "Well-known text representation of coordinate reference systems" standard, adopted by Open Geospatial Consortium / ISO 19162:2015. - - Addresses new requirements and inconsistencies in implementation of WKT 1 format. - - Confusingly, this standard has a version number 1 for the new, stricter WKT-CRS specification. -- "WKT-CRS 2" (2018) - A newer revision called was published in 2018: ISO 19162:2019. \ No newline at end of file diff --git a/docs/modules/wkt/formats/wkt.md b/docs/modules/wkt/formats/wkt.md index 6c2cd0bc96..7ff24340c0 100644 --- a/docs/modules/wkt/formats/wkt.md +++ b/docs/modules/wkt/formats/wkt.md @@ -40,20 +40,24 @@ Well-known text (WKT) for geometry is a text markup language for representing ve | GeoJSON Geometry | JSON based, human-readable, slightly more verbose, easier to parse | | GML Geometry | XML based, human-readable, even more verbose, more complex to parse | +## Ecosystem Support + +- PostGIS and some other databases offer functions to return geometries in WKT format: [WKT](https://postgis.net/docs/ST_AsText.html), [ST_AsEWKT](https://postgis.net/docs/ST_AsEWKT.html). + ## Geometries WKT can represent a range of distinct geometric objects. 
*Note that some implementations, including loaders.gl, only handle the core GeoJSON geometry equivalents (points, line strings, polygons and to a varying degrees geometry collections of the same).* -| Geometry | -| ------------------------------------ | -| Point, MultiPoint | -| LineString, MultiLineString | -| Polygon, MultiPolygon | -| GeometryCollection | -| | -| Triangle | -| PolyhedralSurface | -| TIN (Triangulated irregular network) | +| Geometry | +| -------------------------------------- | +| `Point`, `MultiPoint` | +| `LineString`, `MultiLineString` | +| `Polygon`, `MultiPolygon` | +| `GeometryCollection` | +| | +| `Triangle` | +| `PolyhedralSurface` | +| `TIN` (Triangulated irregular network) | diff --git a/modules/wkt/src/index.ts b/modules/wkt/src/index.ts index e5351f28d3..3c8269c16c 100644 --- a/modules/wkt/src/index.ts +++ b/modules/wkt/src/index.ts @@ -3,6 +3,9 @@ export {WKBLoader, WKBWorkerLoader} from './wkb-loader'; export {WKBWriter} from './wkb-writer'; +export {TWKBLoader} from './twkb-loader'; +export {TWKBWriter} from './twkb-writer'; + export {HexWKBLoader} from './hex-wkb-loader'; export {WKTLoader, WKTWorkerLoader} from './wkt-loader'; diff --git a/modules/wkt/src/lib/encode-twkb.ts b/modules/wkt/src/lib/encode-twkb.ts new file mode 100644 index 0000000000..5248ceca97 --- /dev/null +++ b/modules/wkt/src/lib/encode-twkb.ts @@ -0,0 +1,304 @@ +// loaders.gl, MIT license +// Forked from https://github.com/cschwarz/wkx under MIT license, Copyright (c) 2013 Christian Schwarz + +import type {Point, MultiPoint, LineString} from '@loaders.gl/schema'; +import type { + MultiLineString, + Polygon, + MultiPolygon, + GeometryCollection, + Geometry +} from '@loaders.gl/schema'; + +import {BinaryWriter} from './utils/binary-writer'; +import {WKBGeometryType} from './parse-wkb-header'; + +type TWKBPrecision = { + xy: number; + z: number; + m: number; + xyFactor: number; + zFactor: number; + mFactor: number; +}; + +type TWKBEncoderContext = TWKBPrecision 
& { + hasZ?: boolean; + hasM?: boolean; +}; + +export function encodeTWKB( + geometry: Geometry, + options?: {hasZ?: boolean; hasM?: boolean} +): ArrayBuffer { + const writer = new BinaryWriter(0, true); + + const context: TWKBEncoderContext = { + ...getTwkbPrecision(5, 0, 0), + hasZ: options?.hasZ, + hasM: options?.hasM + }; + + encodeGeometry(writer, geometry, context); + + // TODO - we need to slice it? + return writer.arrayBuffer; +} + +function encodeGeometry(writer: BinaryWriter, geometry: Geometry, context: TWKBEncoderContext) { + switch (geometry.type) { + case 'Point': + return encodePoint(writer, context, geometry); + case 'LineString': + return encodeLineString(writer, context, geometry); + case 'Polygon': + return encodePolygon(writer, context, geometry); + case 'MultiPoint': + return encodeMultiPoint(writer, context, geometry); + case 'MultiLineString': + return encodeMultiLineString(writer, context, geometry); + case 'MultiPolygon': + return encodeMultiPolygon(writer, context, geometry); + case 'GeometryCollection': + return encodeGeometryCollection(writer, context, geometry); + default: + throw new Error('unsupported geometry type'); + } +} + +function encodePoint(writer: BinaryWriter, context: TWKBEncoderContext, point: Point): void { + const isEmpty = + point.coordinates.length === 0 || point[0] === 'undefined' || point[1] === 'undefined'; + + writeTwkbHeader(writer, context, WKBGeometryType.Point, isEmpty); + + if (!isEmpty) { + const previousPoint = [0, 0, 0, 0]; + writeTwkbPoint(writer, context, point.coordinates, previousPoint); + } +} + +function encodeLineString( + writer: BinaryWriter, + context: TWKBEncoderContext, + lineString: LineString +): ArrayBuffer { + const points = lineString.coordinates; + const isEmpty = points.length === 0; + + writeTwkbHeader(writer, context, WKBGeometryType.LineString, isEmpty); + + if (!isEmpty) { + writer.writeVarInt(points.length); + const previousPoint = [0, 0, 0, 0]; + for (const point of points) { + 
writeTwkbPoint(writer, context, point, previousPoint); + } + } + + return writer.arrayBuffer; +} + +function encodePolygon( + writer: BinaryWriter, + context: TWKBEncoderContext, + polygon: Polygon +): ArrayBuffer { + const polygonRings = polygon.coordinates; + + const isEmpty = polygonRings.length === 0; + + writeTwkbHeader(writer, context, WKBGeometryType.Polygon, isEmpty); + + if (!isEmpty) { + writer.writeVarInt(polygonRings.length); + + const previousPoint = [0, 0, 0, 0]; + for (const ring of polygonRings) { + writer.writeVarInt(ring.length); + for (const point of ring) { + writeTwkbPoint(writer, context, point, previousPoint); + } + } + } + + return writer.arrayBuffer; +} + +function encodeMultiPoint( + writer: BinaryWriter, + context: TWKBEncoderContext, + multiPoint: MultiPoint +): void { + const points = multiPoint.coordinates; + const isEmpty = points.length === 0; + + writeTwkbHeader(writer, context, WKBGeometryType.MultiPoint, isEmpty); + + if (!isEmpty) { + writer.writeVarInt(points.length); + + const previousPoint = [0, 0, 0, 0]; + for (let i = 0; i < points.length; i++) { + writeTwkbPoint(writer, context, points[i], previousPoint); + } + } +} + +function encodeMultiLineString( + writer: BinaryWriter, + context: TWKBEncoderContext, + multiLineStrings: MultiLineString +): ArrayBuffer { + const lineStrings = multiLineStrings.coordinates; + const isEmpty = lineStrings.length === 0; + + writeTwkbHeader(writer, context, WKBGeometryType.MultiLineString, isEmpty); + + if (!isEmpty) { + writer.writeVarInt(lineStrings.length); + + const previousPoint = [0, 0, 0, 0]; + for (const lineString of lineStrings) { + writer.writeVarInt(lineString.length); + + for (const point of lineString) { + writeTwkbPoint(writer, context, point, previousPoint); + } + } + } + + return writer.arrayBuffer; +} + +function encodeMultiPolygon( + writer: BinaryWriter, + context: TWKBEncoderContext, + multiPolygon: MultiPolygon +): void { + const {coordinates} = multiPolygon; + const 
isEmpty = coordinates.length === 0; + + writeTwkbHeader(writer, context, WKBGeometryType.MultiPolygon, isEmpty); + + if (!isEmpty) { + const polygons = coordinates; + writer.writeVarInt(polygons.length); + + const previousPoint = [0, 0, 0, 0]; + + for (const polygonRings of polygons) { + writer.writeVarInt(polygonRings.length); + for (const ring of polygonRings) { + writer.writeVarInt(ring.length); + for (const point of ring) { + writeTwkbPoint(writer, context, point, previousPoint); + } + } + } + } +} + +function encodeGeometryCollection( + writer: BinaryWriter, + context: TWKBEncoderContext, + geometryCollection: GeometryCollection +): void { + const {geometries} = geometryCollection; + const isEmpty = geometries.length === 0; + + writeTwkbHeader(writer, context, WKBGeometryType.GeometryCollection, isEmpty); + + if (geometries.length > 0) { + writer.writeVarInt(geometries.length); + for (const geometry of geometries) { + encodeGeometry(writer, geometry, context); + } + } +} + +/** + * Write the TWKB type byte, the metadata byte and, when Z or M is present, the extended precision byte. + * @param writer + * @param context + * @param geometryType + * @param isEmpty + */ +function writeTwkbHeader( + writer: BinaryWriter, + context: TWKBEncoderContext, + geometryType: WKBGeometryType, + isEmpty: boolean +) { + const type = (zigZagEncode(context.xy) << 4) + geometryType; + let metadataHeader = context.hasZ || context.hasM ? 1 << 3 : 0; + metadataHeader += isEmpty ? 1 << 4 : 0; + + writer.writeUInt8(type); + writer.writeUInt8(metadataHeader); + + if (context.hasZ || context.hasM) { + let extendedPrecision = 0; + if (context.hasZ) { + extendedPrecision |= 0x1; + } + if (context.hasM) { + extendedPrecision |= 0x2; + } + writer.writeUInt8(extendedPrecision); + } +} + +/** + * Write one point to array buffer. ZigZagEncoding the delta from the previous point. Mutates previousPoint. 
+ * @param writer + * @param context + * @param previousPoint - Mutated by this function + * @param point + */ +function writeTwkbPoint( + writer: BinaryWriter, + context: TWKBEncoderContext, + point: number[], + previousPoint: number[] +): void { + const x = point[0] * context.xyFactor; + const y = point[1] * context.xyFactor; + const z = point[2] * context.zFactor; + const m = point[3] * context.mFactor; + + writer.writeVarInt(zigZagEncode(x - previousPoint[0])); + writer.writeVarInt(zigZagEncode(y - previousPoint[1])); + if (context.hasZ) { + writer.writeVarInt(zigZagEncode(z - previousPoint[2])); + } + if (context.hasM) { + writer.writeVarInt(zigZagEncode(m - previousPoint[3])); + } + + previousPoint[0] = x; + previousPoint[1] = y; + previousPoint[2] = z; + previousPoint[3] = m; +} + +// HELPERS + +function zigZagEncode(value: number): number { + return (value << 1) ^ (value >> 31); +} + +function getTwkbPrecision( + xyPrecision: number, + zPrecision: number, + mPrecision: number +): TWKBPrecision { + return { + xy: xyPrecision, + z: zPrecision, + m: mPrecision, + xyFactor: Math.pow(10, xyPrecision), + zFactor: Math.pow(10, zPrecision), + mFactor: Math.pow(10, mPrecision) + }; +} diff --git a/modules/wkt/src/lib/encode-wkb.ts b/modules/wkt/src/lib/encode-wkb.ts index e1277a49cb..41236f4356 100644 --- a/modules/wkt/src/lib/encode-wkb.ts +++ b/modules/wkt/src/lib/encode-wkb.ts @@ -20,7 +20,7 @@ import {BinaryWriter} from './utils/binary-writer'; * Integer code for geometry type * Reference: https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary */ -enum WKB { +export enum WKB { Point = 1, LineString = 2, Polygon = 3, @@ -33,7 +33,7 @@ enum WKB { /** * Options for encodeWKB */ -interface WKBOptions { +type WKBOptions = { /** Does the GeoJSON input have Z values? 
*/ hasZ?: boolean; @@ -42,7 +42,7 @@ interface WKBOptions { /** Spatial reference for input GeoJSON */ srid?: any; -} +}; /** * Encodes a GeoJSON object into WKB diff --git a/modules/wkt/src/lib/parse-twkb.ts b/modules/wkt/src/lib/parse-twkb.ts new file mode 100644 index 0000000000..30d986f23d --- /dev/null +++ b/modules/wkt/src/lib/parse-twkb.ts @@ -0,0 +1,361 @@ +// loaders.gl, MIT license +// Forked from https://github.com/cschwarz/wkx under MIT license, Copyright (c) 2013 Christian Schwarz + +import type {BinaryGeometry, Geometry, GeometryCollection} from '@loaders.gl/schema'; +import type {Point, LineString, Polygon} from '@loaders.gl/schema'; +import type {MultiPoint, MultiLineString, MultiPolygon} from '@loaders.gl/schema'; +import {BinaryReader} from './utils/binary-reader'; +import {WKBGeometryType} from './parse-wkb-header'; + +/** + * Check if an array buffer might be a TWKB array buffer + * @param arrayBuffer The array buffer to check + * @returns false if this is definitely not a TWKB array buffer, true if it might be a TWKB array buffer + */ +export function isTWKB(arrayBuffer: ArrayBuffer): boolean { + const binaryReader = new BinaryReader(arrayBuffer); + + const type = binaryReader.readUInt8(); + const geometryType = type & 0x0f; + + // Only geometry types 1 to 7 (point to geometry collection are currently defined) + if (geometryType < 1 || geometryType > 7) { + return false; + } + + return true; +} + +/** + * Parse a TWKB encoded array buffer + * @param arrayBuffer + */ +export function parseTWKB(arrayBuffer: ArrayBuffer): BinaryGeometry { + throw new Error('not implemented'); +} + +/** State passed around between parsing functions, extracted from the header */ +type ParseTWKBState = { + hasBoundingBox: boolean; + hasSizeAttribute: boolean; + hasIdList: boolean; + hasExtendedPrecision: boolean; + isEmpty: boolean; + + precision: number; + precisionFactor: number; + + hasZ: boolean; + zPrecision: number; + zPrecisionFactor: number; + + hasM: 
boolean; + mPrecision: number; + mPrecisionFactor: number; +}; + +export function parseTWKBGeometry(arrayBuffer: ArrayBuffer): Geometry { + const binaryReader = new BinaryReader(arrayBuffer); + + const type = binaryReader.readUInt8(); + const metadataHeader = binaryReader.readUInt8(); + + const geometryType = type & 0x0f; + + const precision = zigZagDecode(type >> 4); + + const hasExtendedPrecision = Boolean((metadataHeader >> 3) & 1); + let hasZ = false; + let hasM = false; + let zPrecision = 0; + let zPrecisionFactor = 1; + let mPrecision = 0; + let mPrecisionFactor = 1; + + if (hasExtendedPrecision) { + const extendedPrecision = binaryReader.readUInt8(); + hasZ = (extendedPrecision & 0x01) === 0x01; + hasM = (extendedPrecision & 0x02) === 0x02; + + zPrecision = zigZagDecode((extendedPrecision & 0x1c) >> 2); + zPrecisionFactor = Math.pow(10, zPrecision); + + mPrecision = zigZagDecode((extendedPrecision & 0xe0) >> 5); + mPrecisionFactor = Math.pow(10, mPrecision); + } + + const context: ParseTWKBState = { + precision, + precisionFactor: Math.pow(10, precision), + + hasBoundingBox: Boolean((metadataHeader >> 0) & 1), + hasSizeAttribute: Boolean((metadataHeader >> 1) & 1), + hasIdList: Boolean((metadataHeader >> 2) & 1), + hasExtendedPrecision, + isEmpty: Boolean((metadataHeader >> 4) & 1), + + hasZ, + hasM, + zPrecision, + zPrecisionFactor, + mPrecision, + mPrecisionFactor + }; + + if (context.hasSizeAttribute) { + binaryReader.readVarInt(); + } + + if (context.hasBoundingBox) { + let dimensions = 2; + + if (context.hasZ) { + dimensions++; + } + if (context.hasM) { + dimensions++; + } + + // TODO why are we throwing away these datums? 
+ for (let i = 0; i < dimensions; i++) { + binaryReader.readVarInt(); + binaryReader.readVarInt(); + } + } + + return parseGeometry(binaryReader, context, geometryType); +} + +function parseGeometry( + binaryReader: BinaryReader, + context: ParseTWKBState, + geometryType: WKBGeometryType +): Geometry { + switch (geometryType) { + case WKBGeometryType.Point: + return parsePoint(binaryReader, context); + case WKBGeometryType.LineString: + return parseLineString(binaryReader, context); + case WKBGeometryType.Polygon: + return parsePolygon(binaryReader, context); + case WKBGeometryType.MultiPoint: + return parseMultiPoint(binaryReader, context); + case WKBGeometryType.MultiLineString: + return parseMultiLineString(binaryReader, context); + case WKBGeometryType.MultiPolygon: + return parseMultiPolygon(binaryReader, context); + case WKBGeometryType.GeometryCollection: + return parseGeometryCollection(binaryReader, context); + default: + throw new Error(`GeometryType ${ geometryType } not supported`); + } +} + +// GEOMETRIES + +function parsePoint(reader: BinaryReader, context: ParseTWKBState): Point { + if (context.isEmpty) { + return {type: 'Point', coordinates: []}; + } + + return {type: 'Point', coordinates: parsePointCoordinates(reader, context)}; +} + +function parseLineString(reader: BinaryReader, context: ParseTWKBState): LineString { + if (context.isEmpty) { + return {type: 'LineString', coordinates: []}; + } + + const pointCount = reader.readVarInt(); + + const previousPoint = makePreviousPoint(context); + + const points: number[][] = []; + for (let i = 0; i < pointCount; i++) { + points.push(parseNextPoint(reader, context, previousPoint)); + } + + return {type: 'LineString', coordinates: points}; +} + +function parsePolygon(reader: BinaryReader, context: ParseTWKBState): Polygon { + if (context.isEmpty) { + return {type: 'Polygon', coordinates: []}; + } + + const ringCount = reader.readVarInt(); + + const polygons: number[][][] = []; + + const previousPoint = 
makePreviousPoint(context); + + const exteriorRingCount = reader.readVarInt(); + const exteriorRing: number[][] = []; + + for (let i = 0; i < exteriorRingCount; i++) { + exteriorRing.push(parseNextPoint(reader, context, previousPoint)); + } + + const polygon: number[][][] = [exteriorRing]; + for (let i = 1; i < ringCount; i++) { + const interiorRingCount = reader.readVarInt(); + + const interiorRing: number[][] = []; + for (let j = 0; j < interiorRingCount; j++) { + interiorRing.push(parseNextPoint(reader, context, previousPoint)); + } + + polygon.push(interiorRing); + } + + return {type: 'Polygon', coordinates: polygon}; +} + +function parseMultiPoint(reader: BinaryReader, context: ParseTWKBState): MultiPoint { + if (context.isEmpty) { + return {type: 'MultiPoint', coordinates: []}; + } + + const previousPoint = makePreviousPoint(context); + const pointCount = reader.readVarInt(); + + const coordinates: number[][] = []; + for (let i = 0; i < pointCount; i++) { + coordinates.push(parseNextPoint(reader, context, previousPoint)); + } + + return {type: 'MultiPoint', coordinates}; +} + +function parseMultiLineString(reader: BinaryReader, context: ParseTWKBState): MultiLineString { + if (context.isEmpty) { + return {type: 'MultiLineString', coordinates: []}; + } + + const previousPoint = makePreviousPoint(context); + const lineStringCount = reader.readVarInt(); + + const coordinates: number[][][] = []; + for (let i = 0; i < lineStringCount; i++) { + const pointCount = reader.readVarInt(); + + const lineString: number[][] = []; + for (let j = 0; j < pointCount; j++) { + lineString.push(parseNextPoint(reader, context, previousPoint)); + } + + coordinates.push(lineString); + } + + return {type: 'MultiLineString', coordinates}; +} + +function parseMultiPolygon(reader: BinaryReader, context: ParseTWKBState): MultiPolygon { + if (context.isEmpty) { + return {type: 'MultiPolygon', coordinates: []}; + } + + const previousPoint = makePreviousPoint(context); + const polygonCount 
= reader.readVarInt(); + + const polygons: number[][][][] = []; + for (let i = 0; i < polygonCount; i++) { + const ringCount = reader.readVarInt(); + + const exteriorPointCount = reader.readVarInt(); + + const exteriorRing: number[][] = []; + for (let j = 0; j < exteriorPointCount; j++) { + exteriorRing.push(parseNextPoint(reader, context, previousPoint)); + } + + const polygon: number[][][] = [exteriorRing]; + + for (let j = 1; j < ringCount; j++) { + const interiorRing: number[][] = []; + + const interiorRingCount = reader.readVarInt(); + + for (let k = 0; k < interiorRingCount; k++) { + interiorRing.push(parseNextPoint(reader, context, previousPoint)); + } + + polygon.push(interiorRing); + } + + polygons.push(polygon); + } + + return {type: 'MultiPolygon', coordinates: polygons}; +} + +/** Geometry collection not yet supported */ +function parseGeometryCollection( + reader: BinaryReader, + context: ParseTWKBState +): GeometryCollection { + return {type: 'GeometryCollection', geometries: []}; + /** + if (context.isEmpty) { + return {type: 'GeometryCollection', geometries: []}; + } + + const geometryCount = reader.readVarInt(); + + const geometries: Geometry[] = new Array(geometryCount); + for (let i = 0; i < geometryCount; i++) { + const geometry = parseGeometry(reader, context, geometryType); + geometries.push(geometry); + } + + return {type: 'GeometryCollection', geometries: []}; + */ +} + +// HELPERS + +/** + * Maps negative values to positive values while going back and + forth (0 = 0, -1 = 1, 1 = 2, -2 = 3, 2 = 4, -3 = 5, 3 = 6 ...) + */ +function zigZagDecode(value: number): number { + return (value >> 1) ^ -(value & 1); +} + +function makePointCoordinates(x: number, y: number, z?: number, m?: number): number[] { + return (z !== undefined ? (m !== undefined ? [x, y, z, m] : [x, y, z]) : [x, y]) as number[]; +} + +function makePreviousPoint(context: ParseTWKBState): number[] { + return makePointCoordinates(0, 0, context.hasZ ? 0 : undefined, context.hasM ? 
0 : undefined); +} + +function parsePointCoordinates(reader: BinaryReader, context: ParseTWKBState): number[] { + const x = zigZagDecode(reader.readVarInt()) / context.precisionFactor; + const y = zigZagDecode(reader.readVarInt()) / context.precisionFactor; + const z = context.hasZ ? zigZagDecode(reader.readVarInt()) / context.zPrecisionFactor : undefined; + const m = context.hasM ? zigZagDecode(reader.readVarInt()) / context.mPrecisionFactor : undefined; + return makePointCoordinates(x, y, z, m); +} + +/** + * Modifies previousPoint + */ +function parseNextPoint( + reader: BinaryReader, + context: ParseTWKBState, + previousPoint: number[] +): number[] { + previousPoint[0] += zigZagDecode(reader.readVarInt()) / context.precisionFactor; + previousPoint[1] += zigZagDecode(reader.readVarInt()) / context.precisionFactor; + + if (context.hasZ) { + previousPoint[2] += zigZagDecode(reader.readVarInt()) / context.zPrecisionFactor; + } + if (context.hasM) { + previousPoint[3] += zigZagDecode(reader.readVarInt()) / context.mPrecisionFactor; + } + + // Copy the point + return previousPoint.slice(); +} diff --git a/modules/wkt/src/lib/parse-wkb-header.ts b/modules/wkt/src/lib/parse-wkb-header.ts index 0a1f8f518b..87d1cd2974 100644 --- a/modules/wkt/src/lib/parse-wkb-header.ts +++ b/modules/wkt/src/lib/parse-wkb-header.ts @@ -6,6 +6,20 @@ const EWKB_FLAG_SRID = 0x20000000; const MAX_SRID = 10000; // TBD: Assume no more than 10K SRIDs are defined +/** + * Integer code for geometry types in WKB and related formats + * Reference: https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry#Well-known_binary + */ +export enum WKBGeometryType { + Point = 1, + LineString = 2, + Polygon = 3, + MultiPoint = 4, + MultiLineString = 5, + MultiPolygon = 6, + GeometryCollection = 7 +} + /** Parsed WKB header */ export type WKBHeader = { /** WKB variant */ diff --git a/modules/wkt/src/lib/parse-wkb.ts b/modules/wkt/src/lib/parse-wkb.ts index 33450d6180..bc0949147f 100644 --- 
a/modules/wkt/src/lib/parse-wkb.ts +++ b/modules/wkt/src/lib/parse-wkb.ts @@ -11,7 +11,7 @@ import type { import {binaryToGeometry} from '@loaders.gl/gis'; import type {WKBLoaderOptions} from '../wkb-loader'; -import {parseWKBHeader} from './parse-wkb-header'; +import {parseWKBHeader, WKBGeometryType} from './parse-wkb-header'; export function parseWKB( arrayBuffer: ArrayBuffer, @@ -41,28 +41,28 @@ export function parseWKBToBinary( const offset = wkbHeader.byteOffset; switch (geometryType) { - case 1: + case WKBGeometryType.Point: const point = parsePoint(dataView, offset, dimensions, littleEndian); return point.geometry; - case 2: + case WKBGeometryType.LineString: const line = parseLineString(dataView, offset, dimensions, littleEndian); return line.geometry; - case 3: + case WKBGeometryType.Polygon: const polygon = parsePolygon(dataView, offset, dimensions, littleEndian); return polygon.geometry; - case 4: + case WKBGeometryType.MultiPoint: const multiPoint = parseMultiPoint(dataView, offset, dimensions, littleEndian); multiPoint.type = 'Point'; return multiPoint; - case 5: + case WKBGeometryType.MultiLineString: const multiLine = parseMultiLineString(dataView, offset, dimensions, littleEndian); multiLine.type = 'LineString'; return multiLine; - case 6: + case WKBGeometryType.MultiPolygon: const multiPolygon = parseMultiPolygon(dataView, offset, dimensions, littleEndian); multiPolygon.type = 'Polygon'; return multiPolygon; - // case 7: + // case WKBGeometryType.GeometryCollection: // TODO: handle GeometryCollections // return parseGeometryCollection(dataView, offset, dimensions, littleEndian); default: diff --git a/modules/wkt/src/lib/parse-wkt.ts b/modules/wkt/src/lib/parse-wkt.ts index 031e724c82..5928d9c196 100644 --- a/modules/wkt/src/lib/parse-wkt.ts +++ b/modules/wkt/src/lib/parse-wkt.ts @@ -56,109 +56,35 @@ export function parseWKT(input: string, options?: ParseWKTOptions): Geometry { return parseWKTToGeometry(input, options)!; } -type ParseWKTContext = { 
+/** State of parser, passed around between parser functions */ +type ParseWKTState = { parts: string[]; _: string | undefined; i: number; }; -function $(regexp: RegExp, context: ParseWKTContext) { - const match = context._?.substring(context.i).match(regexp); - if (!match) return null; - else { - context.i += match[0].length; - return match[0]; - } -} - -function white(context: ParseWKTContext) { - $(/^\s*/, context); -} - -function multicoords(context: ParseWKTContext): number[][] | null { - white(context); - let depth = 0; - const rings: number[][] = []; - const stack = [rings]; - let pointer: any = rings; - let elem; - - while ( - (elem = $(/^(\()/, context) || $(/^(\))/, context) || $(/^(,)/, context) || $(tuples, context)) - ) { - if (elem === '(') { - stack.push(pointer); - pointer = []; - stack[stack.length - 1].push(pointer); - depth++; - } else if (elem === ')') { - // For the case: Polygon(), ... - if (pointer.length === 0) return null; - - // @ts-ignore - pointer = stack.pop(); - // the stack was empty, input was malformed - if (!pointer) return null; - depth--; - if (depth === 0) break; - } else if (elem === ',') { - pointer = []; - stack[stack.length - 1].push(pointer); - } else if (!elem.split(/\s/g).some(isNaN)) { - Array.prototype.push.apply(pointer, elem.split(/\s/g).map(parseFloat)); - } else { - return null; - } - white(context); - } - - if (depth !== 0) return null; - - return rings; -} - -function coords(context: ParseWKTContext): number[][] | null { - const list: number[][] = []; - let item: any; - let pt; - while ((pt = $(tuples, context) || $(/^(,)/, context))) { - if (pt === ',') { - list.push(item); - item = []; - } else if (!pt.split(/\s/g).some(isNaN)) { - if (!item) item = []; - Array.prototype.push.apply(item, pt.split(/\s/g).map(parseFloat)); - } - white(context); - } - - if (item) list.push(item); - else return null; - - return list.length ? 
list : null; -} /** Parse into GeoJSON geometry */ function parseWKTToGeometry(input: string, options?: ParseWKTOptions): Geometry | null { const parts = input.split(';'); let _ = parts.pop(); const srid = (parts.shift() || '').split('=').pop(); - const context: ParseWKTContext = {parts, _, i: 0}; + const state: ParseWKTState = {parts, _, i: 0}; - const geometry = getGeometry(context); + const geometry = parseGeometry(state); return options?.wkt?.crs ? addCRS(geometry, srid) : geometry; } -function getGeometry(context: ParseWKTContext): Geometry | null { +function parseGeometry(state: ParseWKTState): Geometry | null { return ( - point(context) || - linestring(context) || - polygon(context) || - multipoint(context) || - multilinestring(context) || - multipolygon(context) || - geometrycollection(context) + parsePoint(state) || + parseLineString(state) || + parsePolygon(state) || + parseMultiPoint(state) || + parseMultiLineString(state) || + parseMultiPolygon(state) || + parseGeometryCollection(state) ); } @@ -178,20 +104,22 @@ function addCRS(obj: Geometry | null, srid?: string): Geometry | null { return obj; } -function point(context: ParseWKTContext): Geometry | null { - if (!$(/^(point(\sz)?)/i, context)) { +// GEOMETRIES - each parser first matches its case-insensitive WKT keyword (not the function name) and returns null when it is absent + +function parsePoint(state: ParseWKTState): Geometry | null { + if (!$(/^(point(\sz)?)/i, state)) { return null; } - white(context); - if (!$(/^(\()/, context)) { + white(state); + if (!$(/^(\()/, state)) { return null; } - const c = coords(context); + const c = coords(state); if (!c) { return null; } - white(context); - if (!$(/^(\))/, context)) { + white(state); + if (!$(/^(\))/, state)) { return null; } return { @@ -200,34 +128,34 @@ function point(context: ParseWKTContext): Geometry | null { }; } -function multipoint(context: ParseWKTContext): Geometry | null { - if (!$(/^(multipoint)/i, context)) { +function parseMultiPoint(state: ParseWKTState): Geometry | null { + if (!$(/^(multipoint)/i, state)) { return null; } - 
white(context); - const newCoordsFormat = context._?.substring(context._?.indexOf('(') + 1, context._.length - 1) + white(state); + const newCoordsFormat = state._?.substring(state._?.indexOf('(') + 1, state._.length - 1) .replace(/\(/g, '') .replace(/\)/g, ''); - context._ = 'MULTIPOINT (' + newCoordsFormat + ')'; - const c = multicoords(context); + state._ = 'MULTIPOINT (' + newCoordsFormat + ')'; + const c = multicoords(state); if (!c) { return null; } - white(context); + white(state); return { type: 'MultiPoint', coordinates: c }; } -function multilinestring(context: ParseWKTContext): Geometry | null { - if (!$(/^(multilinestring)/i, context)) return null; - white(context); - const c = multicoords(context); +function parseMultiLineString(state: ParseWKTState): Geometry | null { + if (!$(/^(multilinestring)/i, state)) return null; + white(state); + const c = multicoords(state); if (!c) { return null; } - white(context); + white(state); return { type: 'MultiLineString', // @ts-expect-error @@ -235,19 +163,19 @@ function multilinestring(context: ParseWKTContext): Geometry | null { }; } -function linestring(context: ParseWKTContext): Geometry | null { - if (!$(/^(linestring(\sz)?)/i, context)) { +function parseLineString(state: ParseWKTState): Geometry | null { + if (!$(/^(linestring(\sz)?)/i, state)) { return null; } - white(context); - if (!$(/^(\()/, context)) { + white(state); + if (!$(/^(\()/, state)) { return null; } - const c = coords(context); + const c = coords(state); if (!c) { return null; } - if (!$(/^(\))/, context)) { + if (!$(/^(\))/, state)) { return null; } return { @@ -256,12 +184,12 @@ function linestring(context: ParseWKTContext): Geometry | null { }; } -function polygon(context: ParseWKTContext): Geometry | null { - if (!$(/^(polygon(\sz)?)/i, context)) { +function parsePolygon(state: ParseWKTState): Geometry | null { + if (!$(/^(polygon(\sz)?)/i, state)) { return null; } - white(context); - const c = multicoords(context); + 
white(state); + const c = multicoords(state); if (!c) { return null; } @@ -272,12 +200,12 @@ function polygon(context: ParseWKTContext): Geometry | null { }; } -function multipolygon(context: ParseWKTContext): Geometry | null { - if (!$(/^(multipolygon)/i, context)) { +function parseMultiPolygon(state: ParseWKTState): Geometry | null { + if (!$(/^(multipolygon)/i, state)) { return null; } - white(context); - const c = multicoords(context); + white(state); + const c = multicoords(state); if (!c) { return null; } @@ -288,25 +216,25 @@ function multipolygon(context: ParseWKTContext): Geometry | null { }; } -function geometrycollection(context: ParseWKTContext): Geometry | null { +function parseGeometryCollection(state: ParseWKTState): Geometry | null { const geometries: Geometry[] = []; let geometry: Geometry | null; - if (!$(/^(geometrycollection)/i, context)) { + if (!$(/^(geometrycollection)/i, state)) { return null; } - white(context); + white(state); - if (!$(/^(\()/, context)) { + if (!$(/^(\()/, state)) { return null; } - while ((geometry = getGeometry(context))) { + while ((geometry = parseGeometry(state))) { geometries.push(geometry); - white(context); - $(/^(,)/, context); - white(context); + white(state); + $(/^(,)/, state); + white(state); } - if (!$(/^(\))/, context)) { + if (!$(/^(\))/, state)) { return null; } @@ -315,3 +243,81 @@ function geometrycollection(context: ParseWKTContext): Geometry | null { geometries: geometries }; } + +// COORDINATES + +function multicoords(state: ParseWKTState): number[][] | null { + white(state); + let depth = 0; + const rings: number[][] = []; + const stack = [rings]; + let pointer: any = rings; + let elem; + + while ((elem = $(/^(\()/, state) || $(/^(\))/, state) || $(/^(,)/, state) || $(tuples, state))) { + if (elem === '(') { + stack.push(pointer); + pointer = []; + stack[stack.length - 1].push(pointer); + depth++; + } else if (elem === ')') { + // For the case: Polygon(), ... 
+ if (pointer.length === 0) return null; + + // @ts-ignore + pointer = stack.pop(); + // the stack was empty, input was malformed + if (!pointer) return null; + depth--; + if (depth === 0) break; + } else if (elem === ',') { + pointer = []; + stack[stack.length - 1].push(pointer); + } else if (!elem.split(/\s/g).some(isNaN)) { + Array.prototype.push.apply(pointer, elem.split(/\s/g).map(parseFloat)); + } else { + return null; + } + white(state); + } + + if (depth !== 0) return null; + + return rings; +} + +function coords(state: ParseWKTState): number[][] | null { + const list: number[][] = []; + let item: any; + let pt; + while ((pt = $(tuples, state) || $(/^(,)/, state))) { + if (pt === ',') { + list.push(item); + item = []; + } else if (!pt.split(/\s/g).some(isNaN)) { + if (!item) item = []; + Array.prototype.push.apply(item, pt.split(/\s/g).map(parseFloat)); + } + white(state); + } + + if (item) list.push(item); + else return null; + + return list.length ? list : null; +} + +// HELPERS + +function $(regexp: RegExp, state: ParseWKTState) { + const match = state._?.substring(state.i).match(regexp); + if (!match) return null; + else { + state.i += match[0].length; + return match[0]; + } +} + +function white(state: ParseWKTState) { + $(/^\s*/, state); +} diff --git a/modules/wkt/src/lib/utils/binary-reader.ts b/modules/wkt/src/lib/utils/binary-reader.ts new file mode 100644 index 0000000000..cd41e6e579 --- /dev/null +++ b/modules/wkt/src/lib/utils/binary-reader.ts @@ -0,0 +1,72 @@ +/** A DataView that tracks byte offset when reading. 
*/ +export class BinaryReader { + arrayBuffer: ArrayBuffer; + dataView: DataView; + byteOffset: number; + littleEndian: boolean; + + constructor(arrayBuffer: ArrayBuffer, isBigEndian: boolean = false) { + this.arrayBuffer = arrayBuffer; + this.dataView = new DataView(arrayBuffer); + this.byteOffset = 0; + this.littleEndian = !isBigEndian; + } + + readUInt8() { + const value = this.dataView.getUint8(this.byteOffset); + this.byteOffset += 1; + return value; + } + readUInt16() { + const value = this.dataView.getUint16(this.byteOffset, this.littleEndian); + this.byteOffset += 2; + return value; + } + readUInt32() { + const value = this.dataView.getUint32(this.byteOffset, this.littleEndian); + this.byteOffset += 4; + return value; + } + readInt8() { + const value = this.dataView.getInt8(this.byteOffset); + this.byteOffset += 1; + return value; + } + readInt16() { + const value = this.dataView.getInt16(this.byteOffset, this.littleEndian); + this.byteOffset += 2; + return value; + } + readInt32() { + const value = this.dataView.getInt32(this.byteOffset, this.littleEndian); + this.byteOffset += 4; + return value; + } + readFloat() { + const value = this.dataView.getFloat32(this.byteOffset, this.littleEndian); + this.byteOffset += 4; + return value; + } + readDouble() { + const value = this.dataView.getFloat64(this.byteOffset, this.littleEndian); + this.byteOffset += 8; + return value; + } + + readVarInt() { + let result = 0; + let bytesRead = 0; + + let nextByte; + do { + // Read through the DataView: an ArrayBuffer has no indexed byte access, so arrayBuffer[i] is always undefined
+ nextByte = this.dataView.getUint8(this.byteOffset + bytesRead); + result += (nextByte & 0x7f) * 2 ** (7 * bytesRead); + bytesRead++; + } while (nextByte >= 0x80); + + this.byteOffset += bytesRead; + + return result; + } +} diff --git a/modules/wkt/src/twkb-loader.ts b/modules/wkt/src/twkb-loader.ts new file mode 100644 index 0000000000..2f17422a4e --- /dev/null +++ b/modules/wkt/src/twkb-loader.ts @@ -0,0 +1,42 @@ +// loaders.gl, MIT license + +import type {Loader, LoaderWithParser, LoaderOptions} from '@loaders.gl/loader-utils'; +import {BinaryGeometry, Geometry} from '@loaders.gl/schema'; +import {VERSION} from './lib/utils/version'; +import {parseTWKB, isTWKB} from './lib/parse-twkb'; + +export type WKBLoaderOptions = LoaderOptions & { + wkb?: { + shape: 'binary-geometry' | 'geometry'; + }; +}; + +/** + * Worker loader for TWKB (Tiny Well-Known Binary) + */ +export const TWKBWorkerLoader: Loader = { + name: 'TWKB (Tiny Well-Known Binary)', + id: 'twkb', + module: 'wkt', + version: VERSION, + worker: true, + category: 'geometry', + extensions: ['twkb'], + mimeTypes: [], + // TODO can we define static, serializable tests, eg. some binary strings? 
+ tests: [isTWKB], + options: { + wkb: { + shape: 'binary-geometry' + } + } +}; + +/** + * Loader for TWKB (Tiny Well-Known Binary) + */ +export const TWKBLoader: LoaderWithParser = { + ...TWKBWorkerLoader, + parse: async (arrayBuffer: ArrayBuffer) => parseTWKB(arrayBuffer), + parseSync: parseTWKB +}; diff --git a/modules/wkt/src/twkb-writer.ts b/modules/wkt/src/twkb-writer.ts new file mode 100644 index 0000000000..b4f6bbf42b --- /dev/null +++ b/modules/wkt/src/twkb-writer.ts @@ -0,0 +1,25 @@ +// loaders.gl, MIT license + +import type {Writer, WriterOptions} from '@loaders.gl/loader-utils'; +import {VERSION} from './lib/utils/version'; +import {encodeTWKB} from './lib/encode-twkb'; +import {BinaryGeometry} from '@loaders.gl/schema'; + +/** + * TWKB exporter + */ +export const TWKBWriter: Writer = { + name: 'TWKB (Tiny Well Known Binary)', + id: 'twkb', + module: 'wkt', + version: VERSION, + extensions: ['twkb'], + // @ts-expect-error not implemented yet - NOTE(review): this stub is async and resolves to the encodeTWKB function itself, it never calls it + encodeSync: async (data: BinaryGeometry, options) => encodeTWKB, + options: { + twkb: { + // hasZ: false, + // hasM: false + } + } +}; diff --git a/modules/wkt/src/wkb-writer.ts b/modules/wkt/src/wkb-writer.ts index e7c947498d..f2346b9b14 100644 --- a/modules/wkt/src/wkb-writer.ts +++ b/modules/wkt/src/wkb-writer.ts @@ -1,19 +1,19 @@ // loaders.gl, MIT license -import type {Writer} from '@loaders.gl/loader-utils'; +import type {Writer, WriterOptions} from '@loaders.gl/loader-utils'; import {VERSION} from './lib/utils/version'; import {encodeWKB} from './lib/encode-wkb'; +import type {Geometry, Feature} from '@loaders.gl/schema'; /** * WKB exporter */ -export const WKBWriter: Writer = { +export const WKBWriter: Writer = { name: 'WKB (Well Known Binary)', id: 'wkb', module: 'wkt', version: VERSION, extensions: ['wkb'], - // encodeSync: encodeWKB, encodeSync: encodeWKB, options: { wkb: { diff --git a/modules/wkt/test/index.ts b/modules/wkt/test/index.ts index 4e1e198962..d5e2abbfdf 100644 --- a/modules/wkt/test/index.ts +++ 
b/modules/wkt/test/index.ts @@ -5,6 +5,9 @@ import './lib/utils/hex-transcoder.spec'; import './wkb-loader.spec'; import './wkb-writer.spec'; +import './twkb-loader.spec'; +import './twkb-writer.spec'; + import './hex-wkb-loader.spec'; import './wkt-loader.spec'; diff --git a/modules/wkt/test/twkb-loader.spec.ts b/modules/wkt/test/twkb-loader.spec.ts new file mode 100644 index 0000000000..7f2d19c16d --- /dev/null +++ b/modules/wkt/test/twkb-loader.spec.ts @@ -0,0 +1,40 @@ +/** + * import test from 'tape-promise/tape'; +import {fetchFile, parseSync} from '@loaders.gl/core'; +import {WKBLoader} from '@loaders.gl/wkt'; +import {parseTestCases} from './utils/parse-test-cases'; +import {isWKB} from '../src/lib/parse-wkb-header'; + +const WKB_2D_TEST_CASES = '@loaders.gl/wkt/test/data/wkb-testdata2d.json'; +const WKB_Z_TEST_CASES = '@loaders.gl/wkt/test/data/wkb-testdataZ.json'; + +test.only('WKBLoader#2D', async (t) => { + const response = await fetchFile(WKB_2D_TEST_CASES); + const TEST_CASES = parseTestCases(await response.json()); + + // TODO parseWKB outputs TypedArrays; testCase contains regular arrays + for (const testCase of Object.values(TEST_CASES)) { + // Big endian + if (testCase.twkb && testCase.binary) { + t.ok(isWKB(testCase.twkb), 'isWKB(2D)'); + t.deepEqual(parseSync(testCase.twkb, WKBLoader), testCase.binary); + } + } + + t.end(); +}); + +test('WKBLoader#Z', async (t) => { + const response = await fetchFile(WKB_Z_TEST_CASES); + const TEST_CASES = parseTestCases(await response.json()); + + // TODO parseWKB outputs TypedArrays; testCase contains regular arrays + for (const testCase of Object.values(TEST_CASES)) { + if (testCase.wkbXdr && testCase.binary && testCase.geoJSON) { + t.deepEqual(parseSync(testCase.wkbXdr, WKBLoader, {wkb: {shape: 'geometry'}}), testCase.geoJSON); + } + } + + t.end(); +}); +*/ diff --git a/modules/wkt/test/twkb-writer.spec.ts b/modules/wkt/test/twkb-writer.spec.ts new file mode 100644 index 0000000000..746dc17a50 --- /dev/null 
+++ b/modules/wkt/test/twkb-writer.spec.ts @@ -0,0 +1,63 @@ +/** +import test from 'tape-promise/tape'; +import {fetchFile, encodeSync} from '@loaders.gl/core'; +import {WKBWriter} from '@loaders.gl/wkt'; +import {parseTestCases} from './utils/parse-test-cases'; + +const WKB_2D_TEST_CASES = '@loaders.gl/wkt/test/data/wkb-testdata2d.json'; +const WKB_2D_NAN_TEST_CASES = '@loaders.gl/wkt/test/data/wkb-testdata2d-nan.json'; +const WKB_Z_TEST_CASES = '@loaders.gl/wkt/test/data/wkb-testdataZ.json'; +const WKB_Z_NAN_TEST_CASES = '@loaders.gl/wkt/test/data/wkb-testdataZ-nan.json'; + +test('WKBWriter#2D', async (t) => { + const response = await fetchFile(WKB_2D_TEST_CASES); + const TEST_CASES = parseTestCases(await response.json()); + + for (const testCase of Object.values(TEST_CASES)) { + const {geoJSON, wkb} = testCase; + const encoded = encodeSync(geoJSON, WKBWriter, {wkb: {hasZ: false, hasM: false}}); + t.deepEqual(encoded, wkb); + } + + t.end(); +}); + +test('WKBWriter#2D NaN', async (t) => { + const response = await fetchFile(WKB_2D_NAN_TEST_CASES); + const TEST_CASES = parseTestCases(await response.json()); + + for (const testCase of Object.values(TEST_CASES)) { + const {geoJSON, wkb} = testCase; + const encoded = encodeSync(geoJSON, WKBWriter, {wkb: {hasZ: false, hasM: false}}); + t.deepEqual(encoded, wkb); + } + + t.end(); +}); + +test('WKBWriter#Z', async (t) => { + const response = await fetchFile(WKB_Z_TEST_CASES); + const TEST_CASES = parseTestCases(await response.json()); + + for (const testCase of Object.values(TEST_CASES)) { + const {geoJSON, wkb} = testCase; + const encoded = encodeSync(geoJSON, WKBWriter, {wkb: {hasZ: true, hasM: false}}); + t.deepEqual(encoded, wkb); + } + + t.end(); +}); + +test('WKBWriter#Z NaN', async (t) => { + const response = await fetchFile(WKB_Z_NAN_TEST_CASES); + const TEST_CASES = parseTestCases(await response.json()); + + for (const testCase of Object.values(TEST_CASES)) { + const {geoJSON, wkb} = testCase; + const encoded = 
encodeSync(geoJSON, WKBWriter, {wkb: {hasZ: true, hasM: false}}); + t.deepEqual(encoded, wkb); + } + + t.end(); +}); + */ diff --git a/modules/wkt/test/wip/wkt-loader.spec.ts b/modules/wkt/test/wip/wkt-loader.spec.ts deleted file mode 100644 index 36875e24a3..0000000000 --- a/modules/wkt/test/wip/wkt-loader.spec.ts +++ /dev/null @@ -1,450 +0,0 @@ -// Fork of https://github.com/mapbox/wellknown under ISC license (MIT/BSD-2-clause equivalent) - -import test from 'tape-promise/tape'; -import {parseWKT} from '../../src/lib/parse-wkt'; -import {fetchFile} from '@loaders.gl/core'; -import fuzzer from 'fuzzer'; - -const GEOMETRYCOLLECTION_WKT_URL = '@loaders.gl/wkt/test/data/geometrycollection.wkt'; -const GEOMETRYCOLLECTION_GEOJSON_URL = '@loaders.gl/wkt/test/data/geometrycollection.geojson'; - -// eslint-disable-next-line max-statements -test('parseWKT', async (t) => { - let response = await fetchFile(GEOMETRYCOLLECTION_WKT_URL); - const GEOMETRYCOLLECTION_WKT = await response.text(); - - response = await fetchFile(GEOMETRYCOLLECTION_GEOJSON_URL); - const GEOMETRYCOLLECTION_GEOJSON = await response.json(); - - t.deepEqual(parseWKT('POINT (0 1)'), { - type: 'Point', - coordinates: [0, 1] - }); - t.deepEqual(parseWKT('POINT (1 1)'), { - type: 'Point', - coordinates: [1, 1] - }); - t.deepEqual(parseWKT('POINT(1 1)'), { - type: 'Point', - coordinates: [1, 1] - }); - t.deepEqual(parseWKT('POINT\n\r(1 1)'), { - type: 'Point', - coordinates: [1, 1] - }); - t.deepEqual(parseWKT('POINT(1.1 1.1)'), { - type: 'Point', - coordinates: [1.1, 1.1] - }); - t.deepEqual(parseWKT('point(1.1 1.1)'), { - type: 'Point', - coordinates: [1.1, 1.1] - }); - t.deepEqual(parseWKT('point(1 2 3)'), { - type: 'Point', - coordinates: [1, 2, 3] - }); - t.deepEqual(parseWKT('point(1 2 3 4)'), { - type: 'Point', - coordinates: [1, 2, 3, 4] - }); - t.deepEqual(parseWKT('SRID=3857;POINT (1 2 3)'), { - type: 'Point', - coordinates: [1, 2, 3], - crs: { - type: 'name', - properties: { - name: 
'urn:ogc:def:crs:EPSG::3857' - } - } - }); - t.deepEqual(parseWKT('LINESTRING (30 10, 10 30, 40 40)'), { - type: 'LineString', - coordinates: [ - [30, 10], - [10, 30], - [40, 40] - ] - }); - t.deepEqual(parseWKT('LINESTRING(30 10, 10 30, 40 40)'), { - type: 'LineString', - coordinates: [ - [30, 10], - [10, 30], - [40, 40] - ] - }); - t.deepEqual(parseWKT('LineString(30 10, 10 30, 40 40)'), { - type: 'LineString', - coordinates: [ - [30, 10], - [10, 30], - [40, 40] - ] - }); - t.deepEqual(parseWKT('LINESTRING (1 2 3, 4 5 6)'), { - type: 'LineString', - coordinates: [ - [1, 2, 3], - [4, 5, 6] - ] - }); - t.deepEqual(parseWKT('LINESTRING (1 2 3 4, 5 6 7 8)'), { - type: 'LineString', - coordinates: [ - [1, 2, 3, 4], - [5, 6, 7, 8] - ] - }); - t.deepEqual(parseWKT('SRID=3857;LINESTRING (30 10, 10 30, 40 40)'), { - type: 'LineString', - coordinates: [ - [30, 10], - [10, 30], - [40, 40] - ], - crs: { - type: 'name', - properties: { - name: 'urn:ogc:def:crs:EPSG::3857' - } - } - }); - t.deepEqual(parseWKT('POLYGON ((30 10, 10 20, 20 40, 40 40, 30 10))'), { - type: 'Polygon', - coordinates: [ - [ - [30, 10], - [10, 20], - [20, 40], - [40, 40], - [30, 10] - ] - ] - }); - t.deepEqual(parseWKT('POLYGON((30 10, 10 20, 20 40, 40 40, 30 10))'), { - type: 'Polygon', - coordinates: [ - [ - [30, 10], - [10, 20], - [20, 40], - [40, 40], - [30, 10] - ] - ] - }); - t.deepEqual(parseWKT('SRID=3857;POLYGON ((30 10, 10 20, 20 40, 40 40, 30 10))'), { - type: 'Polygon', - coordinates: [ - [ - [30, 10], - [10, 20], - [20, 40], - [40, 40], - [30, 10] - ] - ], - crs: { - type: 'name', - properties: { - name: 'urn:ogc:def:crs:EPSG::3857' - } - } - }); - t.deepEqual( - parseWKT('POLYGON ((35 10, 10 20, 15 40, 45 45, 35 10),(20 30, 35 35, 30 20, 20 30))'), - { - type: 'Polygon', - coordinates: [ - [ - [35, 10], - [10, 20], - [15, 40], - [45, 45], - [35, 10] - ], - [ - [20, 30], - [35, 35], - [30, 20], - [20, 30] - ] - ] - } - ); - t.deepEqual(parseWKT('MULTIPOINT (0 0, 2 3)'), { - type: 
'MultiPoint', - coordinates: [ - [0, 0], - [2, 3] - ] - }); - t.deepEqual(parseWKT('MULTIPOINT (1 1, 2 3)'), { - type: 'MultiPoint', - coordinates: [ - [1, 1], - [2, 3] - ] - }); - t.deepEqual(parseWKT('MultiPoint (1 1, 2 3)'), { - type: 'MultiPoint', - coordinates: [ - [1, 1], - [2, 3] - ] - }); - t.deepEqual(parseWKT('SRID=3857;MULTIPOINT (1 1, 2 3)'), { - type: 'MultiPoint', - coordinates: [ - [1, 1], - [2, 3] - ], - crs: { - type: 'name', - properties: { - name: 'urn:ogc:def:crs:EPSG::3857' - } - } - }); - t.deepEqual(parseWKT('MULTIPOINT ((0 0), (2 3))'), { - type: 'MultiPoint', - coordinates: [ - [0, 0], - [2, 3] - ] - }); - t.deepEqual(parseWKT('MULTIPOINT ((1 1), (2 3))'), { - type: 'MultiPoint', - coordinates: [ - [1, 1], - [2, 3] - ] - }); - t.deepEqual(parseWKT('MultiPoint ((1 1), (2 3))'), { - type: 'MultiPoint', - coordinates: [ - [1, 1], - [2, 3] - ] - }); - t.deepEqual(parseWKT('SRID=3857;MULTIPOINT ((1 1), (2 3))'), { - type: 'MultiPoint', - coordinates: [ - [1, 1], - [2, 3] - ], - crs: { - type: 'name', - properties: { - name: 'urn:ogc:def:crs:EPSG::3857' - } - } - }); - t.deepEqual(parseWKT('MULTILINESTRING ((30 10, 10 30, 40 40), (30 10, 10 30, 40 40))'), { - type: 'MultiLineString', - coordinates: [ - [ - [30, 10], - [10, 30], - [40, 40] - ], - [ - [30, 10], - [10, 30], - [40, 40] - ] - ] - }); - t.deepEqual( - parseWKT('SRID=3857;MULTILINESTRING ((30 10, 10 30, 40 40), (30 10, 10 30, 40 40))'), - { - type: 'MultiLineString', - coordinates: [ - [ - [30, 10], - [10, 30], - [40, 40] - ], - [ - [30, 10], - [10, 30], - [40, 40] - ] - ], - crs: { - type: 'name', - properties: { - name: 'urn:ogc:def:crs:EPSG::3857' - } - } - } - ); - t.deepEqual( - parseWKT('MULTIPOLYGON (((30 20, 10 40, 45 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))'), - { - type: 'MultiPolygon', - coordinates: [ - [ - [ - [30, 20], - [10, 40], - [45, 40], - [30, 20] - ] - ], - [ - [ - [15, 5], - [40, 10], - [10, 20], - [5, 10], - [15, 5] - ] - ] - ] - } - ); - 
t.deepEqual(parseWKT('MULTIPOLYGON (((-74.03349399999999 40.688348)))'), { - type: 'MultiPolygon', - coordinates: [[[[-74.03349399999999, 40.688348]]]] - }); - t.deepEqual( - parseWKT( - 'MULTIPOLYGON (((30 20, 10 40, 45 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5), (10 10, 15 10, 15 15, 10 10)))' - ), - { - type: 'MultiPolygon', - coordinates: [ - [ - [ - [30, 20], - [10, 40], - [45, 40], - [30, 20] - ] - ], - [ - [ - [15, 5], - [40, 10], - [10, 20], - [5, 10], - [15, 5] - ], - [ - [10, 10], - [15, 10], - [15, 15], - [10, 10] - ] - ] - ] - } - ); - t.deepEqual( - parseWKT( - 'SRID=3857;MULTIPOLYGON (((30 20, 10 40, 45 40, 30 20)), ((15 5, 40 10, 10 20, 5 10, 15 5)))' - ), - { - type: 'MultiPolygon', - coordinates: [ - [ - [ - [30, 20], - [10, 40], - [45, 40], - [30, 20] - ] - ], - [ - [ - [15, 5], - [40, 10], - [10, 20], - [5, 10], - [15, 5] - ] - ] - ], - crs: { - type: 'name', - properties: { - name: 'urn:ogc:def:crs:EPSG::3857' - } - } - } - ); - t.deepEqual(parseWKT(GEOMETRYCOLLECTION_WKT), GEOMETRYCOLLECTION_GEOJSON); - t.deepEqual(parseWKT('GeometryCollection(POINT(4 6),LINESTRING(4 6,7 10))'), { - type: 'GeometryCollection', - geometries: [ - { - type: 'Point', - coordinates: [4, 6] - }, - { - type: 'LineString', - coordinates: [ - [4, 6], - [7, 10] - ] - } - ] - }); - t.deepEqual(parseWKT('GeometryCollection(POINT(4 6),\nLINESTRING(4 6,7 10))'), { - type: 'GeometryCollection', - geometries: [ - { - type: 'Point', - coordinates: [4, 6] - }, - { - type: 'LineString', - coordinates: [ - [4, 6], - [7, 10] - ] - } - ] - }); - t.deepEqual(parseWKT('POINT (1e-6 1E+2)'), { - type: 'Point', - coordinates: [1e-6, 1e2] - }); - t.equal(parseWKT('POINT(100)'), null); - t.equal(parseWKT('POINT(100, 100)'), null); - t.equal(parseWKT('POINT()'), null); - t.equal(parseWKT('MULTIPOINT()'), null); - t.equal(parseWKT('MULTIPOINT(1)'), null); - t.equal(parseWKT('MULTIPOINT(1 1, 1)'), null); - - t.deepEqual(parseWKT('POINT Z (1 2 3)'), { - type: 'Point', - coordinates: [1, 2, 
3] - }); - - t.deepEqual(parseWKT('LINESTRING Z (30 10 1, 10 30 2, 40 40 3)'), { - type: 'LineString', - coordinates: [ - [30, 10, 1], - [10, 30, 2], - [40, 40, 3] - ] - }); - - t.deepEqual(parseWKT('POLYGON Z ((30 10 1, 10 20 2, 20 40 3, 40 40 4, 30 10 5))'), { - type: 'Polygon', - coordinates: [ - [ - [30, 10, 1], - [10, 20, 2], - [20, 40, 3], - [40, 40, 4], - [30, 10, 5] - ] - ] - }); - - t.end(); -});