From 7ed9fcd83e0257b3faf57141925249fcd00cf58f Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Mon, 23 Sep 2024 16:14:23 -0600 Subject: [PATCH 1/2] test(core): assert geoarrow bbox is struct array --- core/src/geoarrow/mod.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/src/geoarrow/mod.rs b/core/src/geoarrow/mod.rs index 34a87d83..7c03bae2 100644 --- a/core/src/geoarrow/mod.rs +++ b/core/src/geoarrow/mod.rs @@ -143,13 +143,16 @@ pub fn from_table(table: Table) -> Result { #[cfg(all(test, feature = "geoparquet"))] mod tests { use crate::{Item, ItemCollection}; + use arrow_schema::DataType; use geoarrow::io::parquet::GeoParquetRecordBatchReaderBuilder; use std::fs::File; #[test] fn to_table() { let item: Item = crate::read("examples/simple-item.json").unwrap(); - let _ = super::to_table(vec![item]).unwrap(); + let table = super::to_table(vec![item]).unwrap(); + let (_, bbox_field) = table.schema().column_with_name("bbox").unwrap(); + assert!(matches!(bbox_field.data_type(), DataType::Struct(_))); } #[test] From b6b23adcc3181dc3671f012d643c4711f12611b0 Mon Sep 17 00:00:00 2001 From: Pete Gadomski Date: Mon, 23 Sep 2024 16:27:21 -0600 Subject: [PATCH 2/2] feat(core): don't put type in geoparquet --- core/src/geoarrow/json.rs | 1 + core/src/geoarrow/mod.rs | 2 ++ scripts/validate-stac-geoparquet | 56 ++++++++++++++++++++++++++------ 3 files changed, 49 insertions(+), 10 deletions(-) diff --git a/core/src/geoarrow/json.rs b/core/src/geoarrow/json.rs index 809d4c04..c91eac4c 100644 --- a/core/src/geoarrow/json.rs +++ b/core/src/geoarrow/json.rs @@ -586,6 +586,7 @@ pub fn from_table(table: Table) -> Result>, c "geometry".into(), serde_json::to_value(geojson::Geometry::new(value))?, ); + let _ = row.insert("type".into(), crate::ITEM_TYPE.into()); items.push(unflatten(row)); } } diff --git a/core/src/geoarrow/mod.rs b/core/src/geoarrow/mod.rs index 7c03bae2..aaf543c3 100644 --- a/core/src/geoarrow/mod.rs +++ b/core/src/geoarrow/mod.rs @@ -54,6 +54,7 @@ pub fn to_table(item_collection: impl Into) -> Result { let value = value .as_object_mut() .expect("a flat item should serialize to an object"); + let _ = value.remove("type"); let _ = value.remove("geometry"); if let Some(bbox) = value.remove("bbox") { let bbox = bbox @@ -153,6 +154,7 @@ mod tests { let table = super::to_table(vec![item]).unwrap(); let (_, bbox_field) = table.schema().column_with_name("bbox").unwrap(); assert!(matches!(bbox_field.data_type(), DataType::Struct(_))); + assert!(table.schema().column_with_name("type").is_none()); } #[test] diff --git a/scripts/validate-stac-geoparquet b/scripts/validate-stac-geoparquet index 04aca91f..f132bd50 100755 --- a/scripts/validate-stac-geoparquet +++ b/scripts/validate-stac-geoparquet @@ -1,27 +1,33 @@ #!/usr/bin/env python import json -import sys import shutil import subprocess +import sys import tempfile -from typing import Any -from deepdiff import DeepDiff from pathlib import Path +from typing import Any + +import pyarrow import pyarrow.parquet import stac_geoparquet.arrow -import pyarrow +from deepdiff import DeepDiff root = Path(__file__).parents[1] path = root / "spec-examples" / "v1.1.0" / "extended-item.json" directory = tempfile.mkdtemp() parquet_path = Path(directory) / "extended-item.parquet" + def clean_item(item: dict[str, Any]) -> None: - if item["geometry"]["type"] == "MultiPolygon" and len(item["geometry"]["coordinates"]) == 1: + if ( + item["geometry"]["type"] == "MultiPolygon" + and len(item["geometry"]["coordinates"]) == 1 + ): item["geometry"]["type"] = "Polygon" item["geometry"]["coordinates"] = item["geometry"]["coordinates"][0] + def clean_report(report: dict[str, Any]) -> dict[str, Any]: """We expect datetime values to be changed in the report.""" if report.get("values_changed"): @@ -32,12 +38,29 @@ def clean_report(report: dict[str, Any]) -> dict[str, Any]: del report["values_changed"]["root['properties']['datetime']"] if not report["values_changed"]: del report["values_changed"] + if report.get("dictionary_item_removed"): + report["dictionary_item_removed"] = [ + item for item in report["dictionary_item_removed"] if item != "root['type']" + ] + if not report["dictionary_item_removed"]: + del report["dictionary_item_removed"] return report + try: # Writing subprocess.check_call( - ["cargo", "run", "--no-default-features", "-F", "geoparquet", "--", "translate", path, parquet_path] + [ + "cargo", + "run", + "--no-default-features", + "-F", + "geoparquet", + "--", + "translate", + path, + parquet_path, + ] ) table = pyarrow.parquet.read_table(parquet_path) after = next(stac_geoparquet.arrow.stac_table_to_items(table)) @@ -54,11 +77,24 @@ try: # Reading table = stac_geoparquet.arrow.parse_stac_items_to_arrow([before]) stac_geoparquet.arrow.to_parquet(table, parquet_path) - item_collection = json.loads(subprocess.check_output( - ["cargo", "run", "--no-default-features", "-F", "geoparquet", "--", "translate", parquet_path] - )) + item_collection = json.loads( + subprocess.check_output( + [ + "cargo", + "run", + "--no-default-features", + "-F", + "geoparquet", + "--", + "translate", + parquet_path, + ] + ) + ) assert len(item_collection["features"]) == 1 - clean_item(item_collection["features"][0]) # stac-geoparquet writes as a multi-polygon + clean_item( + item_collection["features"][0] + ) # stac-geoparquet writes as a multi-polygon report = DeepDiff(before, item_collection["features"][0]).to_dict() report = clean_report(report) if report: