diff --git a/DESCRIPTION b/DESCRIPTION index aefb0dcd9..3c8eaee37 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -73,6 +73,7 @@ Imports: utils Suggests: blob, + nanoarrow, covr, dplyr (>= 0.8-3), ggplot2, diff --git a/R/RcppExports.R b/R/RcppExports.R index c5e8b4254..b43e501de 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -113,6 +113,10 @@ CPL_read_ogr <- function(datasource, layer, query, options, quiet, toTypeUser, f .Call(`_sf_CPL_read_ogr`, datasource, layer, query, options, quiet, toTypeUser, fid_column_name, drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, width) } +CPL_read_gdal_stream <- function(stream_xptr, datasource, layer, query, options, quiet, drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column, width) { + .Call(`_sf_CPL_read_gdal_stream`, stream_xptr, datasource, layer, query, options, quiet, drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column, width) +} + CPL_gdalinfo <- function(obj, options, oo, co) { .Call(`_sf_CPL_gdalinfo`, obj, options, oo, co) } diff --git a/R/read.R b/R/read.R index e25aa75b7..316a5410a 100644 --- a/R/read.R +++ b/R/read.R @@ -51,7 +51,7 @@ set_utf8 = function(x) { #' of LineString and MultiLineString, or of Polygon and MultiPolygon, convert #' all to the Multi variety; defaults to \code{TRUE} #' @param stringsAsFactors logical; logical: should character vectors be -#' converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is +#' converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is #' \code{FALSE}, for \code{st_read} and R version < 4.1.0 equal to #' \code{default.stringsAsFactors()} #' @param int64_as_string logical; if TRUE, Int64 attributes are returned as @@ -146,7 +146,7 @@ st_read.default = function(dsn, layer, ...) { } process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE, - stringsAsFactors = ifelse(as_tibble, FALSE, sf_stringsAsFactors()), + stringsAsFactors = ifelse(as_tibble, FALSE, sf_stringsAsFactors()), geometry_column = 1, as_tibble = FALSE, optional = FALSE) { which.geom = which(vapply(x, function(f) inherits(f, "sfc"), TRUE)) @@ -156,7 +156,7 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE, # in case no geometry is present: if (length(which.geom) == 0) { - if (! quiet) + if (! quiet) warning("no simple feature geometries present: returning a data.frame or tbl_df", call. = FALSE) x = if (!as_tibble) { if (any(sapply(x, is.list))) @@ -192,8 +192,13 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE, for (i in seq_along(lc.other)) x[[ nm.lc[i] ]] = list.cols[[i]] - for (i in seq_along(geom)) - x[[ nm[i] ]] = st_sfc(geom[[i]], crs = attr(geom[[i]], "crs")) # computes bbox + for (i in seq_along(geom)) { + if (is.null(attr(geom[[i]], "bbox"))) { + x[[ nm[i] ]] = st_sfc(geom[[i]], crs = attr(geom[[i]], "crs")) # computes bbox + } else { + x[[ nm[i] ]] = geom[[i]] + } + } x = st_as_sf(x, ..., sf_column_name = if (is.character(geometry_column)) geometry_column else nm[geometry_column], @@ -204,20 +209,70 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE, x } +# Allow setting the default to TRUE to make it easier to run existing tests +# of st_read() through the stream interface +default_st_read_use_stream = function() { + getOption( + "sf.st_read_use_stream", + identical(Sys.getenv("R_SF_ST_READ_USE_STREAM"), "true") + ) +} + +process_cpl_read_ogr_stream = function(x, default_crs, num_features, fid_column_name, + crs = NULL, ...) { + is_geometry_column = vapply( + x$get_schema()$children, + function(s) identical(s$metadata[["ARROW:extension:name"]], "ogc.wkb"), + logical(1) + ) + + crs = if (is.null(crs)) st_crs(default_crs) else st_crs(crs) + if (num_features == -1) { + num_features = NULL + } + df = suppressWarnings(nanoarrow::convert_array_stream(x, size = num_features)) + + df[is_geometry_column] = lapply(df[is_geometry_column], function(x) { + class(x) <- "WKB" + x <- st_as_sfc(x) + st_set_crs(x, crs) + }) + + # Prefer "geometry" as the geometry column name + if (any(is_geometry_column) && !("geometry" %in% names(df))) { + names(df)[which(is_geometry_column)[1]] = "geometry" + } + + # Rename OGC_FID to fid_column_name and move to end + if (length(fid_column_name) == 1 && "OGC_FID" %in% names(df)) { + df <- df[c(setdiff(names(df), "OGC_FID"), "OGC_FID")] + names(df)[names(df) == "OGC_FID"] = fid_column_name + } + + # Move geometry to the end + if ("geometry" %in% names(df)) { + df <- df[c(setdiff(names(df), "geometry"), "geometry")] + } + + process_cpl_read_ogr(df, ...) +} + #' @name st_read #' @param fid_column_name character; name of column to write feature IDs to; defaults to not doing this #' @param drivers character; limited set of driver short names to be tried (default: try all) #' @param wkt_filter character; WKT representation of a spatial filter (may be used as bounding box, selecting overlapping geometries); see examples #' @param optional logical; passed to \link[base]{as.data.frame}; always \code{TRUE} when \code{as_tibble} is \code{TRUE} +#' @param use_stream Use TRUE to use the experimental columnar interface introduced in GDAL 3.6. #' @note The use of \code{system.file} in examples make sure that examples run regardless where R is installed: #' typical users will not use \code{system.file} but give the file name directly, either with full path or relative #' to the current working directory (see \link{getwd}). "Shapefiles" consist of several files with the same basename #' that reside in the same directory, only one of them having extension \code{.shp}. #' @export -st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet = FALSE, geometry_column = 1L, +st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet = FALSE, geometry_column = 1L, type = 0, promote_to_multi = TRUE, stringsAsFactors = sf_stringsAsFactors(), int64_as_string = FALSE, check_ring_dir = FALSE, fid_column_name = character(0), - drivers = character(0), wkt_filter = character(0), optional = FALSE) { + drivers = character(0), wkt_filter = character(0), optional = FALSE, + use_stream = default_st_read_use_stream()) { layer = if (missing(layer)) character(0) @@ -233,11 +288,22 @@ st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet if (length(promote_to_multi) > 1) stop("`promote_to_multi' should have length one, and applies to all geometry columns") - x = CPL_read_ogr(dsn, layer, query, as.character(options), quiet, type, fid_column_name, - drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width")) - process_cpl_read_ogr(x, quiet, check_ring_dir = check_ring_dir, - stringsAsFactors = stringsAsFactors, geometry_column = geometry_column, - optional = optional, ...) + + + if (use_stream) { + stream = nanoarrow::nanoarrow_allocate_array_stream() + info = CPL_read_gdal_stream(stream, dsn, layer, query, as.character(options), quiet, + drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column_name, getOption("width")) + process_cpl_read_ogr_stream(stream, default_crs = info[[1]], num_features = info[[2]], + fid_column_name = fid_column_name, stringsAsFactors = stringsAsFactors, quiet = quiet, ...) + } else { + x = CPL_read_ogr(dsn, layer, query, as.character(options), quiet, type, fid_column_name, + drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width")) + + process_cpl_read_ogr(x, quiet, check_ring_dir = check_ring_dir, + stringsAsFactors = stringsAsFactors, geometry_column = geometry_column, + optional = optional, ...) + } } #' @name st_read @@ -606,7 +672,7 @@ print.sf_layers = function(x, ...) { #' @param options character; driver dependent dataset open options, multiple options supported. #' @param do_count logical; if TRUE, count the features by reading them, even if their count is not reported by the driver #' @name st_layers -#' @return list object of class \code{sf_layers} with elements +#' @return list object of class \code{sf_layers} with elements #' \describe{ #' \item{name}{name of the layer} #' \item{geomtype}{list with for each layer the geometry types} @@ -751,7 +817,7 @@ check_append_delete <- function(append, delete) { #' @name st_write #' @export -#' @details st_delete deletes layer(s) in a data source, or a data source if layers are +#' @details st_delete deletes layer(s) in a data source, or a data source if layers are #' omitted; it returns TRUE on success, FALSE on failure, invisibly. st_delete = function(dsn, layer = character(0), driver = guess_driver_can_write(dsn), quiet = FALSE) { invisible(CPL_delete_ogr(dsn, layer, driver, quiet) == 0) diff --git a/man/st_layers.Rd b/man/st_layers.Rd index ca1ff8cdd..cd2ba482b 100644 --- a/man/st_layers.Rd +++ b/man/st_layers.Rd @@ -15,7 +15,7 @@ folder, or contain the name and access credentials of a database)} \item{do_count}{logical; if TRUE, count the features by reading them, even if their count is not reported by the driver} } \value{ -list object of class \code{sf_layers} with elements +list object of class \code{sf_layers} with elements \describe{ \item{name}{name of the layer} \item{geomtype}{list with for each layer the geometry types} diff --git a/man/st_read.Rd b/man/st_read.Rd index ed3e3e6fa..ee29940ab 100644 --- a/man/st_read.Rd +++ b/man/st_read.Rd @@ -25,7 +25,8 @@ st_read(dsn, layer, ...) fid_column_name = character(0), drivers = character(0), wkt_filter = character(0), - optional = FALSE + optional = FALSE, + use_stream = default_st_read_use_stream() ) read_sf(..., quiet = TRUE, stringsAsFactors = FALSE, as_tibble = TRUE) @@ -83,7 +84,7 @@ of LineString and MultiLineString, or of Polygon and MultiPolygon, convert all to the Multi variety; defaults to \code{TRUE}} \item{stringsAsFactors}{logical; logical: should character vectors be - converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is + converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is \code{FALSE}, for \code{st_read} and R version < 4.1.0 equal to \code{default.stringsAsFactors()}} @@ -103,6 +104,8 @@ clockwise, holes clockwise)} \item{optional}{logical; passed to \link[base]{as.data.frame}; always \code{TRUE} when \code{as_tibble} is \code{TRUE}} +\item{use_stream}{Use TRUE to use the experimental columnar interface introduced in GDAL 3.6.} + \item{as_tibble}{logical; should the returned table be of class tibble or data.frame?} \item{EWKB}{logical; is the WKB of type EWKB? if missing, defaults to diff --git a/man/st_write.Rd b/man/st_write.Rd index cd0b0c115..a7d8d49c3 100644 --- a/man/st_write.Rd +++ b/man/st_write.Rd @@ -111,7 +111,7 @@ When deleting layers or data sources is not successful, no error is emitted. \code{delete_dsn} and \code{delete_layer} should be handled with care; the former may erase complete directories or databases. -st_delete deletes layer(s) in a data source, or a data source if layers are +st_delete deletes layer(s) in a data source, or a data source if layers are omitted; it returns TRUE on success, FALSE on failure, invisibly. } \examples{ diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index ef5c68195..fbb6863e5 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -345,6 +345,28 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// CPL_read_gdal_stream +Rcpp::List CPL_read_gdal_stream(Rcpp::RObject stream_xptr, Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer, Rcpp::CharacterVector query, Rcpp::CharacterVector options, bool quiet, Rcpp::CharacterVector drivers, Rcpp::CharacterVector wkt_filter, bool dsn_exists, bool dsn_isdb, Rcpp::CharacterVector fid_column, int width); +RcppExport SEXP _sf_CPL_read_gdal_stream(SEXP stream_xptrSEXP, SEXP datasourceSEXP, SEXP layerSEXP, SEXP querySEXP, SEXP optionsSEXP, SEXP quietSEXP, SEXP driversSEXP, SEXP wkt_filterSEXP, SEXP dsn_existsSEXP, SEXP dsn_isdbSEXP, SEXP fid_columnSEXP, SEXP widthSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< Rcpp::RObject >::type stream_xptr(stream_xptrSEXP); + Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type datasource(datasourceSEXP); + Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type layer(layerSEXP); + Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type query(querySEXP); + Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type options(optionsSEXP); + Rcpp::traits::input_parameter< bool >::type quiet(quietSEXP); + Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type drivers(driversSEXP); + Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type wkt_filter(wkt_filterSEXP); + Rcpp::traits::input_parameter< bool >::type dsn_exists(dsn_existsSEXP); + Rcpp::traits::input_parameter< bool >::type dsn_isdb(dsn_isdbSEXP); + Rcpp::traits::input_parameter< Rcpp::CharacterVector >::type fid_column(fid_columnSEXP); + Rcpp::traits::input_parameter< int >::type width(widthSEXP); + rcpp_result_gen = Rcpp::wrap(CPL_read_gdal_stream(stream_xptr, datasource, layer, query, options, quiet, drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column, width)); + return rcpp_result_gen; +END_RCPP +} // CPL_gdalinfo Rcpp::CharacterVector CPL_gdalinfo(Rcpp::CharacterVector obj, Rcpp::CharacterVector options, Rcpp::CharacterVector oo, Rcpp::CharacterVector co); RcppExport SEXP _sf_CPL_gdalinfo(SEXP objSEXP, SEXP optionsSEXP, SEXP ooSEXP, SEXP coSEXP) { @@ -1442,6 +1464,7 @@ static const R_CallMethodDef CallEntries[] = { {"_sf_CPL_gdal_linestring_sample", (DL_FUNC) &_sf_CPL_gdal_linestring_sample, 2}, {"_sf_CPL_get_layers", (DL_FUNC) &_sf_CPL_get_layers, 3}, {"_sf_CPL_read_ogr", (DL_FUNC) &_sf_CPL_read_ogr, 14}, + {"_sf_CPL_read_gdal_stream", (DL_FUNC) &_sf_CPL_read_gdal_stream, 12}, {"_sf_CPL_gdalinfo", (DL_FUNC) &_sf_CPL_gdalinfo, 4}, {"_sf_CPL_ogrinfo", (DL_FUNC) &_sf_CPL_ogrinfo, 4}, {"_sf_CPL_gdaladdo", (DL_FUNC) &_sf_CPL_gdaladdo, 8}, diff --git a/src/gdal_read.cpp b/src/gdal_read.cpp index ad207286a..be2ee62c9 100644 --- a/src/gdal_read.cpp +++ b/src/gdal_read.cpp @@ -288,14 +288,14 @@ Rcpp::List sf_from_ogrlayer(OGRLayer *poLayer, bool quiet, bool int64_as_string, // POSIXlt: sec min hour mday mon year wday yday isdst ... Rcpp::List dtlst = Rcpp::List::create( - Rcpp::_["sec"] = (double) Second, + Rcpp::_["sec"] = (double) Second, Rcpp::_["min"] = (int) Minute, Rcpp::_["hour"] = (int) Hour, Rcpp::_["mday"] = (int) Day, Rcpp::_["mon"] = (int) Month - 1, Rcpp::_["year"] = (int) Year - 1900, - Rcpp::_["wday"] = NA_INTEGER, - Rcpp::_["yday"] = NA_INTEGER, + Rcpp::_["wday"] = NA_INTEGER, + Rcpp::_["yday"] = NA_INTEGER, Rcpp::_["isdst"] = NA_INTEGER, Rcpp::_["zone"] = tzone, Rcpp::_["gmtoff"] = NA_INTEGER); @@ -502,22 +502,25 @@ Rcpp::List sf_from_ogrlayer(OGRLayer *poLayer, bool quiet, bool int64_as_string, return out; } -// [[Rcpp::export]] -Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer, +static void finalize_dataset_xptr(SEXP dataset_xptr) { + GDALDataset *poDS = (GDALDataset*)R_ExternalPtrAddr(dataset_xptr); + if (poDS != nullptr) { + GDALClose(poDS); + } +} + +Rcpp::List CPL_ogr_layer_setup(Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer, Rcpp::CharacterVector query, - Rcpp::CharacterVector options, bool quiet, Rcpp::NumericVector toTypeUser, - Rcpp::CharacterVector fid_column_name, Rcpp::CharacterVector drivers, + Rcpp::CharacterVector options, bool quiet, Rcpp::CharacterVector drivers, Rcpp::CharacterVector wkt_filter, - bool promote_to_multi = true, bool int64_as_string = false, - bool dsn_exists = true, - bool dsn_isdb = false, - int width = 80) { - - // adapted from the OGR tutorial @ www.gdal.org + bool dsn_exists, + bool dsn_isdb, + int width) { + // adapted from the OGR tutorial @ www.gdal.org std::vector open_options = create_options(options, quiet); std::vector drivers_v = create_options(drivers, quiet); GDALDataset *poDS; - poDS = (GDALDataset *) GDALOpenEx( datasource[0], GDAL_OF_VECTOR | GDAL_OF_READONLY, + poDS = (GDALDataset *) GDALOpenEx( datasource[0], GDAL_OF_VECTOR | GDAL_OF_READONLY, drivers.size() ? drivers_v.data() : NULL, open_options.data(), NULL ); if( poDS == NULL ) { // could not open dsn @@ -533,6 +536,11 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector Rcpp::stop("Cannot open %s; The file doesn't seem to exist.", datasource); } + // Will close the dataset if some early return/exception prevents GDALClose() from being + // called/allows the result to be accessed by the caller. + Rcpp::RObject dataset_xptr = R_MakeExternalPtr(poDS, R_NilValue, R_NilValue); + R_RegisterCFinalizer(dataset_xptr, &finalize_dataset_xptr); + if (layer.size() == 0 && Rcpp::CharacterVector::is_na(query[0])) { // no layer specified switch (poDS->GetLayerCount()) { case 0: { // error: @@ -596,7 +604,7 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector Rcpp::Rcout << "Reading layer `" << layer[0] << "' from data source "; // if (LENGTH(datasource[0]) > (width - (34 + LENGTH(layer[0])))) Rcpp::String ds(datasource(0)); - if (layer.size()) { + if (layer.size()) { Rcpp::String la(layer(0)); if (strlen(ds.get_cstring()) > (width - (34 + strlen(la.get_cstring())))) Rcpp::Rcout << std::endl << " "; @@ -607,6 +615,29 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector Rcpp::Rcout << "using driver `" << poDS->GetDriverName() << "'" << std::endl; // #nocov } + // Keeps the dataset external pointer alive as long as the layer external pointer is alive + Rcpp::RObject layer_xptr = R_MakeExternalPtr(poLayer, R_NilValue, dataset_xptr); + + return Rcpp::List::create(dataset_xptr, layer_xptr); +} + +// [[Rcpp::export]] +Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer, + Rcpp::CharacterVector query, + Rcpp::CharacterVector options, bool quiet, Rcpp::NumericVector toTypeUser, + Rcpp::CharacterVector fid_column_name, Rcpp::CharacterVector drivers, + Rcpp::CharacterVector wkt_filter, + bool promote_to_multi = true, bool int64_as_string = false, + bool dsn_exists = true, + bool dsn_isdb = false, + int width = 80) { + Rcpp::List prep = CPL_ogr_layer_setup(datasource, layer, query, options, + quiet, drivers, + wkt_filter, + dsn_exists, dsn_isdb, width); + OGRDataSource* poDS = (OGRDataSource*)(R_ExternalPtrAddr(prep[0])); + OGRLayer* poLayer = (OGRLayer*)R_ExternalPtrAddr(prep[1]); + Rcpp::List out = sf_from_ogrlayer(poLayer, quiet, int64_as_string, toTypeUser, fid_column_name, promote_to_multi); @@ -615,5 +646,6 @@ Rcpp::List CPL_read_ogr(Rcpp::CharacterVector datasource, Rcpp::CharacterVector poDS->ReleaseResultSet(poLayer); GDALClose(poDS); + R_SetExternalPtrAddr(prep[0], nullptr); return out; } diff --git a/src/gdal_read.h b/src/gdal_read.h index 1f541cc8d..a988dd89b 100644 --- a/src/gdal_read.h +++ b/src/gdal_read.h @@ -1,4 +1,14 @@ -Rcpp::List sf_from_ogrlayer(OGRLayer *poLayer, bool quiet, bool int64_as_string, + +Rcpp::List sf_from_ogrlayer(OGRLayer *poLayer, bool quiet, bool int64_as_string, Rcpp::NumericVector toTypeUser, Rcpp::CharacterVector fid_column, bool promote_to_multi); + +Rcpp::List CPL_ogr_layer_setup(Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer, + Rcpp::CharacterVector query, + Rcpp::CharacterVector options, bool quiet, Rcpp::CharacterVector drivers, + Rcpp::CharacterVector wkt_filter, + bool dsn_exists, + bool dsn_isdb, + int width); + Rcpp::List CPL_read_gdal(Rcpp::CharacterVector fname, Rcpp::CharacterVector options, Rcpp::CharacterVector driver, bool read_data, Rcpp::NumericVector NA_value, Rcpp::List RasterIO_parameters); diff --git a/src/gdal_read_stream.cpp b/src/gdal_read_stream.cpp new file mode 100644 index 000000000..44b1c5a15 --- /dev/null +++ b/src/gdal_read_stream.cpp @@ -0,0 +1,133 @@ + +#include + +#define RCPP_DEFAULT_INCLUDE_CALL false +#include "Rcpp.h" + +#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3,6,0) + +#include +#include "gdal_read.h" + +class GDALStreamWrapper { +public: + static void Make(struct ArrowArrayStream* stream, Rcpp::List shelter, + struct ArrowArrayStream* stream_out) { + stream_out->get_schema = &get_schema_wrap; + stream_out->get_next = &get_next_wrap; + stream_out->get_last_error = &get_last_error_wrap; + stream_out->release = &release_wrap; + stream_out->private_data = new GDALStreamWrapper(stream, shelter); + } + + ~GDALStreamWrapper() { + stream_.release(&stream_); + GDALDataset* poDS = (GDALDataset*)R_ExternalPtrAddr(shelter_[0]); + GDALClose(poDS); + R_SetExternalPtrAddr(shelter_[0], nullptr); + } + +private: + // The parent stream as returned from GDAL + struct ArrowArrayStream stream_; + Rcpp::List shelter_; + + GDALStreamWrapper(struct ArrowArrayStream* stream, Rcpp::List shelter): + shelter_(shelter) { + memcpy(&stream_, stream, sizeof(struct ArrowArrayStream)); + stream->release = nullptr; + } + + int get_schema(struct ArrowSchema* out) { + return stream_.get_schema(&stream_, out); + } + + int get_next(struct ArrowArray* out) { + return stream_.get_next(&stream_, out); + } + + const char* get_last_error() { + return stream_.get_last_error(&stream_); + } + + static int get_schema_wrap(struct ArrowArrayStream* stream, struct ArrowSchema* out) { + return reinterpret_cast(stream->private_data)->get_schema(out); + } + + static int get_next_wrap(struct ArrowArrayStream* stream, struct ArrowArray* out) { + return reinterpret_cast(stream->private_data)->get_next(out); + } + + static const char* get_last_error_wrap(struct ArrowArrayStream* stream) { + return reinterpret_cast(stream->private_data)->get_last_error(); + } + + static void release_wrap(struct ArrowArrayStream* stream) { + delete reinterpret_cast(stream->private_data); + stream->release = nullptr; + } +}; + +#endif + +// [[Rcpp::export]] +Rcpp::List CPL_read_gdal_stream( + Rcpp::RObject stream_xptr, + Rcpp::CharacterVector datasource, Rcpp::CharacterVector layer, + Rcpp::CharacterVector query, + Rcpp::CharacterVector options, bool quiet, Rcpp::CharacterVector drivers, + Rcpp::CharacterVector wkt_filter, + bool dsn_exists, + bool dsn_isdb, + Rcpp::CharacterVector fid_column, + int width) { + +#if GDAL_VERSION_NUM >= GDAL_COMPUTE_VERSION(3,6,0) + + const char* array_stream_options[] = {"INCLUDE_FID=NO", nullptr}; + if (fid_column.size() == 1) { + array_stream_options[0] = "INCLUDE_FID=YES"; + } + + Rcpp::List prep = CPL_ogr_layer_setup(datasource, layer, query, options, + quiet, drivers, + wkt_filter, + dsn_exists, dsn_isdb, width); + OGRLayer* poLayer = (OGRLayer*)R_ExternalPtrAddr(prep[1]); + auto stream_out = reinterpret_cast( + R_ExternalPtrAddr(stream_xptr)); + + OGRSpatialReference* crs = poLayer->GetSpatialRef(); + + Rcpp::String wkt_str = NA_STRING; + if (crs != nullptr) { + char* wkt_out; + crs->exportToWkt(&wkt_out); + wkt_str = wkt_out; + CPLFree(wkt_out); + } + + struct ArrowArrayStream stream_temp; + if (!poLayer->GetArrowStream(&stream_temp, array_stream_options)) { + Rcpp::stop("Failed to open ArrayStream from Layer"); + } + + GDALStreamWrapper::Make(&stream_temp, prep, stream_out); + + // The reported feature count is incorrect if there is a query + double num_features; + if (query.size() == 0) { + num_features = (double) poLayer->GetFeatureCount(false); + } else { + num_features = -1; + } + + return Rcpp::List::create(wkt_str, Rcpp::NumericVector::create(num_features)); + +#else + + Rcpp::stop("read_stream() requires GDAL >= 3.6"); + +#endif + +} diff --git a/src/stars.cpp b/src/stars.cpp index b0685de56..423a952c9 100644 --- a/src/stars.cpp +++ b/src/stars.cpp @@ -798,7 +798,7 @@ NumericMatrix CPL_extract(CharacterVector input, NumericMatrix xy, bool interpol } // [[Rcpp::export]] -void CPL_create(CharacterVector file, IntegerVector nxy, NumericVector value, CharacterVector wkt, +void CPL_create(CharacterVector file, IntegerVector nxy, NumericVector value, CharacterVector wkt, NumericVector xlim, NumericVector ylim) { // // modified from gdal/apps/gdal_create.cpp: diff --git a/tests/testthat/test_tm.R b/tests/testthat/test_tm.R index fd8b1a157..1a5e983c3 100644 --- a/tests/testthat/test_tm.R +++ b/tests/testthat/test_tm.R @@ -1,6 +1,6 @@ test_that("st_read and write handle date and time", { Sys.setenv(TZ="") # local time - x = st_sf(a = 1:2, b=c(5.6,3), dt = Sys.Date()+1:2, tm = Sys.time()+2:3, + x = st_sf(a = 1:2, b=c(5.6,3), dt = Sys.Date()+1:2, tm = Sys.time()+2:3, geometry = structure(st_sfc(st_point(c(1,1)), st_point(c(2,2))))) shp <- paste0(tempfile(), ".shp") gpkg <- paste0(tempfile(), ".gpkg") @@ -11,7 +11,7 @@ test_that("st_read and write handle date and time", { x2 = st_read(shp[1], quiet = TRUE) expect_equal(x[-4], x2, check.attributes=FALSE) # WKT2 CRS do not roundtrip for ESRI Shapefile - + st_write(x, gpkg, quiet = TRUE) x2 = st_read(gpkg, quiet = TRUE) expect_equal(x[["a"]], x2[["a"]]) @@ -20,18 +20,18 @@ test_that("st_read and write handle date and time", { expect_equal(x[["tm"]], x2[["tm"]]) Sys.setenv(TZ="UTC") # GMT - x = st_sf(a = 1:2, b=c(5.6,3), dt = Sys.Date()+1:2, tm = Sys.time()+2:3, + x = st_sf(a = 1:2, b=c(5.6,3), dt = Sys.Date()+1:2, tm = Sys.time()+2:3, geometry = structure(st_sfc(st_point(c(1,1)), st_point(c(2,2))))) shp <- paste0(tempfile(), ".shp") gpkg <- paste0(tempfile(), ".gpkg") - + st_crs(x) = st_crs("ENGCRS[\"Undefined Cartesian SRS with unknown unit\",EDATUM[\"Unknown engineering datum\"],CS[Cartesian,2],AXIS[\"X\",unspecified,ORDER[1],LENGTHUNIT[\"unknown\",0]],AXIS[\"Y\",unspecified,ORDER[2],LENGTHUNIT[\"unknown\",0]]]") - + st_write(x[-4], shp[1], quiet = TRUE) x2 = st_read(shp[1], quiet = TRUE) expect_equal(x[-4], x2, check.attributes=FALSE) # WKT2 CRS do not roundtrip for ESRI Shapefile - + st_write(x, gpkg, quiet = TRUE) x2 = st_read(gpkg, quiet = TRUE) expect_equal(x[["a"]], x2[["a"]]) diff --git a/tests/testthat/test_zm_range.R b/tests/testthat/test_zm_range.R index 2726deb64..8bcc4991f 100644 --- a/tests/testthat/test_zm_range.R +++ b/tests/testthat/test_zm_range.R @@ -144,7 +144,6 @@ test_that("transform includes zm in output", { test_that("XYM-only objects correctly calculate M (and not Z)", { - skip_if_not(sf_extSoftVersion()["GDAL"] > "2.1.0") sf_m <- sf::st_read(system.file("/shape/storms_xyzm.shp", package = "sf"), quiet = TRUE)