Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Paleolimbot stream reading #2238

Merged
merged 24 commits into from
Oct 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ Imports:
utils
Suggests:
blob,
nanoarrow,
covr,
dplyr (>= 0.8-3),
ggplot2,
Expand Down
4 changes: 4 additions & 0 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ CPL_read_ogr <- function(datasource, layer, query, options, quiet, toTypeUser, f
.Call(`_sf_CPL_read_ogr`, datasource, layer, query, options, quiet, toTypeUser, fid_column_name, drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, width)
}

CPL_read_gdal_stream <- function(stream_xptr, datasource, layer, query, options, quiet, drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column, width) {
.Call(`_sf_CPL_read_gdal_stream`, stream_xptr, datasource, layer, query, options, quiet, drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column, width)
}

CPL_gdalinfo <- function(obj, options, oo, co) {
.Call(`_sf_CPL_gdalinfo`, obj, options, oo, co)
}
Expand Down
12 changes: 6 additions & 6 deletions R/cast_sfg.R
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ ClosePol <- function(mtrx) {
#' @examples
#' # example(st_read)
#' nc = st_read(system.file("shape/nc.shp", package="sf"))
#' mpl <- nc$geometry[[4]]
#' mpl <- st_geometry(nc)[[4]]
#' #st_cast(x) ## error 'argument "to" is missing, with no default'
#' cast_all <- function(xg) {
#' lapply(c("MULTIPOLYGON", "MULTILINESTRING", "MULTIPOINT", "POLYGON", "LINESTRING", "POINT"),
Expand Down Expand Up @@ -81,7 +81,7 @@ st_cast.MULTIPOLYGON <- function(x, to, ...) {
#' @name st_cast
#' @export
#' @examples
#' mls <- st_cast(nc$geometry[[4]], "MULTILINESTRING")
#' mls <- st_cast(st_geometry(nc)[[4]], "MULTILINESTRING")
#' st_sfc(cast_all(mls))
st_cast.MULTILINESTRING <- function(x, to, ...) {
switch(to,
Expand All @@ -108,7 +108,7 @@ st_cast.MULTILINESTRING <- function(x, to, ...) {
#' @name st_cast
#' @export
#' @examples
#' mpt <- st_cast(nc$geometry[[4]], "MULTIPOINT")
#' mpt <- st_cast(st_geometry(nc)[[4]], "MULTIPOINT")
#' st_sfc(cast_all(mpt))
st_cast.MULTIPOINT <- function(x, to, ...) {
switch(to,
Expand All @@ -135,7 +135,7 @@ st_cast.MULTIPOINT <- function(x, to, ...) {
#' @name st_cast
#' @export
#' @examples
#' pl <- st_cast(nc$geometry[[4]], "POLYGON")
#' pl <- st_cast(st_geometry(nc)[[4]], "POLYGON")
#' st_sfc(cast_all(pl))
st_cast.POLYGON <- function(x, to, ...) {
switch(to,
Expand All @@ -156,7 +156,7 @@ st_cast.POLYGON <- function(x, to, ...) {
#' @name st_cast
#' @export
#' @examples
#' ls <- st_cast(nc$geometry[[4]], "LINESTRING")
#' ls <- st_cast(st_geometry(nc)[[4]], "LINESTRING")
#' st_sfc(cast_all(ls))
st_cast.LINESTRING <- function(x, to, ...) {
switch(to,
Expand All @@ -173,7 +173,7 @@ st_cast.LINESTRING <- function(x, to, ...) {
#' @name st_cast
#' @export
#' @examples
#' pt <- st_cast(nc$geometry[[4]], "POINT")
#' pt <- st_cast(st_geometry(nc)[[4]], "POINT")
#' ## st_sfc(cast_all(pt)) ## Error: cannot create MULTIPOLYGON from POINT
#' st_sfc(lapply(c("POINT", "MULTIPOINT"), function(x) st_cast(pt, x)))
st_cast.POINT <- function(x, to, ...) {
Expand Down
96 changes: 82 additions & 14 deletions R/read.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ set_utf8 = function(x) {
#' of LineString and MultiLineString, or of Polygon and MultiPolygon, convert
#' all to the Multi variety; defaults to \code{TRUE}
#' @param stringsAsFactors logical; logical: should character vectors be
#' converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is
#' converted to factors? Default for \code{read_sf} or R version >= 4.1.0 is
#' \code{FALSE}, for \code{st_read} and R version < 4.1.0 equal to
#' \code{default.stringsAsFactors()}
#' @param int64_as_string logical; if TRUE, Int64 attributes are returned as
Expand Down Expand Up @@ -146,7 +146,7 @@ st_read.default = function(dsn, layer, ...) {
}

process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
stringsAsFactors = ifelse(as_tibble, FALSE, sf_stringsAsFactors()),
stringsAsFactors = ifelse(as_tibble, FALSE, sf_stringsAsFactors()),
geometry_column = 1, as_tibble = FALSE, optional = FALSE) {

which.geom = which(vapply(x, function(f) inherits(f, "sfc"), TRUE))
Expand All @@ -156,7 +156,7 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,

# in case no geometry is present:
if (length(which.geom) == 0) {
if (! quiet)
if (! quiet)
warning("no simple feature geometries present: returning a data.frame or tbl_df", call. = FALSE)
x = if (!as_tibble) {
if (any(sapply(x, is.list)))
Expand Down Expand Up @@ -192,8 +192,13 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
for (i in seq_along(lc.other))
x[[ nm.lc[i] ]] = list.cols[[i]]

for (i in seq_along(geom))
x[[ nm[i] ]] = st_sfc(geom[[i]], crs = attr(geom[[i]], "crs")) # computes bbox
for (i in seq_along(geom)) {
if (is.null(attr(geom[[i]], "bbox"))) {
x[[ nm[i] ]] = st_sfc(geom[[i]], crs = attr(geom[[i]], "crs")) # computes bbox
} else {
x[[ nm[i] ]] = geom[[i]]
}
}

x = st_as_sf(x, ...,
sf_column_name = if (is.character(geometry_column)) geometry_column else nm[geometry_column],
Expand All @@ -204,20 +209,72 @@ process_cpl_read_ogr = function(x, quiet = FALSE, ..., check_ring_dir = FALSE,
x
}

# Allow setting the default to TRUE to make it easier to run existing tests
# of st_read() through the stream interface
default_st_read_use_stream = function() {
getOption(
"sf.st_read_use_stream",
identical(Sys.getenv("R_SF_ST_READ_USE_STREAM"), "true")
)
}

process_cpl_read_ogr_stream = function(x, default_crs, num_features, fid_column_name,
crs = NULL, ...) {
is_geometry_column = vapply(
x$get_schema()$children,
function(s) identical(s$metadata[["ARROW:extension:name"]], "ogc.wkb"),
logical(1)
)

crs = if (is.null(crs)) st_crs(default_crs) else st_crs(crs)
if (num_features == -1) {
num_features = NULL
}
df = suppressWarnings(nanoarrow::convert_array_stream(x, size = num_features))

df[is_geometry_column] = lapply(df[is_geometry_column], function(x) {
class(x) <- "WKB"
x <- st_as_sfc(x)
st_set_crs(x, crs)
})

# # Prefer "geometry" as the geometry column name
# if (any(is_geometry_column) && !("geometry" %in% names(df))) {
# names(df)[which(is_geometry_column)[1]] = "geometry"
# }

# Rename OGC_FID to fid_column_name and move to end
if (length(fid_column_name) == 1 && "OGC_FID" %in% names(df)) {
df <- df[c(setdiff(names(df), "OGC_FID"), "OGC_FID")]
names(df)[names(df) == "OGC_FID"] = fid_column_name
}

# Move geometry to the end
# if ("geometry" %in% names(df)) {
# df <- df[c(setdiff(names(df), "geometry"), "geometry")]
# }
gc1 = which(is_geometry_column)[1]
df = df[c(setdiff(seq_along(df), gc1), gc1)]

process_cpl_read_ogr(df, ...)
}

#' @name st_read
#' @param fid_column_name character; name of column to write feature IDs to; defaults to not doing this
#' @param drivers character; limited set of driver short names to be tried (default: try all)
#' @param wkt_filter character; WKT representation of a spatial filter (may be used as bounding box, selecting overlapping geometries); see examples
#' @param optional logical; passed to \link[base]{as.data.frame}; always \code{TRUE} when \code{as_tibble} is \code{TRUE}
#' @param use_stream Use TRUE to use the experimental columnar interface introduced in GDAL 3.6.
#' @note The use of \code{system.file} in examples make sure that examples run regardless where R is installed:
#' typical users will not use \code{system.file} but give the file name directly, either with full path or relative
#' to the current working directory (see \link{getwd}). "Shapefiles" consist of several files with the same basename
#' that reside in the same directory, only one of them having extension \code{.shp}.
#' @export
st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet = FALSE, geometry_column = 1L,
st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet = FALSE, geometry_column = 1L,
type = 0, promote_to_multi = TRUE, stringsAsFactors = sf_stringsAsFactors(),
int64_as_string = FALSE, check_ring_dir = FALSE, fid_column_name = character(0),
drivers = character(0), wkt_filter = character(0), optional = FALSE) {
drivers = character(0), wkt_filter = character(0), optional = FALSE,
use_stream = default_st_read_use_stream()) {

layer = if (missing(layer))
character(0)
Expand All @@ -233,11 +290,22 @@ st_read.character = function(dsn, layer, ..., query = NA, options = NULL, quiet
if (length(promote_to_multi) > 1)
stop("`promote_to_multi' should have length one, and applies to all geometry columns")

x = CPL_read_ogr(dsn, layer, query, as.character(options), quiet, type, fid_column_name,
drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width"))
process_cpl_read_ogr(x, quiet, check_ring_dir = check_ring_dir,
stringsAsFactors = stringsAsFactors, geometry_column = geometry_column,
optional = optional, ...)


if (use_stream) {
stream = nanoarrow::nanoarrow_allocate_array_stream()
info = CPL_read_gdal_stream(stream, dsn, layer, query, as.character(options), quiet,
drivers, wkt_filter, dsn_exists, dsn_isdb, fid_column_name, getOption("width"))
process_cpl_read_ogr_stream(stream, default_crs = info[[1]], num_features = info[[2]],
fid_column_name = fid_column_name, stringsAsFactors = stringsAsFactors, quiet = quiet, ...)
} else {
x = CPL_read_ogr(dsn, layer, query, as.character(options), quiet, type, fid_column_name,
drivers, wkt_filter, promote_to_multi, int64_as_string, dsn_exists, dsn_isdb, getOption("width"))

process_cpl_read_ogr(x, quiet, check_ring_dir = check_ring_dir,
stringsAsFactors = stringsAsFactors, geometry_column = geometry_column,
optional = optional, ...)
}
}

#' @name st_read
Expand Down Expand Up @@ -606,7 +674,7 @@ print.sf_layers = function(x, ...) {
#' @param options character; driver dependent dataset open options, multiple options supported.
#' @param do_count logical; if TRUE, count the features by reading them, even if their count is not reported by the driver
#' @name st_layers
#' @return list object of class \code{sf_layers} with elements
#' @return list object of class \code{sf_layers} with elements
#' \describe{
#' \item{name}{name of the layer}
#' \item{geomtype}{list with for each layer the geometry types}
Expand Down Expand Up @@ -751,7 +819,7 @@ check_append_delete <- function(append, delete) {

#' @name st_write
#' @export
#' @details st_delete deletes layer(s) in a data source, or a data source if layers are
#' @details st_delete deletes layer(s) in a data source, or a data source if layers are
#' omitted; it returns TRUE on success, FALSE on failure, invisibly.
st_delete = function(dsn, layer = character(0), driver = guess_driver_can_write(dsn), quiet = FALSE) {
invisible(CPL_delete_ogr(dsn, layer, driver, quiet) == 0)
Expand Down
7 changes: 2 additions & 5 deletions R/tidyverse.R
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,6 @@ mutate.sf <- function(.data, ..., .dots) {
#' @name tidyverse
#' @examples
#' if (require(dplyr, quietly = TRUE)) {
#' nc %>% transmute(AREA = AREA/10, geometry = geometry) %>% class()
#' nc %>% transmute(AREA = AREA/10) %>% class()
#' }
transmute.sf <- function(.data, ..., .dots) {
Expand All @@ -144,9 +143,7 @@ transmute.sf <- function(.data, ..., .dots) {
#' @examples
#' if (require(dplyr, quietly = TRUE)) {
#' nc %>% select(SID74, SID79) %>% names()
#' nc %>% select(SID74, SID79, geometry) %>% names()
#' nc %>% select(SID74, SID79) %>% class()
#' nc %>% select(SID74, SID79, geometry) %>% class()
#' }
#' @details \code{select} keeps the geometry regardless whether it is selected or not; to deselect it, first pipe through \code{as.data.frame} to let dplyr's own \code{select} drop it.
select.sf <- function(.data, ...) {
Expand Down Expand Up @@ -391,7 +388,7 @@ distinct.sf <- function(.data, ..., .keep_all = FALSE) {
#' @param na.rm see original function docs
#' @param factor_key see original function docs
#' @examples
#' if (require(tidyr, quietly = TRUE) && require(dplyr, quietly = TRUE)) {
#' if (require(tidyr, quietly = TRUE) && require(dplyr, quietly = TRUE) && "geometry" %in% names(nc)) {
#' nc %>% select(SID74, SID79) %>% gather("VAR", "SID", -geometry) %>% summary()
#' }
gather.sf <- function(data, key, value, ..., na.rm = FALSE, convert = FALSE, factor_key = FALSE) {
Expand Down Expand Up @@ -527,7 +524,7 @@ pivot_wider.sf = function(data,
#' @param fill see original function docs
#' @param drop see original function docs
#' @examples
#' if (require(tidyr, quietly = TRUE) && require(dplyr, quietly = TRUE)) {
#' if (require(tidyr, quietly = TRUE) && require(dplyr, quietly = TRUE) && "geometry" %in% names(nc)) {
#' nc$row = 1:100 # needed for spread to work
#' nc %>% select(SID74, SID79, geometry, row) %>%
#' gather("VAR", "SID", -geometry, -row) %>%
Expand Down
1 change: 1 addition & 0 deletions R/transform.R
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ chk_mpol = function(x) {
sanity_check = function(x) {
d = st_dimension(x) # flags empty geoms as NA
if (any(d == 2, na.rm = TRUE)) { # the polygon stuff
x = st_cast(x[d == 2]) # convert GEOMETRY to POLYGON or MULTIPOLYGON, if possible
if (inherits(x, "sfc_POLYGON"))
st_sfc(lapply(x, chk_pol), crs = st_crs(x))
else if (inherits(x, "sfc_MULTIPOLYGON"))
Expand Down
12 changes: 6 additions & 6 deletions man/st_cast.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/st_layers.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 5 additions & 2 deletions man/st_read.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/st_write.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 2 additions & 5 deletions man/tidyverse.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading