Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Upgrade to object_store 0.9.0 and arrow 50.0.0 #8758

Merged
merged 14 commits into from
Jan 14, 2024
28 changes: 19 additions & 9 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,13 @@ rust-version = "1.70"
version = "34.0.0"

[workspace.dependencies]
arrow = { version = "49.0.0", features = ["prettyprint"] }
arrow-array = { version = "49.0.0", default-features = false, features = ["chrono-tz"] }
arrow-buffer = { version = "49.0.0", default-features = false }
arrow-flight = { version = "49.0.0", features = ["flight-sql-experimental"] }
arrow-ipc = { version = "49.0.0", default-features = false, features = ["lz4"] }
arrow-ord = { version = "49.0.0", default-features = false }
arrow-schema = { version = "49.0.0", default-features = false }
arrow = { version = "50.0.0", features = ["prettyprint"] }
arrow-array = { version = "50.0.0", default-features = false, features = ["chrono-tz"] }
arrow-buffer = { version = "50.0.0", default-features = false }
arrow-flight = { version = "50.0.0", features = ["flight-sql-experimental"] }
arrow-ipc = { version = "50.0.0", default-features = false, features = ["lz4"] }
arrow-ord = { version = "50.0.0", default-features = false }
arrow-schema = { version = "50.0.0", default-features = false }
async-trait = "0.1.73"
bigdecimal = "0.4.1"
bytes = "1.4"
Expand All @@ -64,9 +64,9 @@ indexmap = "2.0.0"
itertools = "0.12"
log = "^0.4"
num_cpus = "1.13.0"
object_store = { version = "0.8.0", default-features = false }
object_store = { version = "0.9.0", default-features = false }
parking_lot = "0.12"
parquet = { version = "49.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
parquet = { version = "50.0.0", default-features = false, features = ["arrow", "async", "object_store"] }
rand = "0.8"
rstest = "0.18.0"
serde_json = "1"
Expand All @@ -92,3 +92,13 @@ opt-level = 3
overflow-checks = false
panic = 'unwind'
rpath = false

[patch.crates-io]
arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
arrow-array = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
arrow-flight = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
arrow-ipc = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
arrow-ord = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
1 change: 0 additions & 1 deletion benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ rust-version = "1.70"
[features]
ci = []
default = ["mimalloc"]
simd = ["datafusion/simd"]
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Arrow 50 removed the manual SIMD implementation and now relies on auto-vectorization (see apache/arrow-rs#5184), so the `simd` feature is no longer needed here.

snmalloc = ["snmalloc-rs"]

[dependencies]
Expand Down
113 changes: 54 additions & 59 deletions datafusion-cli/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 12 additions & 3 deletions datafusion-cli/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ rust-version = "1.70"
readme = "README.md"

[dependencies]
arrow = "49.0.0"
arrow = "50.0.0"
async-trait = "0.1.41"
aws-config = "0.55"
aws-credential-types = "0.55"
Expand All @@ -40,9 +40,9 @@ dirs = "4.0.0"
env_logger = "0.9"
futures = "0.3"
mimalloc = { version = "0.1", default-features = false }
object_store = { version = "0.8.0", features = ["aws", "gcp"] }
object_store = { version = "0.9.0", features = ["aws", "gcp"] }
parking_lot = { version = "0.12" }
parquet = { version = "49.0.0", default-features = false }
parquet = { version = "50.0.0", default-features = false }
regex = "1.8"
rustyline = "11.0"
tokio = { version = "1.24", features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot", "signal"] }
Expand All @@ -53,3 +53,12 @@ assert_cmd = "2.0"
ctor = "0.2.0"
predicates = "3.0"
rstest = "0.17"

[patch.crates-io]
arrow = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
arrow-array = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
arrow-buffer = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
arrow-ipc = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
arrow-ord = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
arrow-schema = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
parquet = { git = "https://github.com/tustvold/arrow-rs.git", rev = "06490e8ab2b986784d2998cfcf74cbf9c025ef10" }
8 changes: 0 additions & 8 deletions datafusion-cli/src/exec.rs
Original file line number Diff line number Diff line change
Expand Up @@ -340,14 +340,6 @@ mod tests {
let session_token = "fake_session_token";
let location = "s3://bucket/path/file.parquet";

// Missing region
alamb marked this conversation as resolved.
Show resolved Hide resolved
let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET
OPTIONS('access_key_id' '{access_key_id}', 'secret_access_key' '{secret_access_key}') LOCATION '{location}'");
let err = create_external_table_test(location, &sql)
.await
.unwrap_err();
assert!(err.to_string().contains("Missing region"));

// Should be OK
let sql = format!("CREATE EXTERNAL TABLE test STORED AS PARQUET
OPTIONS('access_key_id' '{access_key_id}', 'secret_access_key' '{secret_access_key}', 'region' '{region}', 'session_token' '{session_token}') LOCATION '{location}'");
Expand Down
1 change: 0 additions & 1 deletion datafusion/core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@ parquet = ["datafusion-common/parquet", "dep:parquet"]
pyarrow = ["datafusion-common/pyarrow", "parquet"]
regex_expressions = ["datafusion-physical-expr/regex_expressions", "datafusion-optimizer/regex_expressions"]
serde = ["arrow-schema/serde"]
simd = ["arrow/simd"]
alamb marked this conversation as resolved.
Show resolved Hide resolved
unicode_expressions = ["datafusion-physical-expr/unicode_expressions", "datafusion-optimizer/unicode_expressions", "datafusion-sql/unicode_expressions"]

[dependencies]
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/physical_plan/csv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ impl FileOpener for CsvOpener {

let range = match calculated_range {
RangeCalculation::Range(None) => None,
RangeCalculation::Range(Some(range)) => Some(range),
RangeCalculation::Range(Some(range)) => Some(range.into()),
RangeCalculation::TerminateEarly => {
return Ok(
futures::stream::poll_fn(move |_| Poll::Ready(None)).boxed()
Expand Down
Loading
Loading