Skip to content

Commit

Permalink
Merge branch 'main' into bugfix/do-not-normalize-values
Browse files Browse the repository at this point in the history
  • Loading branch information
blaginin committed Dec 9, 2024
2 parents 0574ab8 + d3cfc45 commit 2045495
Show file tree
Hide file tree
Showing 207 changed files with 3,522 additions and 2,107 deletions.
11 changes: 7 additions & 4 deletions .github/workflows/rust.yml
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,12 @@ jobs:
- name: Check datafusion-common without default features
run: cargo check --all-targets --no-default-features -p datafusion-common

- name: Check datafusion-functions
- name: Check datafusion-functions without default features
run: cargo check --all-targets --no-default-features -p datafusion-functions

- name: Check datafusion-substrait without default features
run: cargo check --all-targets --no-default-features -p datafusion-substrait

- name: Check workspace in debug mode
run: cargo check --all-targets --workspace

Expand Down Expand Up @@ -582,9 +585,9 @@ jobs:
#
# To reproduce:
# 1. Install the version of Rust that is failing. Example:
# rustup install 1.79.0
# rustup install 1.80.1
# 2. Run the command that failed with that version. Example:
# cargo +1.79.0 check -p datafusion
# cargo +1.80.1 check -p datafusion
#
# To resolve, either:
# 1. Change your code to use older Rust features,
Expand All @@ -603,4 +606,4 @@ jobs:
run: cargo msrv --output-format json --log-target stdout verify
- name: Check datafusion-cli
working-directory: datafusion-cli
run: cargo msrv --output-format json --log-target stdout verify
run: cargo msrv --output-format json --log-target stdout verify
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ datafusion/sqllogictest/test_files/scratch*
# temp file for core
datafusion/core/*.parquet

# Generated core benchmark data
datafusion/core/benches/data/*

# rat
filtered_rat.txt
rat.txt
4 changes: 3 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ members = [
"datafusion/functions",
"datafusion/functions-aggregate",
"datafusion/functions-aggregate-common",
"datafusion/functions-table",
"datafusion/functions-nested",
"datafusion/functions-window",
"datafusion/functions-window-common",
Expand Down Expand Up @@ -64,7 +65,7 @@ homepage = "https://datafusion.apache.org"
license = "Apache-2.0"
readme = "README.md"
repository = "https://github.com/apache/datafusion"
rust-version = "1.79"
rust-version = "1.80.1"
version = "43.0.0"

[workspace.dependencies]
Expand Down Expand Up @@ -110,6 +111,7 @@ datafusion-functions = { path = "datafusion/functions", version = "43.0.0" }
datafusion-functions-aggregate = { path = "datafusion/functions-aggregate", version = "43.0.0" }
datafusion-functions-aggregate-common = { path = "datafusion/functions-aggregate-common", version = "43.0.0" }
datafusion-functions-nested = { path = "datafusion/functions-nested", version = "43.0.0" }
datafusion-functions-table = { path = "datafusion/functions-table", version = "43.0.0" }
datafusion-functions-window = { path = "datafusion/functions-window", version = "43.0.0" }
datafusion-functions-window-common = { path = "datafusion/functions-window-common", version = "43.0.0" }
datafusion-macros = { path = "datafusion/macros", version = "43.0.0" }
Expand Down
11 changes: 7 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -126,14 +126,17 @@ Optional features:

## Rust Version Compatibility Policy

DataFusion's Minimum Required Stable Rust Version (MSRV) policy is to support stable [4 latest
Rust versions](https://releases.rs) OR the stable minor Rust version as of 4 months, whichever is lower.
The Rust toolchain releases are tracked at [Rust Versions](https://releases.rs) and follow
[semantic versioning](https://semver.org/). A Rust toolchain release can be identified
by a version string like `1.80.0`, or more generally `major.minor.patch`.

DataFusion's supports the last 4 stable Rust minor versions released and any such versions released within the last 4 months.

For example, given the releases `1.78.0`, `1.79.0`, `1.80.0`, `1.80.1` and `1.81.0` DataFusion will support 1.78.0, which is 3 minor versions prior to the most minor recent `1.81`.

If a hotfix is released for the minimum supported Rust version (MSRV), the MSRV will be the minor version with all hotfixes, even if it surpasses the four-month window.
Note: If a Rust hotfix is released for the current MSRV, the MSRV will be updated to the specific minor version that includes all applicable hotfixes preceding other policies.

We enforce this policy using a [MSRV CI Check](https://github.com/search?q=repo%3Aapache%2Fdatafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&type=code)
DataFusion enforces MSRV policy using a [MSRV CI Check](https://github.com/search?q=repo%3Aapache%2Fdatafusion+rust-version+language%3ATOML+path%3A%2F%5ECargo.toml%2F&type=code)

## DataFusion API evolution policy

Expand Down
35 changes: 18 additions & 17 deletions benchmarks/src/bin/external_aggr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use std::sync::OnceLock;
use std::sync::LazyLock;
use structopt::StructOpt;

use arrow::record_batch::RecordBatch;
Expand All @@ -33,7 +33,8 @@ use datafusion::datasource::{MemTable, TableProvider};
use datafusion::error::Result;
use datafusion::execution::memory_pool::FairSpillPool;
use datafusion::execution::memory_pool::{human_readable_size, units};
use datafusion::execution::runtime_env::RuntimeConfig;
use datafusion::execution::runtime_env::RuntimeEnvBuilder;
use datafusion::execution::SessionStateBuilder;
use datafusion::physical_plan::display::DisplayableExecutionPlan;
use datafusion::physical_plan::{collect, displayable};
use datafusion::prelude::*;
Expand Down Expand Up @@ -90,7 +91,13 @@ struct QueryResult {
/// Memory limits to run: 64MiB, 32MiB, 16MiB
/// Q2 requires 250MiB for aggregation
/// Memory limits to run: 512MiB, 256MiB, 128MiB, 64MiB, 32MiB
static QUERY_MEMORY_LIMITS: OnceLock<HashMap<usize, Vec<u64>>> = OnceLock::new();
static QUERY_MEMORY_LIMITS: LazyLock<HashMap<usize, Vec<u64>>> = LazyLock::new(|| {
use units::*;
let mut map = HashMap::new();
map.insert(1, vec![64 * MB, 32 * MB, 16 * MB]);
map.insert(2, vec![512 * MB, 256 * MB, 128 * MB, 64 * MB, 32 * MB]);
map
});

impl ExternalAggrConfig {
const AGGR_TABLES: [&'static str; 1] = ["lineitem"];
Expand All @@ -113,16 +120,6 @@ impl ExternalAggrConfig {
"#,
];

fn init_query_memory_limits() -> &'static HashMap<usize, Vec<u64>> {
use units::*;
QUERY_MEMORY_LIMITS.get_or_init(|| {
let mut map = HashMap::new();
map.insert(1, vec![64 * MB, 32 * MB, 16 * MB]);
map.insert(2, vec![512 * MB, 256 * MB, 128 * MB, 64 * MB, 32 * MB]);
map
})
}

/// If `--query` and `--memory-limit` is not speicified, run all queries
/// with pre-configured memory limits
/// If only `--query` is specified, run the query with all memory limits
Expand Down Expand Up @@ -160,8 +157,7 @@ impl ExternalAggrConfig {
query_executions.push((query_id, limit));
}
None => {
let memory_limits_table = Self::init_query_memory_limits();
let memory_limits = memory_limits_table.get(&query_id).unwrap();
let memory_limits = QUERY_MEMORY_LIMITS.get(&query_id).unwrap();
for limit in memory_limits {
query_executions.push((query_id, *limit));
}
Expand Down Expand Up @@ -195,10 +191,15 @@ impl ExternalAggrConfig {
let query_name =
format!("Q{query_id}({})", human_readable_size(mem_limit as usize));
let config = self.common.config();
let runtime_config = RuntimeConfig::new()
let runtime_env = RuntimeEnvBuilder::new()
.with_memory_pool(Arc::new(FairSpillPool::new(mem_limit as usize)))
.build_arc()?;
let ctx = SessionContext::new_with_config_rt(config, runtime_config);
let state = SessionStateBuilder::new()
.with_config(config)
.with_runtime_env(runtime_env)
.with_default_features()
.build();
let ctx = SessionContext::from(state);

// register tables
self.register_tables(&ctx).await?;
Expand Down
10 changes: 6 additions & 4 deletions benchmarks/src/sort_tpch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ use datafusion::datasource::listing::{
};
use datafusion::datasource::{MemTable, TableProvider};
use datafusion::error::Result;
use datafusion::execution::runtime_env::RuntimeConfig;
use datafusion::execution::SessionStateBuilder;
use datafusion::physical_plan::display::DisplayableExecutionPlan;
use datafusion::physical_plan::{displayable, execute_stream};
use datafusion::prelude::*;
Expand Down Expand Up @@ -188,9 +188,11 @@ impl RunOpt {
/// Benchmark query `query_id` in `SORT_QUERIES`
async fn benchmark_query(&self, query_id: usize) -> Result<Vec<QueryResult>> {
let config = self.common.config();

let runtime_config = RuntimeConfig::new().build_arc()?;
let ctx = SessionContext::new_with_config_rt(config, runtime_config);
let state = SessionStateBuilder::new()
.with_config(config)
.with_default_features()
.build();
let ctx = SessionContext::from(state);

// register tables
self.register_tables(&ctx).await?;
Expand Down
Loading

0 comments on commit 2045495

Please sign in to comment.