Skip to content

Commit

Permalink
Lowercase config params
Browse files Browse the repository at this point in the history
  • Loading branch information
blaginin committed Dec 4, 2024
1 parent cb7da8c commit 7c2b3fe
Show file tree
Hide file tree
Showing 6 changed files with 66 additions and 27 deletions.
9 changes: 5 additions & 4 deletions datafusion-cli/src/object_storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -471,12 +471,13 @@ mod tests {

#[tokio::test]
async fn s3_object_store_builder() -> Result<()> {
let access_key_id = "fake_access_key_id";
let secret_access_key = "fake_secret_access_key";
// "fake" is uppercase to ensure the values are not lowercased when parsed
let access_key_id = "FAKE_access_key_id";
let secret_access_key = "FAKE_secret_access_key";
let region = "fake_us-east-2";
let endpoint = "endpoint33";
let session_token = "fake_session_token";
let location = "s3://bucket/path/file.parquet";
let session_token = "FAKE_session_token";
let location = "s3://bucket/path/FAKE/file.parquet";

let table_url = ListingTableUrl::parse(location)?;
let scheme = table_url.scheme();
Expand Down
43 changes: 29 additions & 14 deletions datafusion/common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ use crate::{DataFusionError, Result};
/// /// Amazing config
/// pub struct MyConfig {
/// /// Field 1 doc
/// field1: String, default = "".to_string()
/// field1: String, transform = str::to_lowercase, default = "".to_string()
///
/// /// Field 2 doc
/// field2: usize, default = 232
Expand Down Expand Up @@ -67,9 +67,12 @@ use crate::{DataFusionError, Result};
/// fn set(&mut self, key: &str, value: &str) -> Result<()> {
/// let (key, rem) = key.split_once('.').unwrap_or((key, ""));
/// match key {
/// "field1" => self.field1.set(rem, value),
/// "field2" => self.field2.set(rem, value),
/// "field3" => self.field3.set(rem, value),
/// "field1" => {
/// let value = str::to_lowercase(value);
/// self.field1.set(rem, value.as_ref())
/// },
/// "field2" => self.field2.set(rem, value.as_ref()),
/// "field3" => self.field3.set(rem, value.as_ref()),
/// _ => _internal_err!(
/// "Config value \"{}\" not found on MyConfig",
/// key
Expand Down Expand Up @@ -102,15 +105,14 @@ use crate::{DataFusionError, Result};
/// ```
///
/// NB: Misplaced commas may result in nonsensical errors
///
#[macro_export]
macro_rules! config_namespace {
(
$(#[doc = $struct_d:tt])*
$vis:vis struct $struct_name:ident {
$(
$(#[doc = $d:tt])*
$field_vis:vis $field_name:ident : $field_type:ty, default = $default:expr
$field_vis:vis $field_name:ident : $field_type:ty, $(transform = $transform:expr,)? default = $default:expr
)*$(,)*
}
) => {
Expand All @@ -127,9 +129,13 @@ macro_rules! config_namespace {
impl ConfigField for $struct_name {
fn set(&mut self, key: &str, value: &str) -> Result<()> {
let (key, rem) = key.split_once('.').unwrap_or((key, ""));

match key {
$(
stringify!($field_name) => self.$field_name.set(rem, value),
stringify!($field_name) => {
$(let value = $transform(value);)?
self.$field_name.set(rem, value.as_ref())
},
)*
_ => return _config_err!(
"Config value \"{}\" not found on {}", key, stringify!($struct_name)
Expand Down Expand Up @@ -216,7 +222,8 @@ config_namespace! {

/// Configure the SQL dialect used by DataFusion's parser; supported values include: Generic,
/// MySQL, PostgreSQL, Hive, SQLite, Snowflake, Redshift, MsSQL, ClickHouse, BigQuery, and Ansi.
pub dialect: String, default = "generic".to_string()
pub dialect: String, default = "generic".to_string() // no need to lowercase because
// [`sqlparser::dialect_from_str`] is case-insensitive

/// If true, permit lengths for `VARCHAR` such as `VARCHAR(20)`, but
/// ignore the length. If false, error if a `VARCHAR` with a length is
Expand Down Expand Up @@ -431,7 +438,7 @@ config_namespace! {
///
/// Note that this default setting is not the same as
/// the default parquet writer setting.
pub compression: Option<String>, default = Some("zstd(3)".into())
pub compression: Option<String>, transform = str::to_lowercase, default = Some("zstd(3)".into())

/// (writing) Sets if dictionary encoding is enabled. If NULL, uses
/// default parquet writer setting
Expand All @@ -444,7 +451,7 @@ config_namespace! {
/// Valid values are: "none", "chunk", and "page"
/// These values are not case sensitive. If NULL, uses
/// default parquet writer setting
pub statistics_enabled: Option<String>, default = Some("page".into())
pub statistics_enabled: Option<String>, transform = str::to_lowercase, default = Some("page".into())

/// (writing) Sets max statistics size for any column. If NULL, uses
/// default parquet writer setting
Expand All @@ -470,7 +477,7 @@ config_namespace! {
/// delta_byte_array, rle_dictionary, and byte_stream_split.
/// These values are not case sensitive. If NULL, uses
/// default parquet writer setting
pub encoding: Option<String>, default = None
pub encoding: Option<String>, transform = str::to_lowercase, default = None

/// (writing) Use any available bloom filters when reading parquet files
pub bloom_filter_on_read: bool, default = true
Expand Down Expand Up @@ -973,16 +980,24 @@ impl<F: ConfigField + Default> ConfigField for Option<F> {

#[macro_export]
macro_rules! config_field {
($t:ty) => {
($t:ty $(, $transform:expr)?) => {
impl ConfigField for $t {
fn visit<V: Visit>(&self, v: &mut V, key: &str, description: &'static str) {
v.some(key, self, description)
}

fn set(&mut self, _: &str, value: &str) -> Result<()> {
$(
let value = $transform(&value);
)?

*self = value.parse().map_err(|e| {
DataFusionError::Context(
format!(concat!("Error parsing {} as ", stringify!($t),), value),
format!(
"Error parsing '{}' as {}",
value,
stringify!($t),
),
Box::new(DataFusionError::External(Box::new(e))),
)
})?;
Expand All @@ -993,7 +1008,7 @@ macro_rules! config_field {
}

config_field!(String);
config_field!(bool);
config_field!(bool, str::to_lowercase);
config_field!(usize);
config_field!(f64);
config_field!(u64);
Expand Down
2 changes: 1 addition & 1 deletion datafusion/core/src/datasource/stream.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ impl TableProviderFactory for StreamTableFactory {
let header = if let Ok(opt) = cmd
.options
.get("format.has_header")
.map(|has_header| bool::from_str(has_header))
.map(|has_header| bool::from_str(has_header.to_lowercase().as_str()))
.transpose()
{
opt.unwrap_or(false)
Expand Down
17 changes: 13 additions & 4 deletions datafusion/core/tests/config_from_env.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,19 @@ use std::env;
fn from_env() {
// Note: these must be a single test to avoid interference from concurrent execution
let env_key = "DATAFUSION_OPTIMIZER_FILTER_NULL_JOIN_KEYS";
env::set_var(env_key, "true");
let config = ConfigOptions::from_env().unwrap();
// valid testing in different cases
for bool_option in ["true", "TRUE", "True"] {
env::set_var(env_key, bool_option);
let config = ConfigOptions::from_env().unwrap();
env::remove_var(env_key);
assert!(config.optimizer.filter_null_join_keys);
}

// invalid testing
env::set_var(env_key, "ttruee");
let err = ConfigOptions::from_env().unwrap_err().strip_backtrace();
assert_eq!(err, "Error parsing 'ttruee' as bool\ncaused by\nExternal error: provided string was not `true` or `false`");
env::remove_var(env_key);
assert!(config.optimizer.filter_null_join_keys);

let env_key = "DATAFUSION_EXECUTION_BATCH_SIZE";

Expand All @@ -37,7 +46,7 @@ fn from_env() {
// for invalid testing
env::set_var(env_key, "abc");
let err = ConfigOptions::from_env().unwrap_err().strip_backtrace();
assert_eq!(err, "Error parsing abc as usize\ncaused by\nExternal error: invalid digit found in string");
assert_eq!(err, "Error parsing 'abc' as usize\ncaused by\nExternal error: invalid digit found in string");

env::remove_var(env_key);
let config = ConfigOptions::from_env().unwrap();
Expand Down
14 changes: 14 additions & 0 deletions datafusion/sqllogictest/test_files/create_external_table.slt
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,20 @@ OPTIONS (
has_header false,
compression gzip);

# Verify that some options are case insensitive
statement ok
CREATE EXTERNAL TABLE IF NOT EXISTS region (
r_regionkey BIGINT,
r_name VARCHAR,
r_comment VARCHAR,
r_rev VARCHAR,
) STORED AS CSV LOCATION 'test_files/tpch/data/region.tbl'
OPTIONS (
format.delimiter '|',
has_header FALSE,
compression GZIP);


# Create an external parquet table and infer schema to order by

# query should succeed
Expand Down
8 changes: 4 additions & 4 deletions datafusion/sqllogictest/test_files/set_variable.slt
Original file line number Diff line number Diff line change
Expand Up @@ -93,10 +93,10 @@ datafusion.execution.coalesce_batches false
statement ok
set datafusion.catalog.information_schema = true

statement error DataFusion error: Error parsing 1 as bool
statement error DataFusion error: Error parsing '1' as bool
SET datafusion.execution.coalesce_batches to 1

statement error DataFusion error: Error parsing abc as bool
statement error DataFusion error: Error parsing 'abc' as bool
SET datafusion.execution.coalesce_batches to abc

# set u64 variable
Expand Down Expand Up @@ -132,10 +132,10 @@ datafusion.execution.batch_size 2
statement ok
set datafusion.catalog.information_schema = true

statement error DataFusion error: Error parsing -1 as usize
statement error DataFusion error: Error parsing '-1' as usize
SET datafusion.execution.batch_size to -1

statement error DataFusion error: Error parsing abc as usize
statement error DataFusion error: Error parsing 'abc' as usize
SET datafusion.execution.batch_size to abc

statement error External error: invalid digit found in string
Expand Down

0 comments on commit 7c2b3fe

Please sign in to comment.