From ebf2f5a7416daab0477d1f9ee89f4706ef3a4479 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Thu, 25 Jan 2024 10:18:23 +0100 Subject: [PATCH 01/27] VTX-4075: ore verbose display for union --- .../enforce_distribution.rs | 10 ++-- .../src/physical_optimizer/enforce_sorting.rs | 50 +++++++++---------- datafusion/physical-plan/src/union.rs | 11 +++- 3 files changed, 40 insertions(+), 31 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index f2e04989ef66..126253df247a 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -3574,7 +3574,7 @@ pub(crate) mod tests { // should not repartition / sort (as the data was already sorted) let expected = &[ "SortPreservingMergeExec: [c@2 ASC]", - "UnionExec", + "UnionExec: sort_expr=[c@2 ASC]", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", ]; @@ -3584,7 +3584,7 @@ pub(crate) mod tests { let expected = &[ "SortExec: expr=[c@2 ASC]", "CoalescePartitionsExec", - "UnionExec", + "UnionExec: sort_expr=[c@2 ASC]", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", ]; @@ -4180,13 +4180,13 @@ pub(crate) mod tests { // should not sort (as the data was already sorted) let expected_parquet = &[ "SortPreservingMergeExec: [c@2 ASC]", - "UnionExec", + "UnionExec: sort_expr=[c@2 ASC]", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", ]; let expected_csv = &[ "SortPreservingMergeExec: [c@2 ASC]", - 
"UnionExec", + "UnionExec: sort_expr=[c@2 ASC]", "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], has_header=false", "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], has_header=false", ]; @@ -4652,4 +4652,4 @@ pub(crate) mod tests { Ok(()) } -} +} \ No newline at end of file diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index 277404b301c4..d042329ac999 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -1185,7 +1185,7 @@ mod tests { let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " GlobalLimitExec: skip=0, fetch=100", " LocalLimitExec: fetch=100", @@ -1196,7 +1196,7 @@ mod tests { let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " GlobalLimitExec: skip=0, fetch=100", " LocalLimitExec: fetch=100", @@ -1272,7 +1272,7 @@ mod tests { // one input to the union is already sorted, one is not. 
let expected_input = vec![ "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", @@ -1303,7 +1303,7 @@ mod tests { // one input to the union is already sorted, one is not. let expected_input = vec![ "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", @@ -1335,13 +1335,13 @@ mod tests { // First ParquetExec has output ordering(nullable_col@0 ASC). However, it doesn't satisfy the // required ordering of SortPreservingMergeExec. 
let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", @@ -1373,7 +1373,7 @@ mod tests { // Second input to the union is already Sorted (matches with the required ordering by the SortPreservingMergeExec above). // Third input to the union is not Sorted (SortExec is matches required ordering by the SortPreservingMergeExec above). 
let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", @@ -1381,7 +1381,7 @@ mod tests { " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; // should adjust sorting in the first input of the union such that it is not unnecessarily fine let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", @@ -1414,14 +1414,14 @@ mod tests { // `UnionExec` satisfy the ordering, OR add a single sort after // the `UnionExec` (both of which are equally good for this example). 
let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", @@ -1463,13 +1463,13 @@ mod tests { // example below. However, we should be able to change the unnecessarily // fine `SortExec`s below with required `SortExec`s that are absolutely necessary. 
let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " SortExec: expr=[nullable_col@0 ASC]", @@ -1515,7 +1515,7 @@ mod tests { // Should adjust the requirement in the third input of the union so // that it is not unnecessarily fine. let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", @@ -1544,14 +1544,14 @@ mod tests { // Union has unnecessarily fine ordering below it. We should be able to replace them with absolutely necessary ordering. 
let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; // Union preserves the inputs ordering and we should not change any of the SortExecs under UnionExec let expected_output = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " SortExec: expr=[nullable_col@0 ASC]", @@ -1642,7 +1642,7 @@ mod tests { let expected_input = [ "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow }], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 DESC NULLS LAST]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 DESC NULLS LAST]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST]", @@ -1650,7 +1650,7 @@ mod tests { let expected_optimized = [ "WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(NULL) }]", " SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " 
ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1679,14 +1679,14 @@ mod tests { // The unnecessary SortExecs should be removed let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow }], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; let expected_optimized = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow }], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ 
-1725,7 +1725,7 @@ mod tests { // Should not change the unnecessarily fine `SortExec`s because there is `LimitExec` let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " GlobalLimitExec: skip=0, fetch=100", @@ -1733,7 +1733,7 @@ mod tests { " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec", + " UnionExec: sort_expr=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " GlobalLimitExec: skip=0, fetch=100", @@ -2162,7 +2162,7 @@ mod tests { ]; let expected_optimized = [ "SortPreservingMergeExec: [a@0 ASC]", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", + " SortPreservingRepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, sort_exprs=a@0 ASC", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC], has_header=false", ]; @@ -2191,7 +2191,7 @@ mod tests { ]; let expected_optimized = [ "SortPreservingMergeExec: [a@0 ASC]", - " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", + " SortPreservingRepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, sort_exprs=a@0 ASC", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 
ASC], has_header=false", ]; @@ -2263,7 +2263,7 @@ mod tests { let expected_input = [ "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow }], mode=[Sorted]", " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", - " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC,b@1 ASC", + " SortPreservingRepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, sort_exprs=a@0 ASC,b@1 ASC", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " SortExec: expr=[a@0 ASC,b@1 ASC]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", @@ -2279,4 +2279,4 @@ mod tests { assert_optimized!(expected_input, expected_optimized, physical_plan, false); Ok(()) } -} +} \ No newline at end of file diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 14ef9c2ec27b..8a0935a0b3d0 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -120,7 +120,16 @@ impl DisplayAs for UnionExec { ) -> std::fmt::Result { match t { DisplayFormatType::Default | DisplayFormatType::Verbose => { - write!(f, "UnionExec") + match self.output_ordering() { + Some(exprs) => { + write!( + f, + "UnionExec: sort_expr=[{}]", + PhysicalSortExpr::format_list(exprs) + ) + } + _ => write!(f, "UnionExec"), + } } } } From edc5a5fa2bb67ee7e2410e950a996bb96c041530 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Thu, 25 Jan 2024 10:25:59 +0100 Subject: [PATCH 02/27] VTX-4075: fmt --- datafusion/core/src/physical_optimizer/enforce_distribution.rs | 2 +- datafusion/core/src/physical_optimizer/enforce_sorting.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs 
b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 126253df247a..a06456ea41e6 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -4652,4 +4652,4 @@ pub(crate) mod tests { Ok(()) } -} \ No newline at end of file +} diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index d042329ac999..81eb3ada4c2a 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -2279,4 +2279,4 @@ mod tests { assert_optimized!(expected_input, expected_optimized, physical_plan, false); Ok(()) } -} \ No newline at end of file +} From 28ade246d2b119265cbfbaef2d1cbdcc0d89f261 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Thu, 25 Jan 2024 10:34:03 +0100 Subject: [PATCH 03/27] VTX-4075: clippy --- datafusion/common/src/error.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 56b52bd73f9b..045768d1fba1 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -581,7 +581,6 @@ mod test { #[cfg(not(feature = "backtrace"))] #[test] - #[allow(clippy::unnecessary_literal_unwrap)] fn test_disabled_backtrace() { let res: Result<(), DataFusionError> = plan_err!("Err"); let res = res.unwrap_err().to_string(); @@ -645,7 +644,6 @@ mod test { } #[test] - #[allow(clippy::unnecessary_literal_unwrap)] fn test_make_error_parse_input() { let res: Result<(), DataFusionError> = plan_err!("Err"); let res = res.unwrap_err(); From 85e8504fc2adb8d0ea6a85822b1927098b02d73f Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Thu, 25 Jan 2024 10:40:31 +0100 Subject: [PATCH 04/27] VTX-4075: clippy --- datafusion/common/src/error.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/common/src/error.rs b/datafusion/common/src/error.rs index 
045768d1fba1..a3c950b201b4 100644 --- a/datafusion/common/src/error.rs +++ b/datafusion/common/src/error.rs @@ -530,6 +530,7 @@ macro_rules! sql_err { // To avoid compiler error when using macro in the same crate: // macros from the current crate cannot be referred to by absolute paths +#[allow(unused_imports)] pub use exec_err as _exec_err; pub use internal_err as _internal_err; pub use not_impl_err as _not_impl_err; From e5a8db7f429627bdba1eb9a514c8d26a66fefdb2 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Thu, 25 Jan 2024 11:34:25 +0100 Subject: [PATCH 05/27] VTX-4075: display for projection --- datafusion/physical-plan/src/projection.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index cc2ab62049ed..3e185e4ff4b8 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -141,7 +141,17 @@ impl DisplayAs for ProjectionExec { }) .collect(); - write!(f, "ProjectionExec: expr=[{}]", expr.join(", ")) + match self.output_ordering() { + Some(exprs) => { + write!( + f, + "ProjectionExec: expr=[{}], sort_expr=[{}]", + expr.join(", "), + PhysicalSortExpr::format_list(exprs) + ) + } + _ => write!(f, "ProjectionExec: expr=[{}]", expr.join(", ")), + } } } } From 25afc102a777e3f1a595dca85b7e06a607f564c4 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Thu, 25 Jan 2024 15:12:33 +0100 Subject: [PATCH 06/27] VTX-4075: clippy --- .../physical_optimizer/projection_pushdown.rs | 1 + .../src/simplify_expressions/guarantees.rs | 1 + .../physical-expr/src/array_expressions.rs | 2 +- datafusion/physical-plan/src/common.rs | 30 +++++++++++++++++++ .../src/engines/datafusion_engine/mod.rs | 2 +- 5 files changed, 34 insertions(+), 2 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 
664afbe822ff..b5eab01daf8c 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -985,6 +985,7 @@ fn join_table_borders( /// Tries to update the equi-join `Column`'s of a join as if the the input of /// the join was replaced by a projection. +#[allow(clippy::map_identity)] fn update_join_on( proj_left_exprs: &[(Column, String)], proj_right_exprs: &[(Column, String)], diff --git a/datafusion/optimizer/src/simplify_expressions/guarantees.rs b/datafusion/optimizer/src/simplify_expressions/guarantees.rs index 860dc326b9b0..f67af5c84300 100644 --- a/datafusion/optimizer/src/simplify_expressions/guarantees.rs +++ b/datafusion/optimizer/src/simplify_expressions/guarantees.rs @@ -43,6 +43,7 @@ pub(crate) struct GuaranteeRewriter<'a> { } impl<'a> GuaranteeRewriter<'a> { + #[allow(clippy::map_identity)] pub fn new( guarantees: impl IntoIterator, ) -> Self { diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs index 7fa97dad7aa6..7e0a867a82a1 100644 --- a/datafusion/physical-expr/src/array_expressions.rs +++ b/datafusion/physical-expr/src/array_expressions.rs @@ -2147,7 +2147,7 @@ pub fn general_array_distinct( let last_offset: OffsetSize = offsets.last().copied().unwrap(); offsets.push(last_offset + OffsetSize::usize_as(rows.len())); let arrays = converter.convert_rows(rows)?; - let array = match arrays.get(0) { + let array = match arrays.first() { Some(array) => array.clone(), None => { return internal_err!("array_distinct: failed to get array from rows") diff --git a/datafusion/physical-plan/src/common.rs b/datafusion/physical-plan/src/common.rs index 649f3a31aa7e..87832dd75bc7 100644 --- a/datafusion/physical-plan/src/common.rs +++ b/datafusion/physical-plan/src/common.rs @@ -388,6 +388,7 @@ mod tests { datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }; + use arrow_schema::TimeUnit; use 
datafusion_expr::Operator; use datafusion_physical_expr::expressions::{col, Column}; @@ -667,6 +668,35 @@ mod tests { Ok(()) } + #[test] + fn test_meet_of_orderings_sort() -> Result<()> { + let schema = Arc::new(Schema::new(vec![Field::new( + "f32", + DataType::Timestamp(TimeUnit::Nanosecond, None), + false, + )])); + let sort_expr = vec![PhysicalSortExpr { + expr: col("f32", &schema).unwrap(), + options: SortOptions { + descending: true, + nulls_first: true, + }, + }]; + let memory_exec = Arc::new(MemoryExec::try_new(&[], schema.clone(), None)?) as _; + let sort_exec = Arc::new(SortExec::new(sort_expr.clone(), memory_exec)) + as Arc; + let memory_exec2 = Arc::new(MemoryExec::try_new(&[], schema, None)?) as _; + // memory_exec2 doesn't have output ordering + let union_exec = UnionExec::new(vec![sort_exec.clone(), memory_exec2]); + let res = get_meet_of_orderings(union_exec.inputs()); + assert!(res.is_none()); + + let union_exec = UnionExec::new(vec![sort_exec.clone(), sort_exec]); + let res = get_meet_of_orderings(union_exec.inputs()); + assert_eq!(res, Some(&sort_expr[..])); + Ok(()) + } + #[test] fn test_compute_record_batch_statistics_empty() -> Result<()> { let schema = Arc::new(Schema::new(vec![ diff --git a/datafusion/sqllogictest/src/engines/datafusion_engine/mod.rs b/datafusion/sqllogictest/src/engines/datafusion_engine/mod.rs index 663bbdd5a3c7..35b4a9754111 100644 --- a/datafusion/sqllogictest/src/engines/datafusion_engine/mod.rs +++ b/datafusion/sqllogictest/src/engines/datafusion_engine/mod.rs @@ -21,5 +21,5 @@ mod normalize; mod runner; pub use error::*; -pub use normalize::*; + pub use runner::*; From fd80a29370b9b323814b1aff6ece578f15cfce64 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Thu, 25 Jan 2024 15:13:55 +0100 Subject: [PATCH 07/27] VTX-4075: cleanup --- datafusion/physical-plan/src/common.rs | 30 -------------------------- 1 file changed, 30 deletions(-) diff --git a/datafusion/physical-plan/src/common.rs 
b/datafusion/physical-plan/src/common.rs index 87832dd75bc7..649f3a31aa7e 100644 --- a/datafusion/physical-plan/src/common.rs +++ b/datafusion/physical-plan/src/common.rs @@ -388,7 +388,6 @@ mod tests { datatypes::{DataType, Field, Schema}, record_batch::RecordBatch, }; - use arrow_schema::TimeUnit; use datafusion_expr::Operator; use datafusion_physical_expr::expressions::{col, Column}; @@ -668,35 +667,6 @@ mod tests { Ok(()) } - #[test] - fn test_meet_of_orderings_sort() -> Result<()> { - let schema = Arc::new(Schema::new(vec![Field::new( - "f32", - DataType::Timestamp(TimeUnit::Nanosecond, None), - false, - )])); - let sort_expr = vec![PhysicalSortExpr { - expr: col("f32", &schema).unwrap(), - options: SortOptions { - descending: true, - nulls_first: true, - }, - }]; - let memory_exec = Arc::new(MemoryExec::try_new(&[], schema.clone(), None)?) as _; - let sort_exec = Arc::new(SortExec::new(sort_expr.clone(), memory_exec)) - as Arc; - let memory_exec2 = Arc::new(MemoryExec::try_new(&[], schema, None)?) 
as _; - // memory_exec2 doesn't have output ordering - let union_exec = UnionExec::new(vec![sort_exec.clone(), memory_exec2]); - let res = get_meet_of_orderings(union_exec.inputs()); - assert!(res.is_none()); - - let union_exec = UnionExec::new(vec![sort_exec.clone(), sort_exec]); - let res = get_meet_of_orderings(union_exec.inputs()); - assert_eq!(res, Some(&sort_expr[..])); - Ok(()) - } - #[test] fn test_compute_record_batch_statistics_empty() -> Result<()> { let schema = Arc::new(Schema::new(vec![ From beece8b426b3cda277c5d83563bed383c5342cbd Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 10:39:45 +0100 Subject: [PATCH 08/27] VTX-4075: make spawn_buffered public --- datafusion/physical-plan/src/common.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/physical-plan/src/common.rs b/datafusion/physical-plan/src/common.rs index 649f3a31aa7e..4329b0d5df08 100644 --- a/datafusion/physical-plan/src/common.rs +++ b/datafusion/physical-plan/src/common.rs @@ -97,7 +97,7 @@ fn build_file_list_recurse( /// If running in a tokio context spawns the execution of `stream` to a separate task /// allowing it to execute in parallel with an intermediate buffer of size `buffer` -pub(crate) fn spawn_buffered( +pub fn spawn_buffered( mut input: SendableRecordBatchStream, buffer: usize, ) -> SendableRecordBatchStream { From 0220a952694fee3a534d5aeb56a5196853e3a431 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 11:10:12 +0100 Subject: [PATCH 09/27] VTX-4075: update --- datafusion-cli/src/functions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion-cli/src/functions.rs b/datafusion-cli/src/functions.rs index f8d9ed238be4..5390fa9f2271 100644 --- a/datafusion-cli/src/functions.rs +++ b/datafusion-cli/src/functions.rs @@ -297,7 +297,7 @@ pub struct ParquetMetadataFunc {} impl TableFunctionImpl for ParquetMetadataFunc { fn call(&self, exprs: &[Expr]) -> Result> { - let filename = match 
exprs.get(0) { + let filename = match exprs.first() { Some(Expr::Literal(ScalarValue::Utf8(Some(s)))) => s, // single quote: parquet_metadata('x.parquet') Some(Expr::Column(Column { name, .. })) => name, // double quote: parquet_metadata("x.parquet") _ => { From 76083272c86ce1afc97148393219142c31a1771d Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 11:44:50 +0100 Subject: [PATCH 10/27] VTX-4075: fix more tests --- .../src/physical_optimizer/enforce_distribution.rs | 10 +++++----- .../core/src/physical_optimizer/enforce_sorting.rs | 6 +++--- .../core/src/physical_optimizer/projection_pushdown.rs | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index a06456ea41e6..5a5eacad969d 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -3718,7 +3718,7 @@ pub(crate) mod tests { let expected = &[ "SortRequiredExec: [c@2 ASC]", // Since this projection is trivial, increasing parallelism is not beneficial - "ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c]", + "ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c], sort_expr=[c@2 ASC]", "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", ]; @@ -4250,14 +4250,14 @@ pub(crate) mod tests { sort_preserving_merge_exec(sort_key_after_projection, proj_parquet); let expected = &[ "SortPreservingMergeExec: [c2@1 ASC]", - " ProjectionExec: expr=[a@0 as a2, c@2 as c2]", + " ProjectionExec: expr=[a@0 as a2, c@2 as c2], sort_expr=[c2@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", ]; plans_matches_expected!(expected, &plan_parquet); // data should not be repartitioned / resorted let expected_parquet = &[ - "ProjectionExec: expr=[a@0 as a2, c@2 as c2]", + 
"ProjectionExec: expr=[a@0 as a2, c@2 as c2], sort_expr=[c2@1 ASC]", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", ]; @@ -4289,14 +4289,14 @@ pub(crate) mod tests { let plan_csv = sort_preserving_merge_exec(sort_key_after_projection, proj_csv); let expected = &[ "SortPreservingMergeExec: [c2@1 ASC]", - " ProjectionExec: expr=[a@0 as a2, c@2 as c2]", + " ProjectionExec: expr=[a@0 as a2, c@2 as c2], sort_expr=[c2@1 ASC]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], has_header=false", ]; plans_matches_expected!(expected, &plan_csv); // data should not be repartitioned / resorted let expected_csv = &[ - "ProjectionExec: expr=[a@0 as a2, c@2 as c2]", + "ProjectionExec: expr=[a@0 as a2, c@2 as c2], sort_expr=[c2@1 ASC]", "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], has_header=false", ]; diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index 81eb3ada4c2a..19bb60bfa4b3 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -2162,7 +2162,7 @@ mod tests { ]; let expected_optimized = [ "SortPreservingMergeExec: [a@0 ASC]", - " SortPreservingRepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, sort_exprs=a@0 ASC", + " RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC], has_header=false", ]; @@ -2191,7 +2191,7 @@ mod tests { ]; let expected_optimized = [ "SortPreservingMergeExec: [a@0 ASC]", - " SortPreservingRepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, sort_exprs=a@0 ASC", + " 
RepartitionExec: partitioning=Hash([c@2], 10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], infinite_source=true, output_ordering=[a@0 ASC], has_header=false", ]; @@ -2263,7 +2263,7 @@ mod tests { let expected_input = [ "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow }], mode=[Sorted]", " SortPreservingMergeExec: [a@0 ASC,b@1 ASC]", - " SortPreservingRepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, sort_exprs=a@0 ASC,b@1 ASC", + " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=10, preserve_order=true, sort_exprs=a@0 ASC,b@1 ASC", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=1", " SortExec: expr=[a@0 ASC,b@1 ASC]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false", diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index b5eab01daf8c..4038f8a8297a 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -2200,7 +2200,7 @@ mod tests { let initial = get_plan_string(&projection); let expected_initial = [ - "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", + "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b], sort_expr=[b@2 ASC,c@0 + new_a@1 ASC]", " SortExec: expr=[b@1 ASC,c@2 + a@0 ASC]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" ]; From 0722ae4f2cc91f08a32a01265d02d46da236da07 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 12:08:16 +0100 Subject: [PATCH 11/27] 
VTX-4075: some updates --- datafusion/sqllogictest/test_files/groupby.slt | 2 +- datafusion/sqllogictest/test_files/insert.slt | 2 +- datafusion/sqllogictest/test_files/joins.slt | 2 +- datafusion/sqllogictest/test_files/order.slt | 2 +- datafusion/sqllogictest/test_files/select.slt | 2 +- datafusion/sqllogictest/test_files/union.slt | 2 +- datafusion/sqllogictest/test_files/window.slt | 4 ++-- 7 files changed, 8 insertions(+), 8 deletions(-) diff --git a/datafusion/sqllogictest/test_files/groupby.slt b/datafusion/sqllogictest/test_files/groupby.slt index b915c439059b..172cdf7bb88b 100644 --- a/datafusion/sqllogictest/test_files/groupby.slt +++ b/datafusion/sqllogictest/test_files/groupby.slt @@ -2113,7 +2113,7 @@ Projection: annotated_data_infinite2.a, annotated_data_infinite2.b, SUM(annotate --Aggregate: groupBy=[[annotated_data_infinite2.b, annotated_data_infinite2.a]], aggr=[[SUM(CAST(annotated_data_infinite2.c AS Int64))]] ----TableScan: annotated_data_infinite2 projection=[a, b, c] physical_plan -ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_infinite2.c)@2 as summation1] +ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_infinite2.c)@2 as summation1], sort_expr=[a@0 ASC NULLS LAST,b@1 ASC NULLS LAST] --AggregateExec: mode=Single, gby=[b@1 as b, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=Sorted ----StreamingTableExec: partition_sizes=1, projection=[a, b, c], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST] diff --git a/datafusion/sqllogictest/test_files/insert.slt b/datafusion/sqllogictest/test_files/insert.slt index e20b3779459b..3fbbe2e3299e 100644 --- a/datafusion/sqllogictest/test_files/insert.slt +++ b/datafusion/sqllogictest/test_files/insert.slt @@ -67,7 +67,7 @@ physical_plan FileSinkExec: sink=MemoryTable (partitions=1) --ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] 
ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@0 as field1, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@1 as field2] ----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] -------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1] +------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1], sort_expr=[c1@2 ASC NULLS LAST] --------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: 
WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted] ----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST] ------------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 67e3750113da..e78f5ce13f2c 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -2755,7 +2755,7 @@ Right Join: CAST(t1.c3 AS Decimal128(10, 2)) = t2.c3 --SubqueryAlias: t2 ----TableScan: hashjoin_datatype_table_t2 projection=[c1, c2, c3, c4] physical_plan -ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3, c4@3 as c4, c1@5 as c1, c2@6 as c2, c3@7 as c3, c4@8 as c4] +ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3, c4@3 as c4, c1@5 as c1, c2@6 as c2, c3@7 as c3, c4@8 as c4], sort_expr=[c3@6 ASC] --SortMergeJoin: join_type=Right, on=[(CAST(t1.c3 AS Decimal128(10, 2))@4, c3@2)] ----SortExec: expr=[CAST(t1.c3 AS Decimal128(10, 2))@4 ASC] ------CoalesceBatchesExec: target_batch_size=2 diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 77df9e0bb493..7ceb208d7a1f 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -163,7 +163,7 @@ Projection: aggregate_test_100.c1, aggregate_test_100.c2 --Sort: aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST 
----TableScan: aggregate_test_100 projection=[c1, c2, c3] physical_plan -ProjectionExec: expr=[c1@0 as c1, c2@1 as c2] +ProjectionExec: expr=[c1@0 as c1, c2@1 as c2], sort_expr=[c2@1 ASC NULLS LAST] --SortExec: expr=[c2@1 ASC NULLS LAST,c3@2 ASC NULLS LAST] ----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3], has_header=true diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index ea570b99d4dd..f56c057d8c55 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -901,7 +901,7 @@ Sort: annotated_data_finite2.a ASC NULLS LAST ----TableScan: annotated_data_finite2 projection=[a, b] physical_plan SortPreservingMergeExec: [a@0 ASC NULLS LAST] ---ProjectionExec: expr=[a@0 as a, a@0 + b@1 as annotated_data_finite2.a + annotated_data_finite2.b] +--ProjectionExec: expr=[a@0 as a, a@0 + b@1 as annotated_data_finite2.a + annotated_data_finite2.b], sort_expr=[a@0 ASC NULLS LAST,annotated_data_finite2.a + annotated_data_finite2.b@1 ASC NULLS LAST] ----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 ------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index b4e338875e24..afe91ea76198 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -383,7 +383,7 @@ explain SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggre logical_plan Limit: skip=0, fetch=5 --Sort: aggregate_test_100.c9 DESC NULLS FIRST, fetch=5 -----Union +----Union sort_expr=[c9@1 DESC] ------Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c9 AS Int64) AS c9 --------TableScan: 
aggregate_test_100 projection=[c1, c9] ------Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c3 AS Int64) AS c9 diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 7b628f9b6f14..e4c2cec2b95d 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -356,9 +356,9 @@ Sort: d.b ASC NULLS LAST ----------------------EmptyRelation physical_plan SortPreservingMergeExec: [b@0 ASC NULLS LAST] ---ProjectionExec: expr=[b@0 as b, MAX(d.a)@1 as max_a, MAX(d.seq)@2 as MAX(d.seq)] +--ProjectionExec: expr=[b@0 as b, MAX(d.a)@1 as max_a, MAX(d.seq)@2 as MAX(d.seq)], sort_expr=[b@0 ASC NULLS LAST] ----AggregateExec: mode=SinglePartitioned, gby=[b@2 as b], aggr=[MAX(d.a), MAX(d.seq)], ordering_mode=Sorted -------ProjectionExec: expr=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as seq, a@0 as a, b@1 as b] +------ProjectionExec: expr=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as seq, a@0 as a, b@1 as b], sort_expr=[b@2 ASC NULLS LAST,seq@0 ASC NULLS LAST,a@1 ASC NULLS LAST] --------BoundedWindowAggExec: wdw=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }], mode=[Sorted] ----------SortExec: expr=[b@1 ASC NULLS LAST,a@0 ASC NULLS LAST] ------------CoalesceBatchesExec: target_batch_size=8192 From 286dc61bcf527cf9c612c134b7794613298b593d Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 12:32:09 +0100 Subject: [PATCH 12/27] 
VTX-4075: more updates --- datafusion/sqllogictest/test_files/groupby.slt | 2 +- datafusion/sqllogictest/test_files/insert.slt | 2 +- datafusion/sqllogictest/test_files/insert_to_external.slt | 2 +- .../test_files/join_disable_repartition_joins.slt | 2 +- datafusion/sqllogictest/test_files/joins.slt | 2 +- datafusion/sqllogictest/test_files/order.slt | 2 +- datafusion/sqllogictest/test_files/select.slt | 2 +- datafusion/sqllogictest/test_files/union.slt | 4 ++-- datafusion/sqllogictest/test_files/window.slt | 4 ++-- 9 files changed, 11 insertions(+), 11 deletions(-) diff --git a/datafusion/sqllogictest/test_files/groupby.slt b/datafusion/sqllogictest/test_files/groupby.slt index 172cdf7bb88b..f7e04c5a4f63 100644 --- a/datafusion/sqllogictest/test_files/groupby.slt +++ b/datafusion/sqllogictest/test_files/groupby.slt @@ -2113,7 +2113,7 @@ Projection: annotated_data_infinite2.a, annotated_data_infinite2.b, SUM(annotate --Aggregate: groupBy=[[annotated_data_infinite2.b, annotated_data_infinite2.a]], aggr=[[SUM(CAST(annotated_data_infinite2.c AS Int64))]] ----TableScan: annotated_data_infinite2 projection=[a, b, c] physical_plan -ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_infinite2.c)@2 as summation1], sort_expr=[a@0 ASC NULLS LAST,b@1 ASC NULLS LAST] +ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_infinite2.c)@2 as summation1], sort_expr=[a@0 ASC NULLS LAST,b@1 ASC NULLS LAST], sort_expr=[a@0 ASC NULLS LAST] --AggregateExec: mode=Single, gby=[b@1 as b, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=Sorted ----StreamingTableExec: partition_sizes=1, projection=[a, b, c], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST] diff --git a/datafusion/sqllogictest/test_files/insert.slt b/datafusion/sqllogictest/test_files/insert.slt index 3fbbe2e3299e..7fbf29119252 100644 --- a/datafusion/sqllogictest/test_files/insert.slt +++ b/datafusion/sqllogictest/test_files/insert.slt @@ 
-178,7 +178,7 @@ physical_plan FileSinkExec: sink=MemoryTable (partitions=8) --ProjectionExec: expr=[a1@0 as a1, a2@1 as a2] ----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] -------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as a1, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as a2, c1@0 as c1] +------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as a1, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as a2, c1@0 as c1], sort_expr=[c1@2 ASC NULLS LAST] --------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted] ----------SortExec: expr=[c1@0 ASC NULLS 
LAST,c9@2 ASC NULLS LAST] ------------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/insert_to_external.slt b/datafusion/sqllogictest/test_files/insert_to_external.slt index cdaf0bb64339..2022b423fdac 100644 --- a/datafusion/sqllogictest/test_files/insert_to_external.slt +++ b/datafusion/sqllogictest/test_files/insert_to_external.slt @@ -334,7 +334,7 @@ physical_plan FileSinkExec: sink=ParquetSink(file_groups=[]) --ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@0 as field1, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@1 as field2] ----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] -------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1] +------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as COUNT(*) PARTITION BY 
[aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1], sort_expr=[c1@2 ASC NULLS LAST] --------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted] ----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST] ------------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt index 1312f2916ed6..1b8e4d837011 100644 --- a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt +++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt @@ -57,7 +57,7 @@ Limit: skip=0, fetch=5 physical_plan GlobalLimitExec: skip=0, fetch=5 --SortPreservingMergeExec: [a@0 ASC NULLS LAST], fetch=5 -----ProjectionExec: expr=[a@1 as a] +----ProjectionExec: expr=[a@1 as a], sort_expr=[a@0 ASC NULLS LAST] ------CoalesceBatchesExec: target_batch_size=8192 
--------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c@0, c@1)] ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], has_header=true diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index e78f5ce13f2c..ed4e20a17fe6 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3198,7 +3198,7 @@ SortPreservingMergeExec: [rn1@5 ASC NULLS LAST] ------CoalesceBatchesExec: target_batch_size=2 --------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2 ----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] +------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1], sort_expr=[a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@5 ASC NULLS LAST] --------------BoundedWindowAggExec: wdw=[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }], mode=[Sorted] ----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true ----SortExec: expr=[a@1 ASC] diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 7ceb208d7a1f..87770a086cfb 100644 --- a/datafusion/sqllogictest/test_files/order.slt 
+++ b/datafusion/sqllogictest/test_files/order.slt @@ -439,7 +439,7 @@ Sort: result ASC NULLS LAST ----TableScan: multiple_ordered_table projection=[a, b, c] physical_plan SortPreservingMergeExec: [result@0 ASC NULLS LAST] ---ProjectionExec: expr=[b@1 + a@0 + c@2 as result] +--ProjectionExec: expr=[b@1 + a@0 + c@2 as result], sort_expr=[result@0 ASC NULLS LAST] ----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], output_orderings=[[a@0 ASC NULLS LAST], [b@1 ASC NULLS LAST], [c@2 ASC NULLS LAST]], has_header=true diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index f56c057d8c55..3bfe1b3ceb22 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -919,7 +919,7 @@ Sort: annotated_data_finite2.a ASC NULLS LAST --Projection: annotated_data_finite2.a, annotated_data_finite2.b, Int64(2) ----TableScan: annotated_data_finite2 projection=[a, b] physical_plan -ProjectionExec: expr=[a@0 as a, b@1 as b, 2 as Int64(2)] +ProjectionExec: expr=[a@0 as a, b@1 as b, 2 as Int64(2)], sort_expr=[a@0 ASC NULLS LAST,b@1 ASC NULLS LAST] --CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true # source is ordered by a,b,c diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index afe91ea76198..e4b303f4aaea 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -383,7 +383,7 @@ explain SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggre logical_plan Limit: skip=0, fetch=5 --Sort: aggregate_test_100.c9 DESC NULLS FIRST, fetch=5 -----Union sort_expr=[c9@1 DESC] +----Union 
------Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c9 AS Int64) AS c9 --------TableScan: aggregate_test_100 projection=[c1, c9] ------Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c3 AS Int64) AS c9 @@ -391,7 +391,7 @@ Limit: skip=0, fetch=5 physical_plan GlobalLimitExec: skip=0, fetch=5 --SortPreservingMergeExec: [c9@1 DESC], fetch=5 -----UnionExec +----UnionExec: sort_expr=[c9@1 DESC] ------SortExec: expr=[c9@1 DESC] --------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Int64) as c9] ----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index e4c2cec2b95d..4e4ac1df1811 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1208,9 +1208,9 @@ Projection: aggregate_test_100.c9, SUM(aggregate_test_100.c9) ORDER BY [aggregat ------WindowAggr: windowExpr=[[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] --------TableScan: aggregate_test_100 projection=[c8, c9] physical_plan -ProjectionExec: expr=[c9@0 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as sum2] +ProjectionExec: expr=[c9@0 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as sum2], sort_expr=[c9@0 ASC NULLS LAST] --BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) 
ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted] -----ProjectionExec: expr=[c9@1 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] +----ProjectionExec: expr=[c9@1 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], sort_expr=[c9@0 ASC NULLS LAST] ------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted] --------SortExec: expr=[c9@1 ASC NULLS LAST,c8@0 ASC NULLS LAST] ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c8, c9], 
has_header=true From fb93e6623d1b9a56ef3b4fc566f40c1e9f5553f5 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 12:50:57 +0100 Subject: [PATCH 13/27] VTX-4075: more updates --- datafusion/sqllogictest/test_files/groupby.slt | 2 +- .../test_files/join_disable_repartition_joins.slt | 2 +- datafusion/sqllogictest/test_files/joins.slt | 4 ++-- datafusion/sqllogictest/test_files/order.slt | 2 +- datafusion/sqllogictest/test_files/union.slt | 4 ++-- datafusion/sqllogictest/test_files/window.slt | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) diff --git a/datafusion/sqllogictest/test_files/groupby.slt b/datafusion/sqllogictest/test_files/groupby.slt index f7e04c5a4f63..172cdf7bb88b 100644 --- a/datafusion/sqllogictest/test_files/groupby.slt +++ b/datafusion/sqllogictest/test_files/groupby.slt @@ -2113,7 +2113,7 @@ Projection: annotated_data_infinite2.a, annotated_data_infinite2.b, SUM(annotate --Aggregate: groupBy=[[annotated_data_infinite2.b, annotated_data_infinite2.a]], aggr=[[SUM(CAST(annotated_data_infinite2.c AS Int64))]] ----TableScan: annotated_data_infinite2 projection=[a, b, c] physical_plan -ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_infinite2.c)@2 as summation1], sort_expr=[a@0 ASC NULLS LAST,b@1 ASC NULLS LAST], sort_expr=[a@0 ASC NULLS LAST] +ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_infinite2.c)@2 as summation1], sort_expr=[a@0 ASC NULLS LAST,b@1 ASC NULLS LAST] --AggregateExec: mode=Single, gby=[b@1 as b, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=Sorted ----StreamingTableExec: partition_sizes=1, projection=[a, b, c], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST] diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt index 1b8e4d837011..c568b9717816 100644 --- 
a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt +++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt @@ -100,7 +100,7 @@ Limit: skip=0, fetch=10 physical_plan GlobalLimitExec: skip=0, fetch=10 --SortPreservingMergeExec: [a2@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10 -----ProjectionExec: expr=[a@0 as a2, b@1 as b] +----ProjectionExec: expr=[a@0 as a2, b@1 as b], sort_expr=[a2@0 ASC NULLS LAST,b@1 ASC NULLS LAST] ------CoalesceBatchesExec: target_batch_size=8192 --------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(d@1, d@3), (c@0, c@2)] ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], has_header=true diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index ed4e20a17fe6..602b8f6b4366 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3198,7 +3198,7 @@ SortPreservingMergeExec: [rn1@5 ASC NULLS LAST] ------CoalesceBatchesExec: target_batch_size=2 --------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2 ----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1], sort_expr=[a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@5 ASC NULLS LAST] +------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1],sort_expr=[a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@5 ASC NULLS LAST] --------------BoundedWindowAggExec: wdw=[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 
0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }], mode=[Sorted] ----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true ----SortExec: expr=[a@1 ASC] @@ -3239,7 +3239,7 @@ SortPreservingMergeExec: [rn1@10 ASC NULLS LAST] ------CoalesceBatchesExec: target_batch_size=2 --------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2 ----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] +------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1], sort_expr=[a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@5 ASC NULLS LAST] --------------BoundedWindowAggExec: wdw=[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }], mode=[Sorted] ----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 87770a086cfb..f8730c19fa10 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -469,7 
+469,7 @@ Sort: db15 ASC NULLS LAST ----TableScan: csv_with_timestamps projection=[ts] physical_plan SortPreservingMergeExec: [db15@0 ASC NULLS LAST] ---ProjectionExec: expr=[date_bin(900000000000, ts@0, 1659537600000000000) as db15] +--ProjectionExec: expr=[date_bin(900000000000, ts@0, 1659537600000000000) as db15], sort_expr=[db15@0 ASC NULLS LAST] ----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/timestamps.csv]]}, projection=[ts], output_ordering=[ts@0 ASC NULLS LAST], has_header=false diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index e4b303f4aaea..8609afd6822f 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -520,9 +520,9 @@ Sort: t1.c1 ASC NULLS LAST ------TableScan: t2 projection=[c1a] physical_plan SortPreservingMergeExec: [c1@0 ASC NULLS LAST] ---UnionExec +--UnionExec: sort_expr=[c1@0 ASC NULLS LAST] ----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1], output_ordering=[c1@0 ASC NULLS LAST], has_header=true -----ProjectionExec: expr=[c1a@0 as c1] +----ProjectionExec: expr=[c1a@0 as c1], sort_expr=[c1@0 ASC NULLS LAST] ------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1a], output_ordering=[c1a@0 ASC NULLS LAST], has_header=true statement ok diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 4e4ac1df1811..b3cea719a2f5 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1210,7 +1210,7 @@ Projection: aggregate_test_100.c9, SUM(aggregate_test_100.c9) ORDER BY [aggregat physical_plan ProjectionExec: expr=[c9@0 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE 
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as sum2], sort_expr=[c9@0 ASC NULLS LAST] --BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted] -----ProjectionExec: expr=[c9@1 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], sort_expr=[c9@0 ASC NULLS LAST] +----ProjectionExec: expr=[c9@1 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], sort_expr=[c9@0 ASC NULLS LAST] ------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, 
dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted] --------SortExec: expr=[c9@1 ASC NULLS LAST,c8@0 ASC NULLS LAST] ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c8, c9], has_header=true @@ -1229,7 +1229,7 @@ Projection: aggregate_test_100.c2, MAX(aggregate_test_100.c9) ORDER BY [aggregat ------WindowAggr: windowExpr=[[MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] --------TableScan: aggregate_test_100 projection=[c2, c9] physical_plan -ProjectionExec: expr=[c2@0 as c2, MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] +ProjectionExec: expr=[c2@0 as c2, MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, 
MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], sort_expr=[c2@0 ASC NULLS LAST] --WindowAggExec: wdw=[SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }] ----BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow }], mode=[Sorted] ------BoundedWindowAggExec: wdw=[MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow }], mode=[Sorted] From 597c9309d530d4f7d17605c24761d01854dbbb9d Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 12:59:45 +0100 Subject: [PATCH 
14/27] VTX-4075: verbose out --- datafusion/physical-plan/src/projection.rs | 50 +++++++++++----------- datafusion/physical-plan/src/union.rs | 23 +++++----- 2 files changed, 37 insertions(+), 36 deletions(-) diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index 3e185e4ff4b8..82a877f25632 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -126,33 +126,33 @@ impl DisplayAs for ProjectionExec { t: DisplayFormatType, f: &mut std::fmt::Formatter, ) -> std::fmt::Result { - match t { - DisplayFormatType::Default | DisplayFormatType::Verbose => { - let expr: Vec = self - .expr - .iter() - .map(|(e, alias)| { - let e = e.to_string(); - if &e != alias { - format!("{e} as {alias}") - } else { - e - } - }) - .collect(); - - match self.output_ordering() { - Some(exprs) => { - write!( - f, - "ProjectionExec: expr=[{}], sort_expr=[{}]", - expr.join(", "), - PhysicalSortExpr::format_list(exprs) - ) - } - _ => write!(f, "ProjectionExec: expr=[{}]", expr.join(", ")), + let expr: Vec = self + .expr + .iter() + .map(|(e, alias)| { + let e = e.to_string(); + if &e != alias { + format!("{e} as {alias}") + } else { + e } + }) + .collect(); + match t { + DisplayFormatType::Default => { + write!(f, "ProjectionExec: expr=[{}]", expr.join(", ")) } + DisplayFormatType::Verbose => match self.output_ordering() { + Some(exprs) => { + write!( + f, + "ProjectionExec: expr=[{}], sort_expr=[{}]", + expr.join(", "), + PhysicalSortExpr::format_list(exprs) + ) + } + _ => write!(f, "ProjectionExec: expr=[{}]", expr.join(", ")), + }, } } } diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 8a0935a0b3d0..f5dc7e6241dd 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -119,18 +119,19 @@ impl DisplayAs for UnionExec { f: &mut std::fmt::Formatter, ) -> std::fmt::Result { match t { - 
DisplayFormatType::Default | DisplayFormatType::Verbose => { - match self.output_ordering() { - Some(exprs) => { - write!( - f, - "UnionExec: sort_expr=[{}]", - PhysicalSortExpr::format_list(exprs) - ) - } - _ => write!(f, "UnionExec"), - } + DisplayFormatType::Default => { + write!(f, "UnionExec") } + DisplayFormatType::Verbose => match self.output_ordering() { + Some(exprs) => { + write!( + f, + "UnionExec: sort_expr=[{}]", + PhysicalSortExpr::format_list(exprs) + ) + } + _ => write!(f, "UnionExec"), + }, } } } From 771e1e12aea3122d59d61070c020275d0ef058ee Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 13:00:54 +0100 Subject: [PATCH 15/27] Revert "VTX-4075: ore verbose display for union" This reverts commit ebf2f5a7416daab0477d1f9ee89f4706ef3a4479. From 09fff24bf5c182100e2bf1e8812fc921aa92c9a9 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 13:02:22 +0100 Subject: [PATCH 16/27] Revert "VTX-4075: fix more tests" This reverts commit 76083272c86ce1afc97148393219142c31a1771d. From c3c6a4f310593c2af1d6cccb5aed30ec80261bc5 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 13:04:54 +0100 Subject: [PATCH 17/27] Revert "VTX-4075: some updates" This reverts commit 0722ae4f2cc91f08a32a01265d02d46da236da07. 
--- datafusion/sqllogictest/test_files/groupby.slt | 2 +- datafusion/sqllogictest/test_files/insert.slt | 2 +- datafusion/sqllogictest/test_files/joins.slt | 2 +- datafusion/sqllogictest/test_files/order.slt | 2 +- datafusion/sqllogictest/test_files/select.slt | 2 +- datafusion/sqllogictest/test_files/window.slt | 4 ++-- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/datafusion/sqllogictest/test_files/groupby.slt b/datafusion/sqllogictest/test_files/groupby.slt index 172cdf7bb88b..b915c439059b 100644 --- a/datafusion/sqllogictest/test_files/groupby.slt +++ b/datafusion/sqllogictest/test_files/groupby.slt @@ -2113,7 +2113,7 @@ Projection: annotated_data_infinite2.a, annotated_data_infinite2.b, SUM(annotate --Aggregate: groupBy=[[annotated_data_infinite2.b, annotated_data_infinite2.a]], aggr=[[SUM(CAST(annotated_data_infinite2.c AS Int64))]] ----TableScan: annotated_data_infinite2 projection=[a, b, c] physical_plan -ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_infinite2.c)@2 as summation1], sort_expr=[a@0 ASC NULLS LAST,b@1 ASC NULLS LAST] +ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_infinite2.c)@2 as summation1] --AggregateExec: mode=Single, gby=[b@1 as b, a@0 as a], aggr=[SUM(annotated_data_infinite2.c)], ordering_mode=Sorted ----StreamingTableExec: partition_sizes=1, projection=[a, b, c], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS LAST] diff --git a/datafusion/sqllogictest/test_files/insert.slt b/datafusion/sqllogictest/test_files/insert.slt index 7fbf29119252..ce85ed73d958 100644 --- a/datafusion/sqllogictest/test_files/insert.slt +++ b/datafusion/sqllogictest/test_files/insert.slt @@ -67,7 +67,7 @@ physical_plan FileSinkExec: sink=MemoryTable (partitions=1) --ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@0 as field1, COUNT(*) PARTITION BY 
[aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@1 as field2] ----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] -------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1], sort_expr=[c1@2 ASC NULLS LAST] +------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1] --------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: 
Following(UInt64(1)) }, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted] ----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST] ------------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index 602b8f6b4366..d1ee307231f7 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -2755,7 +2755,7 @@ Right Join: CAST(t1.c3 AS Decimal128(10, 2)) = t2.c3 --SubqueryAlias: t2 ----TableScan: hashjoin_datatype_table_t2 projection=[c1, c2, c3, c4] physical_plan -ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3, c4@3 as c4, c1@5 as c1, c2@6 as c2, c3@7 as c3, c4@8 as c4], sort_expr=[c3@6 ASC] +ProjectionExec: expr=[c1@0 as c1, c2@1 as c2, c3@2 as c3, c4@3 as c4, c1@5 as c1, c2@6 as c2, c3@7 as c3, c4@8 as c4] --SortMergeJoin: join_type=Right, on=[(CAST(t1.c3 AS Decimal128(10, 2))@4, c3@2)] ----SortExec: expr=[CAST(t1.c3 AS Decimal128(10, 2))@4 ASC] ------CoalesceBatchesExec: target_batch_size=2 diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index f8730c19fa10..bb3d1b4de769 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -163,7 +163,7 @@ Projection: aggregate_test_100.c1, aggregate_test_100.c2 --Sort: aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c3 ASC NULLS LAST ----TableScan: aggregate_test_100 projection=[c1, c2, c3] physical_plan 
-ProjectionExec: expr=[c1@0 as c1, c2@1 as c2], sort_expr=[c2@1 ASC NULLS LAST] +ProjectionExec: expr=[c1@0 as c1, c2@1 as c2] --SortExec: expr=[c2@1 ASC NULLS LAST,c3@2 ASC NULLS LAST] ----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1, c2, c3], has_header=true diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 3bfe1b3ceb22..5a4566fc2165 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -901,7 +901,7 @@ Sort: annotated_data_finite2.a ASC NULLS LAST ----TableScan: annotated_data_finite2 projection=[a, b] physical_plan SortPreservingMergeExec: [a@0 ASC NULLS LAST] ---ProjectionExec: expr=[a@0 as a, a@0 + b@1 as annotated_data_finite2.a + annotated_data_finite2.b], sort_expr=[a@0 ASC NULLS LAST,annotated_data_finite2.a + annotated_data_finite2.b@1 ASC NULLS LAST] +--ProjectionExec: expr=[a@0 as a, a@0 + b@1 as annotated_data_finite2.a + annotated_data_finite2.b] ----RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 ------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index b3cea719a2f5..5ec3971ff537 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -356,9 +356,9 @@ Sort: d.b ASC NULLS LAST ----------------------EmptyRelation physical_plan SortPreservingMergeExec: [b@0 ASC NULLS LAST] ---ProjectionExec: expr=[b@0 as b, MAX(d.a)@1 as max_a, MAX(d.seq)@2 as MAX(d.seq)], sort_expr=[b@0 ASC NULLS LAST] +--ProjectionExec: expr=[b@0 as b, MAX(d.a)@1 as max_a, MAX(d.seq)@2 as MAX(d.seq)] ----AggregateExec: mode=SinglePartitioned, gby=[b@2 as b], aggr=[MAX(d.a), 
MAX(d.seq)], ordering_mode=Sorted -------ProjectionExec: expr=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as seq, a@0 as a, b@1 as b], sort_expr=[b@2 ASC NULLS LAST,seq@0 ASC NULLS LAST,a@1 ASC NULLS LAST] +------ProjectionExec: expr=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as seq, a@0 as a, b@1 as b] --------BoundedWindowAggExec: wdw=[ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "ROW_NUMBER() PARTITION BY [s.b] ORDER BY [s.a ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int64(NULL)), end_bound: CurrentRow }], mode=[Sorted] ----------SortExec: expr=[b@1 ASC NULLS LAST,a@0 ASC NULLS LAST] ------------CoalesceBatchesExec: target_batch_size=8192 From 2ba2c288442202dfdad7174316cf900d963dc08f Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 13:05:51 +0100 Subject: [PATCH 18/27] Revert "VTX-4075: more updates" This reverts commit 286dc61bcf527cf9c612c134b7794613298b593d. From 60daf1cb70fe29413b7aaeace81f6132544feace Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 13:06:23 +0100 Subject: [PATCH 19/27] Revert "VTX-4075: more updates" This reverts commit fb93e6623d1b9a56ef3b4fc566f40c1e9f5553f5. From a24cdf19c5ac4d18e8e6f7fe223c0399284d3554 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 13:09:34 +0100 Subject: [PATCH 20/27] Revert "VTX-4075: ore verbose display for union" This reverts commit ebf2f5a7416daab0477d1f9ee89f4706ef3a4479. 
From 500dd133f88c6f142e457e89aad0b221a0108d46 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 13:10:06 +0100 Subject: [PATCH 21/27] Revert "VTX-4075: ore verbose display for union" This reverts commit ebf2f5a7416daab0477d1f9ee89f4706ef3a4479. --- .../enforce_distribution.rs | 8 ++-- .../src/physical_optimizer/enforce_sorting.rs | 42 +++++++++---------- 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 5a5eacad969d..364d966fec48 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -3574,7 +3574,7 @@ pub(crate) mod tests { // should not repartition / sort (as the data was already sorted) let expected = &[ "SortPreservingMergeExec: [c@2 ASC]", - "UnionExec: sort_expr=[c@2 ASC]", + "UnionExec", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", ]; @@ -3584,7 +3584,7 @@ pub(crate) mod tests { let expected = &[ "SortExec: expr=[c@2 ASC]", "CoalescePartitionsExec", - "UnionExec: sort_expr=[c@2 ASC]", + "UnionExec", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", ]; @@ -4180,13 +4180,13 @@ pub(crate) mod tests { // should not sort (as the data was already sorted) let expected_parquet = &[ "SortPreservingMergeExec: [c@2 ASC]", - "UnionExec: sort_expr=[c@2 ASC]", + "UnionExec", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", ]; let expected_csv = &[ 
"SortPreservingMergeExec: [c@2 ASC]", - "UnionExec: sort_expr=[c@2 ASC]", + "UnionExec", "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], has_header=false", "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], has_header=false", ]; diff --git a/datafusion/core/src/physical_optimizer/enforce_sorting.rs b/datafusion/core/src/physical_optimizer/enforce_sorting.rs index 19bb60bfa4b3..277404b301c4 100644 --- a/datafusion/core/src/physical_optimizer/enforce_sorting.rs +++ b/datafusion/core/src/physical_optimizer/enforce_sorting.rs @@ -1185,7 +1185,7 @@ mod tests { let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " GlobalLimitExec: skip=0, fetch=100", " LocalLimitExec: fetch=100", @@ -1196,7 +1196,7 @@ mod tests { let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " RepartitionExec: partitioning=RoundRobinBatch(10), input_partitions=2", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " GlobalLimitExec: skip=0, fetch=100", " LocalLimitExec: fetch=100", @@ -1272,7 +1272,7 @@ mod tests { // one input to the union is already sorted, one is not. 
let expected_input = vec![ "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", @@ -1303,7 +1303,7 @@ mod tests { // one input to the union is already sorted, one is not. let expected_input = vec![ "SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", @@ -1335,13 +1335,13 @@ mod tests { // First ParquetExec has output ordering(nullable_col@0 ASC). However, it doesn't satisfy the // required ordering of SortPreservingMergeExec. 
let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", + " UnionExec", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", @@ -1373,7 +1373,7 @@ mod tests { // Second input to the union is already Sorted (matches with the required ordering by the SortPreservingMergeExec above). // Third input to the union is not Sorted (SortExec is matches required ordering by the SortPreservingMergeExec above). 
let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", @@ -1381,7 +1381,7 @@ mod tests { " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; // should adjust sorting in the first input of the union such that it is not unnecessarily fine let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", @@ -1414,14 +1414,14 @@ mod tests { // `UnionExec` satisfy the ordering, OR add a single sort after // the `UnionExec` (both of which are equally good for this example). 
let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", + " UnionExec", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", @@ -1463,13 +1463,13 @@ mod tests { // example below. However, we should be able to change the unnecessarily // fine `SortExec`s below with required `SortExec`s that are absolutely necessary. 
let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " SortExec: expr=[nullable_col@0 ASC]", @@ -1515,7 +1515,7 @@ mod tests { // Should adjust the requirement in the third input of the union so // that it is not unnecessarily fine. let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", @@ -1544,14 +1544,14 @@ mod tests { // Union has unnecessarily fine ordering below it. We should be able to replace them with absolutely necessary ordering. 
let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", + " UnionExec", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; // Union preserves the inputs ordering and we should not change any of the SortExecs under UnionExec let expected_output = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " SortExec: expr=[nullable_col@0 ASC]", @@ -1642,7 +1642,7 @@ mod tests { let expected_input = [ "BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow }], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 DESC NULLS LAST]", - " UnionExec: sort_expr=[nullable_col@0 DESC NULLS LAST]", + " UnionExec", " SortExec: expr=[nullable_col@0 DESC NULLS LAST]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC]", " SortExec: expr=[nullable_col@0 DESC NULLS LAST]", @@ -1650,7 +1650,7 @@ mod tests { let expected_optimized = [ "WindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: CurrentRow, end_bound: Following(NULL) }]", " SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " 
ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC, non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ -1679,14 +1679,14 @@ mod tests { // The unnecessary SortExecs should be removed let expected_input = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow }], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 ASC,non_nullable_col@1 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", + " UnionExec", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; let expected_optimized = ["BoundedWindowAggExec: wdw=[count: Ok(Field { name: \"count\", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(NULL), end_bound: CurrentRow }], mode=[Sorted]", " SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col], output_ordering=[nullable_col@0 ASC]"]; assert_optimized!(expected_input, expected_optimized, physical_plan, true); @@ 
-1725,7 +1725,7 @@ mod tests { // Should not change the unnecessarily fine `SortExec`s because there is `LimitExec` let expected_input = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " GlobalLimitExec: skip=0, fetch=100", @@ -1733,7 +1733,7 @@ mod tests { " SortExec: expr=[nullable_col@0 ASC,non_nullable_col@1 DESC NULLS LAST]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]"]; let expected_optimized = ["SortPreservingMergeExec: [nullable_col@0 ASC]", - " UnionExec: sort_expr=[nullable_col@0 ASC]", + " UnionExec", " SortExec: expr=[nullable_col@0 ASC]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[nullable_col, non_nullable_col]", " GlobalLimitExec: skip=0, fetch=100", From aadbc1b2a7d378d4a270656144a1460d032dd432 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 13:12:43 +0100 Subject: [PATCH 22/27] Revert "VTX-4075: fix more tests" This reverts commit 76083272c86ce1afc97148393219142c31a1771d. 
--- .../src/physical_optimizer/enforce_distribution.rs | 10 +++++----- .../core/src/physical_optimizer/projection_pushdown.rs | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/datafusion/core/src/physical_optimizer/enforce_distribution.rs b/datafusion/core/src/physical_optimizer/enforce_distribution.rs index 364d966fec48..f2e04989ef66 100644 --- a/datafusion/core/src/physical_optimizer/enforce_distribution.rs +++ b/datafusion/core/src/physical_optimizer/enforce_distribution.rs @@ -3718,7 +3718,7 @@ pub(crate) mod tests { let expected = &[ "SortRequiredExec: [c@2 ASC]", // Since this projection is trivial, increasing parallelism is not beneficial - "ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c], sort_expr=[c@2 ASC]", + "ProjectionExec: expr=[a@0 as a, b@1 as b, c@2 as c]", "ParquetExec: file_groups={2 groups: [[x], [y]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", ]; @@ -4250,14 +4250,14 @@ pub(crate) mod tests { sort_preserving_merge_exec(sort_key_after_projection, proj_parquet); let expected = &[ "SortPreservingMergeExec: [c2@1 ASC]", - " ProjectionExec: expr=[a@0 as a2, c@2 as c2], sort_expr=[c2@1 ASC]", + " ProjectionExec: expr=[a@0 as a2, c@2 as c2]", " ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", ]; plans_matches_expected!(expected, &plan_parquet); // data should not be repartitioned / resorted let expected_parquet = &[ - "ProjectionExec: expr=[a@0 as a2, c@2 as c2], sort_expr=[c2@1 ASC]", + "ProjectionExec: expr=[a@0 as a2, c@2 as c2]", "ParquetExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC]", ]; @@ -4289,14 +4289,14 @@ pub(crate) mod tests { let plan_csv = sort_preserving_merge_exec(sort_key_after_projection, proj_csv); let expected = &[ "SortPreservingMergeExec: [c2@1 ASC]", - " ProjectionExec: expr=[a@0 as a2, c@2 as c2], sort_expr=[c2@1 ASC]", + " ProjectionExec: expr=[a@0 as a2, c@2 as c2]", " CsvExec: file_groups={1 
group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], has_header=false", ]; plans_matches_expected!(expected, &plan_csv); // data should not be repartitioned / resorted let expected_csv = &[ - "ProjectionExec: expr=[a@0 as a2, c@2 as c2], sort_expr=[c2@1 ASC]", + "ProjectionExec: expr=[a@0 as a2, c@2 as c2]", "CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], output_ordering=[c@2 ASC], has_header=false", ]; diff --git a/datafusion/core/src/physical_optimizer/projection_pushdown.rs b/datafusion/core/src/physical_optimizer/projection_pushdown.rs index 4038f8a8297a..b5eab01daf8c 100644 --- a/datafusion/core/src/physical_optimizer/projection_pushdown.rs +++ b/datafusion/core/src/physical_optimizer/projection_pushdown.rs @@ -2200,7 +2200,7 @@ mod tests { let initial = get_plan_string(&projection); let expected_initial = [ - "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b], sort_expr=[b@2 ASC,c@0 + new_a@1 ASC]", + "ProjectionExec: expr=[c@2 as c, a@0 as new_a, b@1 as b]", " SortExec: expr=[b@1 ASC,c@2 + a@0 ASC]", " CsvExec: file_groups={1 group: [[x]]}, projection=[a, b, c, d, e], has_header=false" ]; From c021af11f22233ff7831a9aad4eec1a3dff5022a Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 13:16:21 +0100 Subject: [PATCH 23/27] Revert "VTX-4075: more updates" This reverts commit 286dc61bcf527cf9c612c134b7794613298b593d. 
--- datafusion/sqllogictest/test_files/insert.slt | 2 +- datafusion/sqllogictest/test_files/insert_to_external.slt | 2 +- .../test_files/join_disable_repartition_joins.slt | 2 +- datafusion/sqllogictest/test_files/joins.slt | 2 +- datafusion/sqllogictest/test_files/order.slt | 2 +- datafusion/sqllogictest/test_files/select.slt | 2 +- datafusion/sqllogictest/test_files/union.slt | 4 ++-- datafusion/sqllogictest/test_files/window.slt | 4 ++-- 8 files changed, 10 insertions(+), 10 deletions(-) diff --git a/datafusion/sqllogictest/test_files/insert.slt b/datafusion/sqllogictest/test_files/insert.slt index ce85ed73d958..e20b3779459b 100644 --- a/datafusion/sqllogictest/test_files/insert.slt +++ b/datafusion/sqllogictest/test_files/insert.slt @@ -178,7 +178,7 @@ physical_plan FileSinkExec: sink=MemoryTable (partitions=8) --ProjectionExec: expr=[a1@0 as a1, a2@1 as a2] ----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] -------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as a1, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as a2, c1@0 as c1], sort_expr=[c1@2 ASC NULLS LAST] +------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as a1, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as a2, c1@0 as c1] --------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING 
AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted] ----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC NULLS LAST] ------------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/insert_to_external.slt b/datafusion/sqllogictest/test_files/insert_to_external.slt index 2022b423fdac..cdaf0bb64339 100644 --- a/datafusion/sqllogictest/test_files/insert_to_external.slt +++ b/datafusion/sqllogictest/test_files/insert_to_external.slt @@ -334,7 +334,7 @@ physical_plan FileSinkExec: sink=ParquetSink(file_groups=[]) --ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@0 as field1, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@1 as field2] ----SortPreservingMergeExec: [c1@2 ASC NULLS LAST] -------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY 
[aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1], sort_expr=[c1@2 ASC NULLS LAST] +------ProjectionExec: expr=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@3 as SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING@4 as COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING, c1@0 as c1] --------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c4) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }, COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING: Ok(Field { name: "COUNT(*) PARTITION BY [aggregate_test_100.c1] ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(1)), end_bound: Following(UInt64(1)) }], mode=[Sorted] ----------SortExec: expr=[c1@0 ASC NULLS LAST,c9@2 ASC 
NULLS LAST] ------------CoalesceBatchesExec: target_batch_size=8192 diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt index c568b9717816..662b967a89ef 100644 --- a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt +++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt @@ -57,7 +57,7 @@ Limit: skip=0, fetch=5 physical_plan GlobalLimitExec: skip=0, fetch=5 --SortPreservingMergeExec: [a@0 ASC NULLS LAST], fetch=5 -----ProjectionExec: expr=[a@1 as a], sort_expr=[a@0 ASC NULLS LAST] +----ProjectionExec: expr=[a@1 as a] ------CoalesceBatchesExec: target_batch_size=8192 --------HashJoinExec: mode=CollectLeft, join_type=Inner, on=[(c@0, c@1)] ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c], has_header=true diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index d1ee307231f7..bc4d98b2a187 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3198,7 +3198,7 @@ SortPreservingMergeExec: [rn1@5 ASC NULLS LAST] ------CoalesceBatchesExec: target_batch_size=2 --------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2 ----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1],sort_expr=[a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@5 ASC NULLS LAST] +------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] --------------BoundedWindowAggExec: wdw=[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() ROWS 
BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }], mode=[Sorted] ----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true ----SortExec: expr=[a@1 ASC] diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index bb3d1b4de769..69f815c40d39 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ b/datafusion/sqllogictest/test_files/order.slt @@ -439,7 +439,7 @@ Sort: result ASC NULLS LAST ----TableScan: multiple_ordered_table projection=[a, b, c] physical_plan SortPreservingMergeExec: [result@0 ASC NULLS LAST] ---ProjectionExec: expr=[b@1 + a@0 + c@2 as result], sort_expr=[result@0 ASC NULLS LAST] +--ProjectionExec: expr=[b@1 + a@0 + c@2 as result] ----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, c], output_orderings=[[a@0 ASC NULLS LAST], [b@1 ASC NULLS LAST], [c@2 ASC NULLS LAST]], has_header=true diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt index 5a4566fc2165..ea570b99d4dd 100644 --- a/datafusion/sqllogictest/test_files/select.slt +++ b/datafusion/sqllogictest/test_files/select.slt @@ -919,7 +919,7 @@ Sort: annotated_data_finite2.a ASC NULLS LAST --Projection: annotated_data_finite2.a, annotated_data_finite2.b, Int64(2) ----TableScan: annotated_data_finite2 projection=[a, b] physical_plan -ProjectionExec: expr=[a@0 as a, b@1 as b, 2 as Int64(2)], sort_expr=[a@0 ASC NULLS LAST,b@1 ASC NULLS LAST] +ProjectionExec: expr=[a@0 as a, b@1 as b, 2 as Int64(2)] 
--CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true # source is ordered by a,b,c diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index 8609afd6822f..dd49cc7b4285 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -383,7 +383,7 @@ explain SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggre logical_plan Limit: skip=0, fetch=5 --Sort: aggregate_test_100.c9 DESC NULLS FIRST, fetch=5 -----Union +----Union sort_expr=[c9@1 DESC] ------Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c9 AS Int64) AS c9 --------TableScan: aggregate_test_100 projection=[c1, c9] ------Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c3 AS Int64) AS c9 @@ -391,7 +391,7 @@ Limit: skip=0, fetch=5 physical_plan GlobalLimitExec: skip=0, fetch=5 --SortPreservingMergeExec: [c9@1 DESC], fetch=5 -----UnionExec: sort_expr=[c9@1 DESC] +----UnionExec ------SortExec: expr=[c9@1 DESC] --------ProjectionExec: expr=[c1@0 as c1, CAST(c9@1 AS Int64) as c9] ----------RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 5ec3971ff537..8b22dfb9ec5a 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1208,9 +1208,9 @@ Projection: aggregate_test_100.c9, SUM(aggregate_test_100.c9) ORDER BY [aggregat ------WindowAggr: windowExpr=[[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] --------TableScan: aggregate_test_100 projection=[c8, c9] physical_plan -ProjectionExec: expr=[c9@0 as c9, SUM(aggregate_test_100.c9) ORDER BY 
[aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as sum2], sort_expr=[c9@0 ASC NULLS LAST] +ProjectionExec: expr=[c9@0 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as sum1, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@1 as sum2] --BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted] -----ProjectionExec: expr=[c9@1 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], sort_expr=[c9@0 ASC NULLS LAST +----ProjectionExec: expr=[c9@1 as c9, SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] ------BoundedWindowAggExec: wdw=[SUM(aggregate_test_100.c9) ORDER BY 
[aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "SUM(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c9 ASC NULLS LAST, aggregate_test_100.c8 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(UInt64(NULL)), end_bound: CurrentRow }], mode=[Sorted] --------SortExec: expr=[c9@1 ASC NULLS LAST,c8@0 ASC NULLS LAST] ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c8, c9], has_header=true From 644cc4f8bc43a1e61343601efb8975f87c6a0b1b Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 13:18:33 +0100 Subject: [PATCH 24/27] Revert "VTX-4075: more updates" This reverts commit fb93e6623d1b9a56ef3b4fc566f40c1e9f5553f5. --- .../test_files/join_disable_repartition_joins.slt | 2 +- datafusion/sqllogictest/test_files/joins.slt | 2 +- datafusion/sqllogictest/test_files/order.slt | 2 +- datafusion/sqllogictest/test_files/union.slt | 4 ++-- datafusion/sqllogictest/test_files/window.slt | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) diff --git a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt index 662b967a89ef..1312f2916ed6 100644 --- a/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt +++ b/datafusion/sqllogictest/test_files/join_disable_repartition_joins.slt @@ -100,7 +100,7 @@ Limit: skip=0, fetch=10 physical_plan GlobalLimitExec: skip=0, fetch=10 --SortPreservingMergeExec: [a2@0 ASC NULLS LAST,b@1 ASC NULLS LAST], fetch=10 -----ProjectionExec: expr=[a@0 as a2, b@1 as b], sort_expr=[a2@0 ASC NULLS LAST,b@1 ASC NULLS LAST] +----ProjectionExec: expr=[a@0 as a2, b@1 as b] ------CoalesceBatchesExec: target_batch_size=8192 
--------HashJoinExec: mode=CollectLeft, join_type=RightSemi, on=[(d@1, d@3), (c@0, c@2)] ----------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[c, d], has_header=true diff --git a/datafusion/sqllogictest/test_files/joins.slt b/datafusion/sqllogictest/test_files/joins.slt index bc4d98b2a187..67e3750113da 100644 --- a/datafusion/sqllogictest/test_files/joins.slt +++ b/datafusion/sqllogictest/test_files/joins.slt @@ -3239,7 +3239,7 @@ SortPreservingMergeExec: [rn1@10 ASC NULLS LAST] ------CoalesceBatchesExec: target_batch_size=2 --------RepartitionExec: partitioning=Hash([a@1], 2), input_partitions=2 ----------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1 -------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1], sort_expr=[a@1 ASC,b@2 ASC NULLS LAST,c@3 ASC NULLS LAST,rn1@5 ASC NULLS LAST] +------------ProjectionExec: expr=[a0@0 as a0, a@1 as a, b@2 as b, c@3 as c, d@4 as d, ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@5 as rn1] --------------BoundedWindowAggExec: wdw=[ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "ROW_NUMBER() ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: false, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }], mode=[Sorted] ----------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a, b, c, d], output_ordering=[a@1 ASC, b@2 ASC NULLS LAST, c@3 ASC NULLS LAST], has_header=true diff --git a/datafusion/sqllogictest/test_files/order.slt b/datafusion/sqllogictest/test_files/order.slt index 69f815c40d39..77df9e0bb493 100644 --- a/datafusion/sqllogictest/test_files/order.slt +++ 
b/datafusion/sqllogictest/test_files/order.slt @@ -469,7 +469,7 @@ Sort: db15 ASC NULLS LAST ----TableScan: csv_with_timestamps projection=[ts] physical_plan SortPreservingMergeExec: [db15@0 ASC NULLS LAST] ---ProjectionExec: expr=[date_bin(900000000000, ts@0, 1659537600000000000) as db15], sort_expr=[db15@0 ASC NULLS LAST] +--ProjectionExec: expr=[date_bin(900000000000, ts@0, 1659537600000000000) as db15] ----RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1 ------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/timestamps.csv]]}, projection=[ts], output_ordering=[ts@0 ASC NULLS LAST], has_header=false diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index dd49cc7b4285..afe91ea76198 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -520,9 +520,9 @@ Sort: t1.c1 ASC NULLS LAST ------TableScan: t2 projection=[c1a] physical_plan SortPreservingMergeExec: [c1@0 ASC NULLS LAST] ---UnionExec: sort_expr=[c1@0 ASC NULLS LAST] +--UnionExec ----CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1], output_ordering=[c1@0 ASC NULLS LAST], has_header=true -----ProjectionExec: expr=[c1a@0 as c1], sort_expr=[c1@0 ASC NULLS LAST] +----ProjectionExec: expr=[c1a@0 as c1] ------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/testing/data/csv/aggregate_test_100.csv]]}, projection=[c1a], output_ordering=[c1a@0 ASC NULLS LAST], has_header=true statement ok diff --git a/datafusion/sqllogictest/test_files/window.slt b/datafusion/sqllogictest/test_files/window.slt index 8b22dfb9ec5a..7b628f9b6f14 100644 --- a/datafusion/sqllogictest/test_files/window.slt +++ b/datafusion/sqllogictest/test_files/window.slt @@ -1229,7 +1229,7 @@ Projection: aggregate_test_100.c2, MAX(aggregate_test_100.c9) ORDER BY [aggregat ------WindowAggr: windowExpr=[[MIN(aggregate_test_100.c9) ORDER BY 
[aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW]] --------TableScan: aggregate_test_100 projection=[c2, c9] physical_plan -ProjectionExec: expr=[c2@0 as c2, MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW], sort_expr=[c2@0 ASC NULLS LAST] +ProjectionExec: expr=[c2@0 as c2, MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@3 as MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW, SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING@4 as SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING, MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW@2 as MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW] --WindowAggExec: wdw=[SUM(aggregate_test_100.c9) ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING: Ok(Field { name: "SUM(aggregate_test_100.c9) ROWS BETWEEN 
UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Rows, start_bound: Preceding(UInt64(NULL)), end_bound: Following(UInt64(NULL)) }] ----BoundedWindowAggExec: wdw=[MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MAX(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow }], mode=[Sorted] ------BoundedWindowAggExec: wdw=[MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW: Ok(Field { name: "MIN(aggregate_test_100.c9) ORDER BY [aggregate_test_100.c2 ASC NULLS LAST, aggregate_test_100.c9 ASC NULLS LAST] RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW", data_type: UInt64, nullable: true, dict_id: 0, dict_is_ordered: false, metadata: {} }), frame: WindowFrame { units: Range, start_bound: Preceding(Int8(NULL)), end_bound: CurrentRow }], mode=[Sorted] From 738f2b4b1b932dee4668dd5235f04ab404869a36 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 13:19:23 +0100 Subject: [PATCH 25/27] VTX-4075: cleanup --- datafusion/sqllogictest/test_files/union.slt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/sqllogictest/test_files/union.slt b/datafusion/sqllogictest/test_files/union.slt index afe91ea76198..b4e338875e24 100644 --- a/datafusion/sqllogictest/test_files/union.slt +++ b/datafusion/sqllogictest/test_files/union.slt @@ -383,7 +383,7 @@ explain SELECT c1, c9 FROM aggregate_test_100 UNION ALL SELECT c1, c3 FROM aggre logical_plan Limit: skip=0, fetch=5 --Sort: 
aggregate_test_100.c9 DESC NULLS FIRST, fetch=5 -----Union sort_expr=[c9@1 DESC] +----Union ------Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c9 AS Int64) AS c9 --------TableScan: aggregate_test_100 projection=[c1, c9] ------Projection: aggregate_test_100.c1, CAST(aggregate_test_100.c3 AS Int64) AS c9 From a912f393a17ce0b68159ab99b82be5f11a6036a4 Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 13:38:57 +0100 Subject: [PATCH 26/27] VTX-4075: revert even verbose out, don't want to mess with tests --- datafusion/physical-plan/src/projection.rs | 39 ++++++++-------------- datafusion/physical-plan/src/union.rs | 13 +------- 2 files changed, 15 insertions(+), 37 deletions(-) diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index 82a877f25632..aa2e3a21e042 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -126,34 +126,23 @@ impl DisplayAs for ProjectionExec { t: DisplayFormatType, f: &mut std::fmt::Formatter, ) -> std::fmt::Result { - let expr: Vec = self - .expr - .iter() - .map(|(e, alias)| { - let e = e.to_string(); - if &e != alias { - format!("{e} as {alias}") - } else { - e - } - }) - .collect(); match t { - DisplayFormatType::Default => { + DisplayFormatType::Default | DisplayFormatType::Verbose => { + let expr: Vec = self + .expr + .iter() + .map(|(e, alias)| { + let e = e.to_string(); + if &e != alias { + format!("{e} as {alias}") + } else { + e + } + }) + .collect(); + write!(f, "ProjectionExec: expr=[{}]", expr.join(", ")) } - DisplayFormatType::Verbose => match self.output_ordering() { - Some(exprs) => { - write!( - f, - "ProjectionExec: expr=[{}], sort_expr=[{}]", - expr.join(", "), - PhysicalSortExpr::format_list(exprs) - ) - } - _ => write!(f, "ProjectionExec: expr=[{}]", expr.join(", ")), - }, - } } } diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index 
f5dc7e6241dd..ab56062d0d77 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -119,20 +119,9 @@ impl DisplayAs for UnionExec { f: &mut std::fmt::Formatter, ) -> std::fmt::Result { match t { - DisplayFormatType::Default => { + DisplayFormatType::Default | DisplayFormatType::Verbose => { write!(f, "UnionExec") } - DisplayFormatType::Verbose => match self.output_ordering() { - Some(exprs) => { - write!( - f, - "UnionExec: sort_expr=[{}]", - PhysicalSortExpr::format_list(exprs) - ) - } - _ => write!(f, "UnionExec"), - }, - } } } From d885449c7c9e0436baa6430f55ddf16dff9f4aca Mon Sep 17 00:00:00 2001 From: Faiaz Sanaulla Date: Fri, 26 Jan 2024 13:39:47 +0100 Subject: [PATCH 27/27] VTX-4075: fix --- datafusion/physical-plan/src/projection.rs | 1 + datafusion/physical-plan/src/union.rs | 1 + 2 files changed, 2 insertions(+) diff --git a/datafusion/physical-plan/src/projection.rs b/datafusion/physical-plan/src/projection.rs index aa2e3a21e042..cc2ab62049ed 100644 --- a/datafusion/physical-plan/src/projection.rs +++ b/datafusion/physical-plan/src/projection.rs @@ -143,6 +143,7 @@ impl DisplayAs for ProjectionExec { write!(f, "ProjectionExec: expr=[{}]", expr.join(", ")) } + } } } diff --git a/datafusion/physical-plan/src/union.rs b/datafusion/physical-plan/src/union.rs index ab56062d0d77..14ef9c2ec27b 100644 --- a/datafusion/physical-plan/src/union.rs +++ b/datafusion/physical-plan/src/union.rs @@ -122,6 +122,7 @@ impl DisplayAs for UnionExec { DisplayFormatType::Default | DisplayFormatType::Verbose => { write!(f, "UnionExec") } + } } }