Skip to content

Commit

Permalink
Fix not passing all columns through (I think this was a bug?)
Browse files (browse the repository at this point in the history)
  • Loading branch information
jurplel committed Dec 7, 2024
1 parent b9c70ee commit 0fc8cb9
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 66 deletions.
2 changes: 1 addition & 1 deletion optd-datafusion-repr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ pub use optd_core::nodes::Value;
use optd_core::optimizer::Optimizer;
use optd_core::rules::Rule;
pub use optimizer_ext::OptimizerExt;
- use plan_nodes::{ArcDfPlanNode, DfNodeType};
+ use plan_nodes::{ArcDfPlanNode, DfNodeType, DfReprPlanNode};
use properties::column_ref::ColumnRefPropertyBuilder;
use properties::schema::{Catalog, SchemaPropertyBuilder};

Expand Down
4 changes: 1 addition & 3 deletions optd-datafusion-repr/src/rules/subquery/depjoin_pushdown.rs
Original file line number Diff line number Diff line change
Expand Up @@ -391,9 +391,7 @@ fn apply_dep_join_past_agg(
new_outer_join.into_plan_node(),
ListPred::new(
(0..left_schema_size)
- .chain(
- left_schema_size + new_agg_groups_size..left_schema_size + new_agg_schema_size,
- )
+ .chain(left_schema_size + left_schema_size..left_schema_size + new_agg_schema_size)
.map(|x| {
// Count(*) special case: We want all NULLs to be transformed into 0s.
if x >= left_schema_size + new_agg_groups_size {
Expand Down
182 changes: 120 additions & 62 deletions optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql
Original file line number Diff line number Diff line change
Expand Up @@ -132,48 +132,87 @@ LogicalProjection { exprs: [ #0, #1 ] }
│ └── #2
├── LogicalScan { table: t1 }
└── LogicalProjection { exprs: [ #0, #1 ] }
└── LogicalAgg
├── exprs:Agg(Sum)
│ └── [ #2 ]
├── groups: [ #0 ]
└── LogicalProjection { exprs: [ #0, #1, #2 ] }
└── LogicalProjection { exprs: [ #0, #2 ] }
└── LogicalJoin
├── join_type: LeftOuter
├── cond:And
│ └── Eq
│ ├── #0
│ └── #1
├── LogicalAgg { exprs: [], groups: [ #0 ] }
│ └── LogicalScan { table: t1 }
└── LogicalAgg
├── exprs:Agg(Sum)
│ └── [ #2 ]
├── groups: [ #0 ]
└── LogicalProjection { exprs: [ #0, #1, #2 ] }
└── LogicalProjection { exprs: [ #0, #2, #3 ] }
└── LogicalJoin
├── join_type: LeftOuter
├── cond:And
│ └── Eq
│ ├── #0
│ └── #1
├── LogicalAgg { exprs: [], groups: [ #0 ] }
│ └── LogicalScan { table: t1 }
└── LogicalAgg
├── exprs:Agg(Sum)
│ └── [ Cast { cast_to: Int64, child: #2 } ]
├── groups: [ #0, #1 ]
└── LogicalFilter
├── cond:Eq
│ ├── #1
│ └── #0
└── LogicalJoin { join_type: Inner, cond: true }
├── LogicalAgg { exprs: [], groups: [ #0 ] }
│ └── LogicalScan { table: t1 }
└── LogicalScan { table: t2 }
PhysicalProjection { exprs: [ #0, #1 ], cost: {compute=44228003,io=5000}, stat: {row_cnt=1} }
└── PhysicalFilter
├── cond:Gt
│ ├── #4
│ └── 100(i64)
├── cost: {compute=44228000,io=5000}
├── stat: {row_cnt=1}
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=44225000,io=5000}, stat: {row_cnt=1000} }
├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalNestedLoopJoin
├── join_type: LeftOuter
├── cond:And
│ └── Eq
│ ├── #0
│ └── #1
├── cost: {compute=44123000,io=4000}
├── stat: {row_cnt=100000}
├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} }
│ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalAgg
├── aggrs:Agg(Sum)
│ └── [ #2 ]
├── groups: [ #0 ]
├── cost: {compute=4119000,io=3000}
├── stat: {row_cnt=10000}
└── PhysicalProjection { exprs: [ #0, #2, #3 ], cost: {compute=4059000,io=3000}, stat: {row_cnt=10000} }
└── PhysicalNestedLoopJoin
├── join_type: LeftOuter
├── cond:And
│ └── Eq
│ ├── #0
│ └── #1
├── cost: {compute=4019000,io=3000}
├── stat: {row_cnt=10000}
├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} }
│ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalAgg
├── aggrs:Agg(Sum)
│ └── [ Cast { cast_to: Int64, child: #2 } ]
├── groups: [ #0, #1 ]
└── LogicalFilter
├── cond:Eq
│ ├── #1
│ └── #0
└── LogicalJoin { join_type: Inner, cond: true }
├── LogicalAgg { exprs: [], groups: [ #0 ] }
│ └── LogicalScan { table: t1 }
└── LogicalScan { table: t2 }
PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=25005,io=3000}, stat: {row_cnt=1} }
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=25002,io=3000}, stat: {row_cnt=1} }
├── PhysicalFilter
│ ├── cond:Gt
│ │ ├── #1
│ │ └── 100(i64)
│ ├── cost: {compute=24000,io=2000}
│ ├── stat: {row_cnt=1}
│ └── PhysicalAgg
│ ├── aggrs:Agg(Sum)
│ │ └── [ #2 ]
│ ├── groups: [ #0 ]
│ ├── cost: {compute=21000,io=2000}
│ ├── stat: {row_cnt=1000}
│ └── PhysicalAgg
│ ├── aggrs:Agg(Sum)
│ │ └── [ Cast { cast_to: Int64, child: #2 } ]
│ ├── groups: [ #0, #1 ]
│ ├── cost: {compute=15000,io=2000}
│ ├── stat: {row_cnt=1000}
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} }
│ ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} }
│ │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
│ └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
├── cost: {compute=15000,io=2000}
├── stat: {row_cnt=1000}
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} }
├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} }
│ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
*/

-- Test whether the optimizer can unnest correlated subqueries with scalar agg in select list
Expand Down Expand Up @@ -202,31 +241,50 @@ LogicalProjection { exprs: [ #0, #2 ] }
│ └── #2
├── LogicalScan { table: t1 }
└── LogicalProjection { exprs: [ #0, #1 ] }
└── LogicalAgg
├── exprs:Agg(Sum)
│ └── [ Cast { cast_to: Int64, child: #2 } ]
├── groups: [ #0 ]
└── LogicalFilter
├── cond:Eq
│ ├── #1
│ └── #0
└── LogicalJoin { join_type: Inner, cond: true }
├── LogicalAgg { exprs: [], groups: [ #0 ] }
│ └── LogicalScan { table: t1 }
└── LogicalScan { table: t2 }
PhysicalProjection { exprs: [ #0, #3 ], cost: {compute=20000,io=3000}, stat: {row_cnt=1000} }
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=17000,io=3000}, stat: {row_cnt=1000} }
└── LogicalProjection { exprs: [ #0, #2 ] }
└── LogicalJoin
├── join_type: LeftOuter
├── cond:And
│ └── Eq
│ ├── #0
│ └── #1
├── LogicalAgg { exprs: [], groups: [ #0 ] }
│ └── LogicalScan { table: t1 }
└── LogicalAgg
├── exprs:Agg(Sum)
│ └── [ Cast { cast_to: Int64, child: #2 } ]
├── groups: [ #0 ]
└── LogicalFilter
├── cond:Eq
│ ├── #1
│ └── #0
└── LogicalJoin { join_type: Inner, cond: true }
├── LogicalAgg { exprs: [], groups: [ #0 ] }
│ └── LogicalScan { table: t1 }
└── LogicalScan { table: t2 }
PhysicalProjection { exprs: [ #0, #4 ], cost: {compute=4033000,io=4000}, stat: {row_cnt=1000} }
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=4030000,io=4000}, stat: {row_cnt=1000} }
├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalAgg
├── aggrs:Agg(Sum)
│ └── [ Cast { cast_to: Int64, child: #2 } ]
├── groups: [ #0 ]
├── cost: {compute=14000,io=2000}
├── stat: {row_cnt=1000}
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} }
├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} }
│ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalNestedLoopJoin
├── join_type: LeftOuter
├── cond:And
│ └── Eq
│ ├── #0
│ └── #1
├── cost: {compute=4018000,io=3000}
├── stat: {row_cnt=10000}
├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} }
│ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalAgg
├── aggrs:Agg(Sum)
│ └── [ Cast { cast_to: Int64, child: #2 } ]
├── groups: [ #0 ]
├── cost: {compute=14000,io=2000}
├── stat: {row_cnt=1000}
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} }
├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} }
│ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
*/

-- Test whether the optimizer can unnest correlated subqueries.
Expand Down

0 comments on commit 0fc8cb9

Please sign in to comment.