Skip to content

Commit

Permalink
refactor(plannertest): separate each TPC-H query (#256)
Browse files Browse the repository at this point in the history
Signed-off-by: Alex Chi Z <[email protected]>
  • Loading branch information
skyzh authored Dec 7, 2024
1 parent 93aa6be commit 3fd39a8
Show file tree
Hide file tree
Showing 47 changed files with 2,657 additions and 3,174 deletions.
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion optd-sqlplannertest/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ repository = { workspace = true }
[dependencies]
clap = { version = "4.5.4", features = ["derive"] }
anyhow = { version = "1", features = ["backtrace"] }
sqlplannertest = "0.2"
sqlplannertest = "0.3"
async-trait = "0.1"
datafusion-optd-cli = { path = "../datafusion-optd-cli", version = "32.0.0" }
optd-datafusion-repr-adv-cost = { path = "../optd-datafusion-repr-adv-cost", version = "0.1" }
Expand Down
8 changes: 4 additions & 4 deletions optd-sqlplannertest/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -321,12 +321,12 @@ impl DatafusionDBMS {
#[async_trait]
impl sqlplannertest::PlannerTestRunner for DatafusionDBMS {
async fn run(&mut self, test_case: &sqlplannertest::ParsedTestCase) -> Result<String> {
if !test_case.before_sql.is_empty() {
panic!("before is not supported in optd-sqlplannertest, always specify the task type to run");
}

let mut result = String::new();
let r = &mut result;
for sql in &test_case.before_sql {
// Run the `before` statements only for their side effects; their output is discarded.
self.execute(sql, &TestFlags::default()).await?;
}
for task in &test_case.tasks {
let flags = extract_flags(task)?;
if task.starts_with("execute") {
Expand Down
30 changes: 30 additions & 0 deletions optd-sqlplannertest/tests/joins/join_enumerate.planner.sql
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ select * from t2, t1 where t1v1 = t2v1;
(Join t1 t2)
(Join t2 t1)
(Join t1 t2)
(Join t2 t1)
0 200 0 0
1 201 1 1
2 202 2 2
Expand All @@ -37,6 +40,15 @@ select * from t2, t1, t3 where t1v1 = t2v1 and t1v1 = t3v2;
(Join (Join t2 t1) t3)
(Join (Join t3 t1) t2)
(Join t2 (Join t1 t3))
(Join t2 (Join t3 t1))
(Join t3 (Join t1 t2))
(Join t3 (Join t2 t1))
(Join (Join t1 t2) t3)
(Join (Join t1 t3) t2)
(Join (Join t2 t1) t3)
(Join (Join t3 t1) t2)
0 200 0 0 0 300
1 201 1 1 1 301
2 202 2 2 2 302
Expand All @@ -55,6 +67,15 @@ select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2;
(Join (Join t2 t1) t3)
(Join (Join t3 t1) t2)
(Join t2 (Join t1 t3))
(Join t2 (Join t3 t1))
(Join t3 (Join t1 t2))
(Join t3 (Join t2 t1))
(Join (Join t1 t2) t3)
(Join (Join t1 t3) t2)
(Join (Join t2 t1) t3)
(Join (Join t3 t1) t2)
0 200 0 0 0 300
1 201 1 1 1 301
2 202 2 2 2 302
Expand All @@ -73,6 +94,15 @@ select * from t1, t2, t3 where t1v1 = t2v1 and t1v2 = t3v2;
(Join (Join t2 t1) t3)
(Join (Join t3 t1) t2)
(Join t2 (Join t1 t3))
(Join t2 (Join t3 t1))
(Join t3 (Join t1 t2))
(Join t3 (Join t2 t1))
(Join (Join t1 t2) t3)
(Join (Join t1 t3) t2)
(Join (Join t2 t1) t3)
(Join (Join t3 t1) t2)
0 0 0 200 0 300
1 1 1 201 1 301
2 2 2 202 2 302
Expand Down
5 changes: 4 additions & 1 deletion optd-sqlplannertest/tests/joins/join_enumerate.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,24 +11,27 @@
select * from t2, t1 where t1v1 = t2v1;
desc: Test whether the optimizer enumerates all 2-join orders.
tasks:
# Pruning should not matter here because join orders are logical, but we are now missing the join orders that have t1 as the outer table.
- explain[disable_pruning]:logical_join_orders
- explain:logical_join_orders
- execute
- sql: |
select * from t2, t1, t3 where t1v1 = t2v1 and t1v1 = t3v2;
desc: Test whether the optimizer enumerates all 3-join orders. (It should)
tasks:
- explain[disable_pruning]:logical_join_orders
- explain:logical_join_orders
- execute
- sql: |
select * from t2, t1, t3 where t1v1 = t2v1 and t1v2 = t3v2;
desc: Test whether the optimizer enumerates all 3-join orders. (It don't currently)
tasks:
- explain[disable_pruning]:logical_join_orders
- explain:logical_join_orders
- execute
- sql: |
select * from t1, t2, t3 where t1v1 = t2v1 and t1v2 = t3v2;
desc: Test whether the optimizer enumerates all 3-join orders. (It don't currently)
tasks:
- explain[disable_pruning]:logical_join_orders
- explain:logical_join_orders
- execute
30 changes: 18 additions & 12 deletions optd-sqlplannertest/tests/joins/self-join.planner.sql
Original file line number Diff line number Diff line change
Expand Up @@ -10,22 +10,28 @@ insert into t2 values (0, 200), (1, 201), (2, 202);
*/

-- test self join
select * from t1 as a, t1 as b where a.t1v1 = b.t1v1;
select * from t1 as a, t1 as b where a.t1v1 = b.t1v1 order by a.t1v1;

/*
(Join t1 t1)
LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
└── LogicalFilter
├── cond:Eq
│ ├── #0
│ └── #2
└── LogicalJoin { join_type: Cross, cond: true }
├── LogicalScan { table: t1 }
└── LogicalScan { table: t1 }
PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
├── PhysicalScan { table: t1 }
└── PhysicalScan { table: t1 }
LogicalSort
├── exprs:SortOrder { order: Asc }
│ └── #0
└── LogicalProjection { exprs: [ #0, #1, #2, #3 ] }
└── LogicalFilter
├── cond:Eq
│ ├── #0
│ └── #2
└── LogicalJoin { join_type: Cross, cond: true }
├── LogicalScan { table: t1 }
└── LogicalScan { table: t1 }
PhysicalSort
├── exprs:SortOrder { order: Asc }
│ └── #0
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ] }
├── PhysicalScan { table: t1 }
└── PhysicalScan { table: t1 }
0 0 0 0
1 1 1 1
2 2 2 2
Expand Down
2 changes: 1 addition & 1 deletion optd-sqlplannertest/tests/joins/self-join.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
tasks:
- execute
- sql: |
select * from t1 as a, t1 as b where a.t1v1 = b.t1v1;
select * from t1 as a, t1 as b where a.t1v1 = b.t1v1 order by a.t1v1;
desc: test self join
tasks:
- explain:logical_join_orders,logical_optd,physical_optd
Expand Down
138 changes: 137 additions & 1 deletion optd-sqlplannertest/tests/subqueries/subquery_unnesting.planner.sql
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ create table t3(t3v2 int, t3v4 int);
*/

-- Test whether the optimizer can unnest correlated subqueries.
-- Test whether the optimizer can unnest correlated subqueries with (scalar op agg)
select * from t1 where (select sum(t2v3) from t2 where t2v1 = t1v1) > 100;

/*
Expand Down Expand Up @@ -74,6 +74,142 @@ PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=18005,io=3000}, stat: {ro
└── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
*/

-- Test whether the optimizer can unnest correlated subqueries with (scalar op group agg)
select * from t1 where (select sum(sumt2v3) from (select t2v1, sum(t2v3) as sumt2v3 from t2 where t2v1 = t1v1 group by t2v1)) > 100;

/*
LogicalProjection { exprs: [ #0, #1 ] }
└── LogicalFilter
├── cond:Gt
│ ├── #2
│ └── 100(i64)
└── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] }
├── LogicalScan { table: t1 }
└── LogicalProjection { exprs: [ #0 ] }
└── LogicalAgg
├── exprs:Agg(Sum)
│ └── [ #1 ]
├── groups: []
└── LogicalProjection { exprs: [ #0, #1 ] }
└── LogicalAgg
├── exprs:Agg(Sum)
│ └── [ Cast { cast_to: Int64, child: #1 } ]
├── groups: [ #0 ]
└── LogicalFilter
├── cond:Eq
│ ├── #0
│ └── Extern(#0)
└── LogicalScan { table: t2 }
LogicalProjection { exprs: [ #0, #1 ] }
└── LogicalFilter
├── cond:Gt
│ ├── #2
│ └── 100(i64)
└── LogicalProjection { exprs: [ #0, #1, #3 ] }
└── LogicalJoin
├── join_type: Inner
├── cond:Eq
│ ├── #0
│ └── #2
├── LogicalScan { table: t1 }
└── LogicalProjection { exprs: [ #0, #1 ] }
└── LogicalAgg
├── exprs:Agg(Sum)
│ └── [ #2 ]
├── groups: [ #0 ]
└── LogicalProjection { exprs: [ #0, #1, #2 ] }
└── LogicalAgg
├── exprs:Agg(Sum)
│ └── [ Cast { cast_to: Int64, child: #2 } ]
├── groups: [ #0, #1 ]
└── LogicalFilter
├── cond:Eq
│ ├── #1
│ └── #0
└── LogicalJoin { join_type: Inner, cond: true }
├── LogicalAgg { exprs: [], groups: [ #0 ] }
│ └── LogicalScan { table: t1 }
└── LogicalScan { table: t2 }
PhysicalProjection { exprs: [ #2, #3 ], cost: {compute=25005,io=3000}, stat: {row_cnt=1} }
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=25002,io=3000}, stat: {row_cnt=1} }
├── PhysicalFilter
│ ├── cond:Gt
│ │ ├── #1
│ │ └── 100(i64)
│ ├── cost: {compute=24000,io=2000}
│ ├── stat: {row_cnt=1}
│ └── PhysicalAgg
│ ├── aggrs:Agg(Sum)
│ │ └── [ #2 ]
│ ├── groups: [ #0 ]
│ ├── cost: {compute=21000,io=2000}
│ ├── stat: {row_cnt=1000}
│ └── PhysicalAgg
│ ├── aggrs:Agg(Sum)
│ │ └── [ Cast { cast_to: Int64, child: #2 } ]
│ ├── groups: [ #0, #1 ]
│ ├── cost: {compute=15000,io=2000}
│ ├── stat: {row_cnt=1000}
│ └── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} }
│ ├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} }
│ │ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
│ └── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
*/

-- Test whether the optimizer can unnest correlated subqueries with scalar agg in select list
select t1v1, (select sum(t2v3) from t2 where t2v1 = t1v1) as sum from t1;

/*
LogicalProjection { exprs: [ #0, #2 ] }
└── RawDependentJoin { join_type: Cross, cond: true, extern_cols: [ Extern(#0) ] }
├── LogicalScan { table: t1 }
└── LogicalProjection { exprs: [ #0 ] }
└── LogicalAgg
├── exprs:Agg(Sum)
│ └── [ Cast { cast_to: Int64, child: #1 } ]
├── groups: []
└── LogicalFilter
├── cond:Eq
│ ├── #0
│ └── Extern(#0)
└── LogicalScan { table: t2 }
LogicalProjection { exprs: [ #0, #2 ] }
└── LogicalProjection { exprs: [ #0, #1, #3 ] }
└── LogicalJoin
├── join_type: Inner
├── cond:Eq
│ ├── #0
│ └── #2
├── LogicalScan { table: t1 }
└── LogicalProjection { exprs: [ #0, #1 ] }
└── LogicalAgg
├── exprs:Agg(Sum)
│ └── [ Cast { cast_to: Int64, child: #2 } ]
├── groups: [ #0 ]
└── LogicalFilter
├── cond:Eq
│ ├── #1
│ └── #0
└── LogicalJoin { join_type: Inner, cond: true }
├── LogicalAgg { exprs: [], groups: [ #0 ] }
│ └── LogicalScan { table: t1 }
└── LogicalScan { table: t2 }
PhysicalProjection { exprs: [ #0, #3 ], cost: {compute=20000,io=3000}, stat: {row_cnt=1000} }
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=17000,io=3000}, stat: {row_cnt=1000} }
├── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalAgg
├── aggrs:Agg(Sum)
│ └── [ Cast { cast_to: Int64, child: #2 } ]
├── groups: [ #0 ]
├── cost: {compute=14000,io=2000}
├── stat: {row_cnt=1000}
└── PhysicalHashJoin { join_type: Inner, left_keys: [ #0 ], right_keys: [ #0 ], cost: {compute=6000,io=2000}, stat: {row_cnt=1000} }
├── PhysicalAgg { aggrs: [], groups: [ #0 ], cost: {compute=3000,io=1000}, stat: {row_cnt=1000} }
│ └── PhysicalScan { table: t1, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
└── PhysicalScan { table: t2, cost: {compute=0,io=1000}, stat: {row_cnt=1000} }
*/

-- Test whether the optimizer can unnest correlated subqueries.
select * from t1 where (select sum(t2v3) from (select * from t2, t3 where t2v1 = t1v1 and t2v3 = t3v2)) > 100;

Expand Down
18 changes: 17 additions & 1 deletion optd-sqlplannertest/tests/subqueries/subquery_unnesting.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,25 @@
# - explain_logical
- sql: |
select * from t1 where (select sum(t2v3) from t2 where t2v1 = t1v1) > 100;
desc: Test whether the optimizer can unnest correlated subqueries.
desc: Test whether the optimizer can unnest correlated subqueries with (scalar op agg)
tasks:
- explain[verbose]:logical_optd,optimized_logical_optd,physical_optd
- sql: |
select * from t1 where (select sum(sumt2v3) from (select t2v1, sum(t2v3) as sumt2v3 from t2 where t2v1 = t1v1 group by t2v1)) > 100;
desc: Test whether the optimizer can unnest correlated subqueries with (scalar op group agg)
tasks:
- explain[verbose]:logical_optd,optimized_logical_optd,physical_optd
- sql: |
select t1v1, (select sum(t2v3) from t2 where t2v1 = t1v1) as sum from t1;
desc: Test whether the optimizer can unnest correlated subqueries with scalar agg in select list
tasks:
- explain[verbose]:logical_optd,optimized_logical_optd,physical_optd
# - sql: |
# select * from t1 where exists (select * from t2 where t2v1 = t1v1);
# desc: Test whether the optimizer can unnest correlated subqueries with exists
# tasks:
# - explain[verbose]:logical_optd,optimized_logical_optd,physical_optd
# TODO: add a test case for quantified subqueries (ANY/ALL).
- sql: |
select * from t1 where (select sum(t2v3) from (select * from t2, t3 where t2v1 = t1v1 and t2v3 = t3v2)) > 100;
desc: Test whether the optimizer can unnest correlated subqueries.
Expand Down
Loading

0 comments on commit 3fd39a8

Please sign in to comment.