Skip to content

Commit

Permalink
test: add tpc-h q8 and fix expression parsing (#64)
Browse files Browse the repository at this point in the history
TPC-H Q8 has been broken since cross join support b/c it adds join
filter support but some expressions were not handled. This pull request
adds tests for TPC-H Q8 and implements between + cast expressions. Note
that the type inference is broken and someone should add a `Type` rel
node to df-repr in order to process types correctly.

Signed-off-by: Alex Chi <[email protected]>
  • Loading branch information
skyzh authored Feb 13, 2024
1 parent feffd5b commit 2d91160
Show file tree
Hide file tree
Showing 9 changed files with 636 additions and 39 deletions.
44 changes: 19 additions & 25 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions optd-core/src/rel_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ pub enum Value {
Float(OrderedFloat<f64>),
String(Arc<str>),
Bool(bool),
Date32(i32),
Decimal128(i128),
Serialized(Arc<[u8]>),
}

Expand All @@ -55,6 +57,8 @@ impl std::fmt::Display for Value {
Self::Float(x) => write!(f, "{x}"),
Self::String(x) => write!(f, "\"{x}\""),
Self::Bool(x) => write!(f, "{x}"),
Self::Date32(x) => write!(f, "{x}"),
Self::Decimal128(x) => write!(f, "{x}"),
Self::Serialized(x) => write!(f, "<len:{}>", x.len()),
}
}
Expand Down
2 changes: 1 addition & 1 deletion optd-datafusion-bridge/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
arrow-schema = "*"
arrow-schema = "47.0.0"
datafusion = "32.0.0"
datafusion-expr = "32.0.0"
async-trait = "0.1"
Expand Down
40 changes: 34 additions & 6 deletions optd-datafusion-bridge/src/from_optd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@ use datafusion::{
};
use optd_datafusion_repr::{
plan_nodes::{
BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ConstantType, Expr, FuncExpr, FuncType,
JoinType, LogOpExpr, LogOpType, OptRelNode, OptRelNodeRef, OptRelNodeTyp, PhysicalAgg,
PhysicalEmptyRelation, PhysicalFilter, PhysicalHashJoin, PhysicalLimit,
PhysicalNestedLoopJoin, PhysicalProjection, PhysicalScan, PhysicalSort, PlanNode,
SortOrderExpr, SortOrderType,
BetweenExpr, BinOpExpr, BinOpType, CastExpr, ColumnRefExpr, ConstantExpr, ConstantType,
Expr, ExprList, FuncExpr, FuncType, JoinType, LogOpExpr, LogOpType, OptRelNode,
OptRelNodeRef, OptRelNodeTyp, PhysicalAgg, PhysicalEmptyRelation, PhysicalFilter,
PhysicalHashJoin, PhysicalLimit, PhysicalNestedLoopJoin, PhysicalProjection, PhysicalScan,
PhysicalSort, PlanNode, SortOrderExpr, SortOrderType,
},
properties::schema::Schema as OptdSchema,
PhysicalCollector,
PhysicalCollector, Value,
};

use crate::{physical_collector::CollectorExec, OptdPlanContext};
Expand Down Expand Up @@ -228,6 +228,34 @@ impl OptdPlanContext<'_> {
)) as Arc<dyn PhysicalExpr>,
)
}
OptRelNodeTyp::Between => {
// TODO: should we just convert between to x <= c1 and x >= c2?
let expr = BetweenExpr::from_rel_node(expr.into_rel_node()).unwrap();
Self::conv_from_optd_expr(
LogOpExpr::new(
LogOpType::And,
ExprList::new(vec![
BinOpExpr::new(expr.child(), expr.lower(), BinOpType::Geq).into_expr(),
BinOpExpr::new(expr.child(), expr.upper(), BinOpType::Leq).into_expr(),
]),
)
.into_expr(),
context,
)
}
OptRelNodeTyp::Cast => {
let expr = CastExpr::from_rel_node(expr.into_rel_node()).unwrap();
let child = Self::conv_from_optd_expr(expr.child(), context)?;
let data_type = match expr.cast_to() {
Value::Bool(_) => DataType::Boolean,
Value::Decimal128(_) => DataType::Decimal128(15, 2), /* TODO: AVOID HARD CODE PRECISION */
Value::Date32(_) => DataType::Date32,
other => unimplemented!("{}", other),
};
Ok(Arc::new(
datafusion::physical_plan::expressions::CastExpr::new(child, data_type, None),
))
}
_ => unimplemented!("{}", expr.into_rel_node()),
}
}
Expand Down
31 changes: 26 additions & 5 deletions optd-datafusion-bridge/src/into_optd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,14 @@ use datafusion::{
};
use datafusion_expr::Expr as DFExpr;
use optd_core::rel_node::RelNode;
use optd_datafusion_repr::plan_nodes::{
BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, Expr, ExprList, FuncExpr, FuncType,
JoinType, LogOpExpr, LogOpType, LogicalAgg, LogicalEmptyRelation, LogicalFilter, LogicalJoin,
LogicalLimit, LogicalProjection, LogicalScan, LogicalSort, OptRelNode, OptRelNodeRef,
OptRelNodeTyp, PlanNode, SortOrderExpr, SortOrderType,
use optd_datafusion_repr::{
plan_nodes::{
BetweenExpr, BinOpExpr, BinOpType, CastExpr, ColumnRefExpr, ConstantExpr, Expr, ExprList,
FuncExpr, FuncType, JoinType, LogOpExpr, LogOpType, LogicalAgg, LogicalEmptyRelation,
LogicalFilter, LogicalJoin, LogicalLimit, LogicalProjection, LogicalScan, LogicalSort,
OptRelNode, OptRelNodeRef, OptRelNodeTyp, PlanNode, SortOrderExpr, SortOrderType,
},
Value,
};

use crate::OptdPlanContext;
Expand Down Expand Up @@ -151,6 +154,24 @@ impl OptdPlanContext<'_> {
)
.into_expr())
}
Expr::Between(x) => {
let expr = self.conv_into_optd_expr(x.expr.as_ref(), context)?;
let low = self.conv_into_optd_expr(x.low.as_ref(), context)?;
let high = self.conv_into_optd_expr(x.high.as_ref(), context)?;
assert!(!x.negated, "unimplemented");
Ok(BetweenExpr::new(expr, low, high).into_expr())
}
Expr::Cast(x) => {
let expr = self.conv_into_optd_expr(x.expr.as_ref(), context)?;
let data_type = x.data_type.clone();
let val = match data_type {
arrow_schema::DataType::Int8 => Value::Int8(0),
arrow_schema::DataType::Date32 => Value::Date32(0),
arrow_schema::DataType::Decimal128(_, _) => Value::Decimal128(0),
other => unimplemented!("unimplemented datatype {:?}", other),
};
Ok(CastExpr::new(expr, val).into_expr())
}
_ => bail!("Unsupported expression: {:?}", expr),
}
}
Expand Down
15 changes: 13 additions & 2 deletions optd-datafusion-repr/src/plan_nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,9 @@ pub use agg::{LogicalAgg, PhysicalAgg};
pub use apply::{ApplyType, LogicalApply};
pub use empty_relation::{LogicalEmptyRelation, PhysicalEmptyRelation};
pub use expr::{
BinOpExpr, BinOpType, ColumnRefExpr, ConstantExpr, ConstantType, ExprList, FuncExpr, FuncType,
LogOpExpr, LogOpType, SortOrderExpr, SortOrderType, UnOpExpr, UnOpType,
BetweenExpr, BinOpExpr, BinOpType, CastExpr, ColumnRefExpr, ConstantExpr, ConstantType,
ExprList, FuncExpr, FuncType, LogOpExpr, LogOpType, SortOrderExpr, SortOrderType, UnOpExpr,
UnOpType,
};
pub use filter::{LogicalFilter, PhysicalFilter};
pub use join::{JoinType, LogicalJoin, PhysicalHashJoin, PhysicalNestedLoopJoin};
Expand Down Expand Up @@ -73,6 +74,8 @@ pub enum OptRelNodeTyp {
LogOp(LogOpType),
Func(FuncType),
SortOrder(SortOrderType),
Between,
Cast,
}

impl OptRelNodeTyp {
Expand Down Expand Up @@ -111,6 +114,8 @@ impl OptRelNodeTyp {
| Self::Func(_)
| Self::SortOrder(_)
| Self::LogOp(_)
| Self::Between
| Self::Cast
)
}
}
Expand Down Expand Up @@ -371,6 +376,12 @@ pub fn explain(rel_node: OptRelNodeRef) -> Pretty<'static> {
OptRelNodeTyp::PhysicalLimit => PhysicalLimit::from_rel_node(rel_node)
.unwrap()
.dispatch_explain(),
OptRelNodeTyp::Between => BetweenExpr::from_rel_node(rel_node)
.unwrap()
.dispatch_explain(),
OptRelNodeTyp::Cast => CastExpr::from_rel_node(rel_node)
.unwrap()
.dispatch_explain(),
}
}

Expand Down
Loading

0 comments on commit 2d91160

Please sign in to comment.