From 91cd0a2154a4c200f4e47b27119d389c91a5a9ed Mon Sep 17 00:00:00 2001 From: "Alex Chi Z." <4198311+skyzh@users.noreply.github.com> Date: Wed, 13 Nov 2024 17:38:59 -0500 Subject: [PATCH] refactor(core): drop children_cost on the cost model, doc property builders (#235) As discussed today :) Signed-off-by: Alex Chi --- optd-core/src/cascades/tasks/optimize_inputs.rs | 1 - optd-core/src/cost.rs | 1 - optd-core/src/logical_property.rs | 6 ++++++ optd-core/src/physical_property.rs | 6 ++++++ optd-datafusion-repr-adv-cost/src/lib.rs | 11 ++--------- optd-datafusion-repr/src/cost/adaptive_cost.rs | 11 ++--------- optd-datafusion-repr/src/cost/base_cost.rs | 1 - optd-datafusion-repr/src/testing/dummy_cost.rs | 1 - 8 files changed, 16 insertions(+), 22 deletions(-) diff --git a/optd-core/src/cascades/tasks/optimize_inputs.rs b/optd-core/src/cascades/tasks/optimize_inputs.rs index c8c5a958..b19705bb 100644 --- a/optd-core/src/cascades/tasks/optimize_inputs.rs +++ b/optd-core/src/cascades/tasks/optimize_inputs.rs @@ -197,7 +197,6 @@ impl> Task for OptimizeInputsTask { &expr.typ, &preds, &input_statistics_ref, - &input_cost, Some(context.clone()), Some(optimizer), ); diff --git a/optd-core/src/cost.rs b/optd-core/src/cost.rs index ecb76b02..f68386d1 100644 --- a/optd-core/src/cost.rs +++ b/optd-core/src/cost.rs @@ -23,7 +23,6 @@ pub trait CostModel>: 'static + Send + Sync { node: &T, predicates: &[ArcPredNode], children_stats: &[Option<&Statistics>], - children_costs: &[Cost], context: Option, optimizer: Option<&CascadesOptimizer>, ) -> Cost; diff --git a/optd-core/src/logical_property.rs b/optd-core/src/logical_property.rs index d507a1a8..97687324 100644 --- a/optd-core/src/logical_property.rs +++ b/optd-core/src/logical_property.rs @@ -8,10 +8,15 @@ use std::fmt::{Debug, Display}; use crate::nodes::{ArcPredNode, NodeType}; +/// The trait enables we store any logical property in the memo table by erasing the concrete type. +/// In the future, we can implement `serialize`/`deserialize` on this trait so that we can serialize +/// the logical properties. pub trait LogicalProperty: 'static + Any + Send + Sync + Debug + Display { fn as_any(&self) -> &dyn Any; } +/// A wrapper around the `LogicalPropertyBuilder` so that we can erase the concrete type and store +/// it safely in the memo table. pub trait LogicalPropertyBuilderAny: 'static + Send + Sync { fn derive_any( &self, @@ -22,6 +27,7 @@ pub trait LogicalPropertyBuilderAny: 'static + Send + Sync { fn property_name(&self) -> &'static str; } +/// The trait for building logical properties for a plan node. pub trait LogicalPropertyBuilder: 'static + Send + Sync + Sized { type Prop: LogicalProperty + Sized + Clone; diff --git a/optd-core/src/physical_property.rs b/optd-core/src/physical_property.rs index 60a96725..697d2de9 100644 --- a/optd-core/src/physical_property.rs +++ b/optd-core/src/physical_property.rs @@ -12,11 +12,16 @@ use itertools::Itertools; use crate::nodes::{ArcPredNode, NodeType, PlanNode, PlanNodeOrGroup}; +/// The trait enables we store any physical property in the memo table by erasing the concrete type. +/// In the future, we can implement `serialize`/`deserialize` on this trait so that we can serialize +/// the physical properties. pub trait PhysicalProperty: 'static + Any + Send + Sync + Debug + Display { fn as_any(&self) -> &dyn Any; fn to_boxed(&self) -> Box; } +/// A wrapper around the `PhysicalPropertyBuilder` so that we can erase the concrete type and store +/// it safely in the memo table. pub trait PhysicalPropertyBuilderAny: 'static + Send + Sync { fn derive_any( &self, @@ -41,6 +46,7 @@ pub trait PhysicalPropertyBuilderAny: 'static + Send + Sync { fn property_name(&self) -> &'static str; } +/// The trait for building physical properties for a plan node. pub trait PhysicalPropertyBuilder: 'static + Send + Sync + Sized { type Prop: PhysicalProperty + Clone + Sized; diff --git a/optd-datafusion-repr-adv-cost/src/lib.rs b/optd-datafusion-repr-adv-cost/src/lib.rs index 2205ca8f..8d409ccd 100644 --- a/optd-datafusion-repr-adv-cost/src/lib.rs +++ b/optd-datafusion-repr-adv-cost/src/lib.rs @@ -61,18 +61,11 @@ impl CostModel> for AdvancedCostModel { node: &DfNodeType, predicates: &[ArcDfPredNode], children_stats: &[Option<&Statistics>], - children_costs: &[Cost], context: Option, optimizer: Option<&CascadesOptimizer>, ) -> Cost { - self.base_model.compute_operation_cost( - node, - predicates, - children_stats, - children_costs, - context, - optimizer, - ) + self.base_model + .compute_operation_cost(node, predicates, children_stats, context, optimizer) } fn derive_statistics( diff --git a/optd-datafusion-repr/src/cost/adaptive_cost.rs b/optd-datafusion-repr/src/cost/adaptive_cost.rs index 8a6c7b29..653d48d0 100644 --- a/optd-datafusion-repr/src/cost/adaptive_cost.rs +++ b/optd-datafusion-repr/src/cost/adaptive_cost.rs @@ -67,7 +67,6 @@ impl CostModel> for AdaptiveCostModel { node: &DfNodeType, predicates: &[ArcDfPredNode], children: &[Option<&Statistics>], - children_cost: &[Cost], context: Option, optimizer: Option<&CascadesOptimizer>, ) -> Cost { @@ -75,14 +74,8 @@ impl CostModel> for AdaptiveCostModel { let row_cnt = self.get_row_cnt(&context); return DfCostModel::cost(0.0, row_cnt); } - self.base_model.compute_operation_cost( - node, - predicates, - children, - children_cost, - context, - optimizer, - ) + self.base_model + .compute_operation_cost(node, predicates, children, context, optimizer) } fn derive_statistics( diff --git a/optd-datafusion-repr/src/cost/base_cost.rs b/optd-datafusion-repr/src/cost/base_cost.rs index 7a91b85b..9e5dfd88 100644 --- a/optd-datafusion-repr/src/cost/base_cost.rs +++ b/optd-datafusion-repr/src/cost/base_cost.rs @@ -132,7 +132,6 @@ impl CostModel> for DfCostModel { node: &DfNodeType, predicates: &[ArcDfPredNode], children: &[Option<&Statistics>], - _: &[Cost], _context: Option, _optimizer: Option<&CascadesOptimizer>, ) -> Cost { diff --git a/optd-datafusion-repr/src/testing/dummy_cost.rs b/optd-datafusion-repr/src/testing/dummy_cost.rs index 63f0b7db..c5f24bd0 100644 --- a/optd-datafusion-repr/src/testing/dummy_cost.rs +++ b/optd-datafusion-repr/src/testing/dummy_cost.rs @@ -19,7 +19,6 @@ impl CostModel> for DummyCostModel { _: &DfNodeType, _: &[ArcDfPredNode], _: &[Option<&Statistics>], - _: &[Cost], _: Option, _: Option<&CascadesOptimizer>, ) -> Cost {