Skip to content

Commit

Permalink
refactor(core): drop value-bag dependency for statistics (#227)
Browse files Browse the repository at this point in the history
value-bag was added to make it easy to serialize and deserialize the
statistics. On second thought, however, this cannot be made compatible
with the serde API: to deserialize a value, the caller needs to know the
underlying concrete type, and that information is erased behind `dyn`.

Therefore, the correct way to enable serialization of the properties is
to add two new methods to the PropertyBuilder / CostModel traits:
`serialize_stats(&self, &dyn Any) -> serde_json::Value` and
`deserialize_stats(&self, serde_json::Value) -> Box<dyn Any>`.

---------

Signed-off-by: Alex Chi <[email protected]>
  • Loading branch information
skyzh authored Nov 8, 2024
1 parent 4f84645 commit 75f9fe6
Show file tree
Hide file tree
Showing 8 changed files with 23 additions and 156 deletions.
152 changes: 9 additions & 143 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion optd-core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,4 @@ itertools = "0.13"
serde = { version = "1.0", features = ["derive", "rc"] }
arrow-schema = "47.0.0"
chrono = "0.4"
value-bag = { version = "1", features = ["owned"] }
erased-serde = "0.4"
6 changes: 3 additions & 3 deletions optd-core/src/cascades/memo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ impl<T: NodeType> std::fmt::Display for MemoPlanNode<T> {
}
}

#[derive(Debug, Clone)]
#[derive(Clone)]
pub struct WinnerInfo {
pub expr_id: ExprId,
pub total_weighted_cost: f64,
Expand All @@ -51,7 +51,7 @@ pub struct WinnerInfo {
pub statistics: Arc<Statistics>,
}

#[derive(Debug, Clone)]
#[derive(Clone)]
pub enum Winner {
Unknown,
Impossible,
Expand Down Expand Up @@ -81,7 +81,7 @@ impl Default for Winner {
}
}

#[derive(Default, Debug, Clone)]
#[derive(Default, Clone)]
pub struct GroupInfo {
pub winner: Winner,
}
Expand Down
3 changes: 1 addition & 2 deletions optd-core/src/cost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ use crate::cascades::{CascadesOptimizer, Memo, RelNodeContext};
use crate::nodes::{ArcPredNode, NodeType};

/// The statistics of a group.
#[derive(Clone, Debug)]
pub struct Statistics(pub value_bag::OwnedValueBag);
pub struct Statistics(pub Box<dyn std::any::Any + Send + Sync + 'static>);

/// The cost of an operation. The cost is represented as a vector of double values.
/// For example, it can be represented as `[compute_cost, io_cost]`.
Expand Down
2 changes: 1 addition & 1 deletion optd-core/src/nodes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -358,7 +358,7 @@ impl<T: NodeType> PredNode<T> {
}

/// Metadata for a rel node.
#[derive(Clone, Debug)]
#[derive(Clone)]
pub struct PlanNodeMeta {
/// The group (id) of the `RelNode`
pub group_id: GroupId,
Expand Down
1 change: 0 additions & 1 deletion optd-datafusion-repr/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -22,4 +22,3 @@ camelpaste = "0.1"
datafusion-expr = "32.0.0"
serde = { version = "1.0", features = ["derive"] }
bincode = "1.3.3"
value-bag = { version = "1", features = ["owned"] }
10 changes: 7 additions & 3 deletions optd-datafusion-repr/src/cost/base_cost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,14 @@ use std::collections::HashMap;
use itertools::Itertools;
use optd_core::cascades::{CascadesOptimizer, NaiveMemo, RelNodeContext};
use optd_core::cost::{Cost, CostModel, Statistics};
use value_bag::ValueBag;

use crate::plan_nodes::{ArcDfPredNode, ConstantPred, DfNodeType, DfReprPredNode};

#[derive(Debug, Clone)]
pub struct DfStatistics {
row_cnt: f64,
}

pub struct DfCostModel {
table_stat: HashMap<String, usize>,
}
Expand All @@ -31,15 +35,15 @@ impl DfCostModel {
}

pub fn row_cnt(Statistics(stat): &Statistics) -> f64 {
stat.by_ref().as_f64()
stat.downcast_ref::<DfStatistics>().unwrap().row_cnt
}

pub fn cost(compute_cost: f64, io_cost: f64) -> Cost {
Cost(vec![compute_cost, io_cost])
}

pub fn stat(row_cnt: f64) -> Statistics {
Statistics(ValueBag::from_f64(row_cnt).to_owned())
Statistics(Box::new(DfStatistics { row_cnt }))
}

pub fn cost_tuple(Cost(cost): &Cost) -> (f64, f64) {
Expand Down
3 changes: 1 addition & 2 deletions optd-datafusion-repr/src/testing/dummy_cost.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

use optd_core::cascades::{CascadesOptimizer, NaiveMemo, RelNodeContext};
use optd_core::cost::{Cost, CostModel, Statistics};
use value_bag::ValueBag;

use crate::plan_nodes::{ArcDfPredNode, DfNodeType};

Expand Down Expand Up @@ -36,7 +35,7 @@ impl CostModel<DfNodeType, NaiveMemo<DfNodeType>> for DummyCostModel {
_: Option<RelNodeContext>,
_: Option<&CascadesOptimizer<DfNodeType>>,
) -> Statistics {
Statistics(ValueBag::empty().to_owned())
Statistics(Box::new(()))
}

fn explain_cost(&self, _: &Cost) -> String {
Expand Down

0 comments on commit 75f9fe6

Please sign in to comment.