Skip to content

Commit

Permalink
Update DataFusion to 43 (#1125)
Browse files Browse the repository at this point in the history
* Update DataFusions to 43

* Fmt

* Debug

* Remove comment

* Update proto

* Update common proto as well
  • Loading branch information
Dandandan authored Dec 19, 2024
1 parent 80c2c56 commit 167bbf9
Show file tree
Hide file tree
Showing 13 changed files with 119 additions and 117 deletions.
9 changes: 4 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,10 @@ arrow-flight = { version = "53", features = ["flight-sql-experimental"] }
clap = { version = "4.5", features = ["derive", "cargo"] }
configure_me = { version = "0.4.0" }
configure_me_codegen = { version = "0.4.4" }
# bump directly to datafusion v43 to avoid the serde bug on v42 (https://github.com/apache/datafusion/pull/12626)
datafusion = "42.0.0"
datafusion-cli = "42.0.0"
datafusion-proto = "42.0.0"
datafusion-proto-common = "42.0.0"
datafusion = "43.0.0"
datafusion-cli = "43.0.0"
datafusion-proto = "43.0.0"
datafusion-proto-common = "43.0.0"
object_store = "0.11"
prost = "0.13"
prost-types = "0.13"
Expand Down
2 changes: 1 addition & 1 deletion ballista/client/tests/context_setup.rs
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,7 @@ mod standalone {
}
}

#[derive(Default)]
#[derive(Debug, Default)]
struct BadPlanner {}

#[async_trait::async_trait]
Expand Down
174 changes: 80 additions & 94 deletions ballista/core/proto/datafusion.proto
Original file line number Diff line number Diff line change
Expand Up @@ -75,10 +75,6 @@ message LogicalExprNodeCollection {
repeated LogicalExprNode logical_expr_nodes = 1;
}

message SortExprNodeCollection {
repeated SortExprNode sort_expr_nodes = 1;
}

message ListingTableScanNode {
reserved 1; // was string table_name
TableReference table_name = 14;
Expand All @@ -94,9 +90,8 @@ message ListingTableScanNode {
datafusion_common.CsvFormat csv = 10;
datafusion_common.ParquetFormat parquet = 11;
datafusion_common.AvroFormat avro = 12;
datafusion_common.NdJsonFormat json = 15;
}
repeated SortExprNodeCollection file_sort_order = 13;
repeated LogicalExprNodeCollection file_sort_order = 13;
}

message ViewTableScanNode {
Expand Down Expand Up @@ -133,7 +128,7 @@ message SelectionNode {

message SortNode {
LogicalPlanNode input = 1;
repeated SortExprNode expr = 2;
repeated LogicalExprNode expr = 2;
// Maximum number of highest/lowest rows to fetch; negative means no limit
int64 fetch = 3;
}
Expand Down Expand Up @@ -164,12 +159,12 @@ message CreateExternalTableNode {
repeated string table_partition_cols = 5;
bool if_not_exists = 6;
string definition = 7;
repeated SortExprNodeCollection order_exprs = 10;
repeated LogicalExprNodeCollection order_exprs = 10;
bool unbounded = 11;
map<string, string> options = 8;
datafusion_common.Constraints constraints = 12;
map<string, LogicalExprNode> column_defaults = 13;
}
}

message PrepareNode {
string name = 1;
Expand Down Expand Up @@ -249,51 +244,35 @@ message DistinctNode {
message DistinctOnNode {
repeated LogicalExprNode on_expr = 1;
repeated LogicalExprNode select_expr = 2;
repeated SortExprNode sort_expr = 3;
repeated LogicalExprNode sort_expr = 3;
LogicalPlanNode input = 4;
}

message CopyToNode {
LogicalPlanNode input = 1;
string output_url = 2;
bytes file_type = 3;
repeated string partition_by = 7;
LogicalPlanNode input = 1;
string output_url = 2;
oneof format_options {
datafusion_common.CsvOptions csv = 8;
datafusion_common.JsonOptions json = 9;
datafusion_common.TableParquetOptions parquet = 10;
datafusion_common.AvroOptions avro = 11;
datafusion_common.ArrowOptions arrow = 12;
}
repeated string partition_by = 7;
}

message UnnestNode {
LogicalPlanNode input = 1;
repeated ColumnUnnestExec exec_columns = 2;
repeated ColumnUnnestListItem list_type_columns = 3;
repeated uint64 struct_type_columns = 4;
repeated uint64 dependency_indices = 5;
datafusion_common.DfSchema schema = 6;
UnnestOptions options = 7;
}
message ColumnUnnestListItem {
uint32 input_index = 1;
ColumnUnnestListRecursion recursion = 2;
}

message ColumnUnnestListRecursions {
repeated ColumnUnnestListRecursion recursions = 2;
}

message ColumnUnnestListRecursion {
datafusion_common.Column output_column = 1;
uint32 depth = 2;
}

message ColumnUnnestExec {
datafusion_common.Column column = 1;
oneof UnnestType {
ColumnUnnestListRecursions list = 2;
datafusion_common.EmptyMessage struct = 3;
datafusion_common.EmptyMessage inferred = 4;
}
LogicalPlanNode input = 1;
repeated datafusion_common.Column exec_columns = 2;
repeated uint64 list_type_columns = 3;
repeated uint64 struct_type_columns = 4;
repeated uint64 dependency_indices = 5;
datafusion_common.DfSchema schema = 6;
UnnestOptions options = 7;
}

message UnnestOptions {
bool preserve_nulls = 1;
bool preserve_nulls = 1;
}

message UnionNode {
Expand Down Expand Up @@ -337,6 +316,8 @@ message LogicalExprNode {
// binary expressions
BinaryExprNode binary_expr = 4;

// aggregate expressions
AggregateExprNode aggregate_expr = 5;

// null checks
IsNull is_null_expr = 6;
Expand All @@ -346,6 +327,7 @@ message LogicalExprNode {
BetweenNode between = 9;
CaseNode case_ = 10;
CastNode cast = 11;
SortExprNode sort = 12;
NegativeNode negative = 13;
InListNode in_list = 14;
Wildcard wildcard = 15;
Expand Down Expand Up @@ -387,7 +369,7 @@ message LogicalExprNode {
}

message Wildcard {
TableReference qualifier = 1;
string qualifier = 1;
}

message PlaceholderNode {
Expand Down Expand Up @@ -489,14 +471,57 @@ message InListNode {
bool negated = 3;
}

enum AggregateFunction {
MIN = 0;
MAX = 1;
SUM = 2;
AVG = 3;
COUNT = 4;
APPROX_DISTINCT = 5;
ARRAY_AGG = 6;
// VARIANCE = 7;
VARIANCE_POP = 8;
// COVARIANCE = 9;
// COVARIANCE_POP = 10;
STDDEV = 11;
STDDEV_POP = 12;
CORRELATION = 13;
APPROX_PERCENTILE_CONT = 14;
APPROX_MEDIAN = 15;
APPROX_PERCENTILE_CONT_WITH_WEIGHT = 16;
GROUPING = 17;
// MEDIAN = 18;
BIT_AND = 19;
BIT_OR = 20;
BIT_XOR = 21;
BOOL_AND = 22;
BOOL_OR = 23;
REGR_SLOPE = 26;
REGR_INTERCEPT = 27;
REGR_COUNT = 28;
REGR_R2 = 29;
REGR_AVGX = 30;
REGR_AVGY = 31;
REGR_SXX = 32;
REGR_SYY = 33;
REGR_SXY = 34;
STRING_AGG = 35;
NTH_VALUE_AGG = 36;
}

message AggregateExprNode {
AggregateFunction aggr_function = 1;
repeated LogicalExprNode expr = 2;
bool distinct = 3;
LogicalExprNode filter = 4;
repeated LogicalExprNode order_by = 5;
}

message AggregateUDFExprNode {
string fun_name = 1;
repeated LogicalExprNode args = 2;
bool distinct = 5;
LogicalExprNode filter = 3;
repeated SortExprNode order_by = 4;
optional bytes fun_definition = 6;
repeated LogicalExprNode order_by = 4;
}

message ScalarUDFExprNode {
Expand All @@ -506,8 +531,7 @@ message ScalarUDFExprNode {
}

enum BuiltInWindowFunction {
UNSPECIFIED = 0; // https://protobuf.dev/programming-guides/dos-donts/#unspecified-enum
// ROW_NUMBER = 0;
ROW_NUMBER = 0;
RANK = 1;
DENSE_RANK = 2;
PERCENT_RANK = 3;
Expand All @@ -522,16 +546,16 @@ enum BuiltInWindowFunction {

message WindowExprNode {
oneof window_function {
AggregateFunction aggr_function = 1;
BuiltInWindowFunction built_in_function = 2;
string udaf = 3;
string udwf = 9;
}
LogicalExprNode expr = 4;
repeated LogicalExprNode partition_by = 5;
repeated SortExprNode order_by = 6;
repeated LogicalExprNode order_by = 6;
// repeated LogicalExprNode filter = 7;
WindowFrame window_frame = 8;
optional bytes fun_definition = 10;
}

message BetweenNode {
Expand Down Expand Up @@ -650,11 +674,9 @@ message PlanType {
datafusion_common.EmptyMessage FinalLogicalPlan = 3;
datafusion_common.EmptyMessage InitialPhysicalPlan = 4;
datafusion_common.EmptyMessage InitialPhysicalPlanWithStats = 9;
datafusion_common.EmptyMessage InitialPhysicalPlanWithSchema = 11;
OptimizedPhysicalPlanType OptimizedPhysicalPlan = 5;
datafusion_common.EmptyMessage FinalPhysicalPlan = 6;
datafusion_common.EmptyMessage FinalPhysicalPlanWithStats = 10;
datafusion_common.EmptyMessage FinalPhysicalPlanWithSchema = 12;
}
}

Expand Down Expand Up @@ -715,11 +737,10 @@ message PhysicalPlanNode {
AnalyzeExecNode analyze = 23;
JsonSinkExecNode json_sink = 24;
SymmetricHashJoinExecNode symmetric_hash_join = 25;
InterleaveExecNode interleave = 26;
InterleaveExecNode interleave = 26;
PlaceholderRowExecNode placeholder_row = 27;
CsvSinkExecNode csv_sink = 28;
ParquetSinkExecNode parquet_sink = 29;
UnnestExecNode unnest = 30;
}
}

Expand All @@ -731,21 +752,13 @@ message PartitionColumn {

message FileSinkConfig {
reserved 6; // writer_mode
reserved 8; // was `overwrite` which has been superseded by `insert_op`

string object_store_url = 1;
repeated PartitionedFile file_groups = 2;
repeated string table_paths = 3;
datafusion_common.Schema output_schema = 4;
repeated PartitionColumn table_partition_cols = 5;
bool keep_partition_by_columns = 9;
InsertOp insert_op = 10;
}

enum InsertOp {
Append = 0;
Overwrite = 1;
Replace = 2;
bool overwrite = 8;
}

message JsonSink {
Expand Down Expand Up @@ -784,19 +797,6 @@ message ParquetSinkExecNode {
PhysicalSortExprNodeCollection sort_order = 4;
}

message UnnestExecNode {
PhysicalPlanNode input = 1;
datafusion_common.Schema schema = 2;
repeated ListUnnest list_type_columns = 3;
repeated uint64 struct_type_columns = 4;
UnnestOptions options = 5;
}

message ListUnnest {
uint32 index_in_input_schema = 1;
uint32 depth = 2;
}

message PhysicalExtensionNode {
bytes node = 1;
repeated PhysicalPlanNode inputs = 2;
Expand Down Expand Up @@ -838,8 +838,6 @@ message PhysicalExprNode {
// was PhysicalDateTimeIntervalExprNode date_time_interval_expr = 17;

PhysicalLikeExprNode like_expr = 18;

PhysicalExtensionExprNode extension = 19;
}
}

Expand All @@ -852,17 +850,17 @@ message PhysicalScalarUdfNode {

message PhysicalAggregateExprNode {
oneof AggregateFunction {
AggregateFunction aggr_function = 1;
string user_defined_aggr_function = 4;
}
repeated PhysicalExprNode expr = 2;
repeated PhysicalSortExprNode ordering_req = 5;
bool distinct = 3;
bool ignore_nulls = 6;
optional bytes fun_definition = 7;
}

message PhysicalWindowExprNode {
oneof window_function {
AggregateFunction aggr_function = 1;
BuiltInWindowFunction built_in_function = 2;
string user_defined_aggr_function = 3;
}
Expand All @@ -871,7 +869,6 @@ message PhysicalWindowExprNode {
repeated PhysicalSortExprNode order_by = 6;
WindowFrame window_frame = 7;
string name = 8;
optional bytes fun_definition = 9;
}

message PhysicalIsNull {
Expand Down Expand Up @@ -947,16 +944,10 @@ message PhysicalNegativeNode {
PhysicalExprNode expr = 1;
}

message PhysicalExtensionExprNode {
bytes expr = 1;
repeated PhysicalExprNode inputs = 2;
}

message FilterExecNode {
PhysicalPlanNode input = 1;
PhysicalExprNode expr = 2;
uint32 default_filter_selectivity = 3;
repeated uint32 projection = 9;
}

message FileGroup {
Expand Down Expand Up @@ -1003,10 +994,6 @@ message CsvScanExecNode {
oneof optional_escape {
string escape = 5;
}
oneof optional_comment {
string comment = 6;
}
bool newlines_in_values = 7;
}

message AvroScanExecNode {
Expand Down Expand Up @@ -1187,7 +1174,6 @@ message NestedLoopJoinExecNode {
message CoalesceBatchesExecNode {
PhysicalPlanNode input = 1;
uint32 target_batch_size = 2;
optional uint32 fetch = 3;
}

message CoalescePartitionsExecNode {
Expand Down
Loading

0 comments on commit 167bbf9

Please sign in to comment.