diff --git a/Cargo.lock b/Cargo.lock index b40311b7f494..4af34e8623bb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1227,6 +1227,7 @@ dependencies = [ "common-meta", "moka", "snafu 0.8.2", + "substrait 0.8.1", ] [[package]] @@ -1260,6 +1261,8 @@ dependencies = [ "arrow-schema", "async-stream", "async-trait", + "bytes", + "cache", "catalog", "chrono", "common-catalog", @@ -2026,6 +2029,7 @@ version = "0.8.1" dependencies = [ "api", "async-trait", + "bytes", "common-base", "common-error", "common-macro", @@ -3197,7 +3201,6 @@ dependencies = [ "session", "snafu 0.8.2", "store-api", - "substrait 0.8.1", "table", "tokio", "toml 0.8.12", @@ -4185,7 +4188,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=902f75fdd170c572e90b1f640161d90995f20218#902f75fdd170c572e90b1f640161d90995f20218" +source = "git+https://github.com/killme2008/greptime-proto.git?rev=a15a54a714fe117d7e9f7635e149c4eecac773fa#a15a54a714fe117d7e9f7635e149c4eecac773fa" dependencies = [ "prost 0.12.4", "serde", @@ -7586,23 +7589,17 @@ name = "promql" version = "0.8.1" dependencies = [ "ahash 0.8.11", - "async-recursion", "async-trait", "bytemuck", - "catalog", - "common-catalog", "common-error", "common-macro", - "common-query", "common-recordbatch", "common-telemetry", "datafusion 37.0.0", "datafusion-expr 37.0.0", - "datafusion-functions 37.0.0", "datatypes", "futures", "greptime-proto", - "itertools 0.10.5", "lazy_static", "prometheus", "promql-parser", @@ -7610,7 +7607,6 @@ dependencies = [ "query", "session", "snafu 0.8.2", - "table", "tokio", ] @@ -7918,6 +7914,7 @@ dependencies = [ "async-recursion", "async-stream", "async-trait", + "bytes", "catalog", "chrono", "common-base", @@ -7930,11 +7927,13 @@ dependencies = [ "common-plugins", "common-query", "common-recordbatch", + "common-runtime", "common-telemetry", "common-time", "datafusion 37.0.0", "datafusion-common 37.0.0", "datafusion-expr 37.0.0", + "datafusion-functions 37.0.0", "datafusion-optimizer 37.0.0", "datafusion-physical-expr 37.0.0", "datafusion-sql 37.0.0", @@ -7944,6 +7943,7 @@ dependencies = [ "futures-util", "greptime-proto", "humantime", + "itertools 0.10.5", "lazy_static", "meter-core", "meter-macros", @@ -7955,6 +7955,7 @@ dependencies = [ "prometheus", "promql", "promql-parser", + "prost 0.12.4", "rand", "regex", "session", @@ -10370,9 +10371,7 @@ version = "0.8.1" dependencies = [ "async-trait", "bytes", - "catalog", "common-error", - "common-function", "common-macro", "common-telemetry", "datafusion 37.0.0", @@ -10382,7 +10381,6 @@ dependencies = [ "datatypes", "promql", "prost 0.12.4", - "session", "snafu 0.8.2", "substrait 0.17.1", "tokio", @@ -10559,6 +10557,7 @@ dependencies = [ name = "table" version = "0.8.1" dependencies = [ + "api", "async-trait", "chrono", "common-base", diff --git a/Cargo.toml b/Cargo.toml index 4b825509405b..7d03eb0f4387 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -120,7 +120,7 @@ etcd-client = { git = "https://github.com/MichaelScofield/etcd-client.git", rev fst = "0.4.7" futures = "0.3" futures-util = "0.3" -greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "902f75fdd170c572e90b1f640161d90995f20218" } +greptime-proto = { git = "https://github.com/killme2008/greptime-proto.git", rev = "a15a54a714fe117d7e9f7635e149c4eecac773fa" } humantime = "2.1" humantime-serde = "1.1" itertools = "0.10" diff --git a/src/cache/Cargo.toml 
b/src/cache/Cargo.toml index 07870fa904a5..9a2888e5fc13 100644 --- a/src/cache/Cargo.toml +++ b/src/cache/Cargo.toml @@ -11,3 +11,4 @@ common-macro.workspace = true common-meta.workspace = true moka.workspace = true snafu.workspace = true +substrait.workspace = true diff --git a/src/cache/src/lib.rs b/src/cache/src/lib.rs index 85dc9c05f1f3..4adf0ff1ff33 100644 --- a/src/cache/src/lib.rs +++ b/src/cache/src/lib.rs @@ -20,7 +20,8 @@ use std::time::Duration; use catalog::kvbackend::new_table_cache; use common_meta::cache::{ new_table_flownode_set_cache, new_table_info_cache, new_table_name_cache, - new_table_route_cache, CacheRegistry, CacheRegistryBuilder, LayeredCacheRegistryBuilder, + new_table_route_cache, new_view_info_cache, CacheRegistry, CacheRegistryBuilder, + LayeredCacheRegistryBuilder, }; use common_meta::kv_backend::KvBackendRef; use moka::future::CacheBuilder; @@ -33,6 +34,7 @@ const DEFAULT_CACHE_TTL: Duration = Duration::from_secs(10 * 60); const DEFAULT_CACHE_TTI: Duration = Duration::from_secs(5 * 60); pub const TABLE_INFO_CACHE_NAME: &str = "table_info_cache"; +pub const VIEW_INFO_CACHE_NAME: &str = "view_info_cache"; pub const TABLE_NAME_CACHE_NAME: &str = "table_name_cache"; pub const TABLE_CACHE_NAME: &str = "table_cache"; pub const TABLE_FLOWNODE_SET_CACHE_NAME: &str = "table_flownode_set_cache"; @@ -82,11 +84,22 @@ pub fn build_fundamental_cache_registry(kv_backend: KvBackendRef) -> CacheRegist cache, kv_backend.clone(), )); + // Builds the view info cache + let cache = CacheBuilder::new(DEFAULT_CACHE_MAX_CAPACITY) + .time_to_live(DEFAULT_CACHE_TTL) + .time_to_idle(DEFAULT_CACHE_TTI) + .build(); + let view_info_cache = Arc::new(new_view_info_cache( + VIEW_INFO_CACHE_NAME.to_string(), + cache, + kv_backend.clone(), + )); CacheRegistryBuilder::default() .add_cache(table_info_cache) .add_cache(table_name_cache) .add_cache(table_route_cache) + .add_cache(view_info_cache) .add_cache(table_flownode_set_cache) .build() } diff --git a/src/catalog/Cargo.toml b/src/catalog/Cargo.toml index ddda28ba8864..185614e98152 100644 --- a/src/catalog/Cargo.toml +++ b/src/catalog/Cargo.toml @@ -16,6 +16,7 @@ arrow.workspace = true arrow-schema.workspace = true async-stream.workspace = true async-trait = "0.1" +bytes.workspace = true common-catalog.workspace = true common-config.workspace = true common-error.workspace = true @@ -48,8 +49,11 @@ table.workspace = true tokio.workspace = true [dev-dependencies] +cache.workspace = true catalog = { workspace = true, features = ["testing"] } chrono.workspace = true +common-meta = { workspace = true, features = ["testing"] } +common-query = { workspace = true, features = ["testing"] } common-test-util.workspace = true log-store.workspace = true object-store.workspace = true diff --git a/src/catalog/src/error.rs b/src/catalog/src/error.rs index 5834eaed359d..eaad22f4c9f6 100644 --- a/src/catalog/src/error.rs +++ b/src/catalog/src/error.rs @@ -19,10 +19,7 @@ use common_error::ext::{BoxedError, ErrorExt}; use common_error::status_code::StatusCode; use common_macro::stack_trace_debug; use datafusion::error::DataFusionError; -use datatypes::prelude::ConcreteDataType; use snafu::{Location, Snafu}; -use table::metadata::TableId; -use tokio::task::JoinError; #[derive(Snafu)] #[snafu(visibility(pub))] @@ -65,19 +62,6 @@ pub enum Error { location: Location, source: BoxedError, }, - #[snafu(display("Failed to open system catalog table"))] - OpenSystemCatalog { - #[snafu(implicit)] - location: Location, - source: table::error::Error, - }, - - 
#[snafu(display("Failed to create system catalog table"))] - CreateSystemCatalog { - #[snafu(implicit)] - location: Location, - source: table::error::Error, - }, #[snafu(display("Failed to create table, table info: {}", table_info))] CreateTable { @@ -94,52 +78,6 @@ pub enum Error { location: Location, }, - #[snafu(display( - "System catalog table type mismatch, expected: binary, found: {:?}", - data_type, - ))] - SystemCatalogTypeMismatch { - data_type: ConcreteDataType, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Invalid system catalog entry type: {:?}", entry_type))] - InvalidEntryType { - entry_type: Option, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Invalid system catalog key: {:?}", key))] - InvalidKey { - key: Option, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Catalog value is not present"))] - EmptyValue { - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Failed to deserialize value"))] - ValueDeserialize { - #[snafu(source)] - error: serde_json::error::Error, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Table engine not found: {}", engine_name))] - TableEngineNotFound { - engine_name: String, - #[snafu(implicit)] - location: Location, - source: table::error::Error, - }, - #[snafu(display("Cannot find catalog by name: {}", catalog_name))] CatalogNotFound { catalog_name: String, @@ -169,44 +107,9 @@ pub enum Error { location: Location, }, - #[snafu(display("Schema {} already exists", schema))] - SchemaExists { - schema: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Operation {} not implemented yet", operation))] - Unimplemented { - operation: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Operation {} not supported", op))] - NotSupported { - op: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Failed to open table {table_id}"))] - OpenTable { - table_id: TableId, - #[snafu(implicit)] - location: Location, - source: table::error::Error, - }, - - #[snafu(display("Failed to open table in parallel"))] - ParallelOpenTable { - #[snafu(source)] - error: JoinError, - }, - - #[snafu(display("Table not found while opening table, table info: {}", table_info))] - TableNotFound { - table_info: String, + #[snafu(display("View info not found: {}", name))] + ViewInfoNotFound { + name: String, #[snafu(implicit)] location: Location, }, @@ -217,13 +120,6 @@ pub enum Error { #[snafu(display("Failed to find region routes"))] FindRegionRoutes { source: partition::error::Error }, - #[snafu(display("Failed to read system catalog table records"))] - ReadSystemCatalog { - #[snafu(implicit)] - location: Location, - source: common_recordbatch::error::Error, - }, - #[snafu(display("Failed to create recordbatch"))] CreateRecordBatch { #[snafu(implicit)] @@ -231,20 +127,6 @@ pub enum Error { source: common_recordbatch::error::Error, }, - #[snafu(display("Failed to insert table creation record to system catalog"))] - InsertCatalogRecord { - #[snafu(implicit)] - location: Location, - source: table::error::Error, - }, - - #[snafu(display("Failed to scan system catalog table"))] - SystemCatalogTableScan { - #[snafu(implicit)] - location: Location, - source: table::error::Error, - }, - #[snafu(display("Internal error"))] Internal { #[snafu(implicit)] @@ -258,20 +140,14 @@ pub enum Error { location: Location, }, - #[snafu(display("Failed to execute system catalog table scan"))] - SystemCatalogTableScanExec { + 
#[snafu(display("Failed to decode logical plan for view: {}", name))] + DecodePlan { + name: String, #[snafu(implicit)] location: Location, source: common_query::error::Error, }, - #[snafu(display("Cannot parse catalog value"))] - InvalidCatalogValue { - #[snafu(implicit)] - location: Location, - source: common_catalog::error::Error, - }, - #[snafu(display("Failed to perform metasrv operation"))] Metasrv { #[snafu(implicit)] @@ -297,30 +173,36 @@ pub enum Error { location: Location, }, - #[snafu(display("Table schema mismatch"))] - TableSchemaMismatch { + #[snafu(display("Table metadata manager error"))] + TableMetadataManager { + source: common_meta::error::Error, #[snafu(implicit)] location: Location, - source: table::error::Error, }, - #[snafu(display("A generic error has occurred, msg: {}", msg))] - Generic { - msg: String, + #[snafu(display("Failed to get table cache"))] + GetTableCache { + source: common_meta::error::Error, #[snafu(implicit)] location: Location, }, - #[snafu(display("Table metadata manager error"))] - TableMetadataManager { + #[snafu(display("Failed to get view info from cache"))] + GetViewCache { source: common_meta::error::Error, #[snafu(implicit)] location: Location, }, - #[snafu(display("Failed to get table cache"))] - GetTableCache { - source: common_meta::error::Error, + #[snafu(display("Cache not found: {name}"))] + CacheNotFound { + name: String, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Failed to cast the catalog manager"))] + CastManager { #[snafu(implicit)] location: Location, }, @@ -331,61 +213,43 @@ pub type Result = std::result::Result; impl ErrorExt for Error { fn status_code(&self) -> StatusCode { match self { - Error::InvalidKey { .. } - | Error::SchemaNotFound { .. } + Error::SchemaNotFound { .. } | Error::CatalogNotFound { .. } | Error::FindPartitions { .. } | Error::FindRegionRoutes { .. } - | Error::InvalidEntryType { .. } - | Error::ParallelOpenTable { .. } => StatusCode::Unexpected, + | Error::CacheNotFound { .. } + | Error::CastManager { .. } => StatusCode::Unexpected, - Error::TableNotFound { .. } => StatusCode::TableNotFound, + Error::ViewInfoNotFound { .. } => StatusCode::TableNotFound, - Error::SystemCatalog { .. } - | Error::EmptyValue { .. } - | Error::ValueDeserialize { .. } => StatusCode::StorageUnavailable, + Error::SystemCatalog { .. } => StatusCode::StorageUnavailable, - Error::Generic { .. } - | Error::SystemCatalogTypeMismatch { .. } - | Error::UpgradeWeakCatalogManagerRef { .. } => StatusCode::Internal, - - Error::ReadSystemCatalog { source, .. } | Error::CreateRecordBatch { source, .. } => { - source.status_code() - } - Error::InvalidCatalogValue { source, .. } => source.status_code(), + Error::UpgradeWeakCatalogManagerRef { .. } => StatusCode::Internal, + Error::CreateRecordBatch { source, .. } => source.status_code(), Error::TableExists { .. } => StatusCode::TableAlreadyExists, Error::TableNotExist { .. } => StatusCode::TableNotFound, - Error::SchemaExists { .. } | Error::TableEngineNotFound { .. } => { - StatusCode::InvalidArguments - } - Error::ListCatalogs { source, .. } | Error::ListNodes { source, .. } | Error::ListSchemas { source, .. } | Error::ListTables { source, .. } => source.status_code(), - Error::OpenSystemCatalog { source, .. } - | Error::CreateSystemCatalog { source, .. } - | Error::InsertCatalogRecord { source, .. } - | Error::OpenTable { source, .. } - | Error::CreateTable { source, .. } - | Error::TableSchemaMismatch { source, .. 
} => source.status_code(), + Error::CreateTable { source, .. } => source.status_code(), Error::Metasrv { source, .. } => source.status_code(), - Error::SystemCatalogTableScan { source, .. } => source.status_code(), - Error::SystemCatalogTableScanExec { source, .. } => source.status_code(), + Error::DecodePlan { source, .. } => source.status_code(), Error::InvalidTableInfoInCatalog { source, .. } => source.status_code(), Error::CompileScriptInternal { source, .. } | Error::Internal { source, .. } => { source.status_code() } - Error::Unimplemented { .. } | Error::NotSupported { .. } => StatusCode::Unsupported, Error::QueryAccessDenied { .. } => StatusCode::AccessDenied, Error::Datafusion { .. } => StatusCode::EngineExecuteQuery, Error::TableMetadataManager { source, .. } => source.status_code(), - Error::GetTableCache { .. } => StatusCode::Internal, + Error::GetViewCache { source, .. } | Error::GetTableCache { source, .. } => { + source.status_code() + } } } @@ -417,11 +281,6 @@ mod tests { .status_code() ); - assert_eq!( - StatusCode::Unexpected, - InvalidKeySnafu { key: None }.build().status_code() - ); - assert_eq!( StatusCode::StorageUnavailable, Error::SystemCatalog { @@ -430,19 +289,6 @@ mod tests { } .status_code() ); - - assert_eq!( - StatusCode::Internal, - Error::SystemCatalogTypeMismatch { - data_type: ConcreteDataType::binary_datatype(), - location: Location::generate(), - } - .status_code() - ); - assert_eq!( - StatusCode::StorageUnavailable, - EmptyValueSnafu {}.build().status_code() - ); } #[test] diff --git a/src/catalog/src/kvbackend/manager.rs b/src/catalog/src/kvbackend/manager.rs index e7a4ef4be39c..0bf51643b1b1 100644 --- a/src/catalog/src/kvbackend/manager.rs +++ b/src/catalog/src/kvbackend/manager.rs @@ -22,14 +22,13 @@ use common_catalog::consts::{ }; use common_config::Mode; use common_error::ext::BoxedError; -use common_meta::cache::TableRouteCacheRef; +use common_meta::cache::{LayeredCacheRegistryRef, ViewInfoCacheRef}; use common_meta::key::catalog_name::CatalogNameKey; use common_meta::key::schema_name::SchemaNameKey; use common_meta::key::table_info::TableInfoValue; use common_meta::key::table_name::TableNameKey; use common_meta::key::{TableMetadataManager, TableMetadataManagerRef}; use common_meta::kv_backend::KvBackendRef; -use common_meta::table_name::TableName; use futures_util::stream::BoxStream; use futures_util::{StreamExt, TryStreamExt}; use meta_client::client::MetaClient; @@ -38,11 +37,12 @@ use partition::manager::{PartitionRuleManager, PartitionRuleManagerRef}; use snafu::prelude::*; use table::dist_table::DistTable; use table::table::numbers::{NumbersTable, NUMBERS_TABLE_NAME}; +use table::table_name::TableName; use table::TableRef; use crate::error::{ - GetTableCacheSnafu, InvalidTableInfoInCatalogSnafu, ListCatalogsSnafu, ListSchemasSnafu, - ListTablesSnafu, Result, TableMetadataManagerSnafu, + CacheNotFoundSnafu, GetTableCacheSnafu, InvalidTableInfoInCatalogSnafu, ListCatalogsSnafu, + ListSchemasSnafu, ListTablesSnafu, Result, TableMetadataManagerSnafu, }; use crate::information_schema::InformationSchemaProvider; use crate::kvbackend::TableCacheRef; @@ -61,25 +61,26 @@ pub struct KvBackendCatalogManager { table_metadata_manager: TableMetadataManagerRef, /// A sub-CatalogManager that handles system tables system_catalog: SystemCatalog, - table_cache: TableCacheRef, + cache_registry: LayeredCacheRegistryRef, } const CATALOG_CACHE_MAX_CAPACITY: u64 = 128; impl KvBackendCatalogManager { - pub async fn new( + pub fn new( mode: Mode, meta_client: 
Option>, backend: KvBackendRef, - table_cache: TableCacheRef, - table_route_cache: TableRouteCacheRef, + cache_registry: LayeredCacheRegistryRef, ) -> Arc { Arc::new_cyclic(|me| Self { mode, meta_client, partition_manager: Arc::new(PartitionRuleManager::new( backend.clone(), - table_route_cache, + cache_registry + .get() + .expect("Failed to get table_route_cache"), )), table_metadata_manager: Arc::new(TableMetadataManager::new(backend)), system_catalog: SystemCatalog { @@ -90,7 +91,7 @@ impl KvBackendCatalogManager { me.clone(), )), }, - table_cache, + cache_registry, }) } @@ -99,6 +100,12 @@ impl KvBackendCatalogManager { &self.mode } + pub fn view_info_cache(&self) -> Result { + self.cache_registry.get().context(CacheNotFoundSnafu { + name: "view_info_cache", + }) + } + /// Returns the `[MetaClient]`. pub fn meta_client(&self) -> Option> { self.meta_client.clone() @@ -215,7 +222,11 @@ impl CatalogManager for KvBackendCatalogManager { return Ok(Some(table)); } - self.table_cache + let table_cache: TableCacheRef = self.cache_registry.get().context(CacheNotFoundSnafu { + name: "table_cache", + })?; + + table_cache .get_by_ref(&TableName { catalog_name: catalog_name.to_string(), schema_name: schema_name.to_string(), diff --git a/src/catalog/src/kvbackend/table_cache.rs b/src/catalog/src/kvbackend/table_cache.rs index 4ab73bf9d256..93980d1a0612 100644 --- a/src/catalog/src/kvbackend/table_cache.rs +++ b/src/catalog/src/kvbackend/table_cache.rs @@ -17,11 +17,11 @@ use std::sync::Arc; use common_meta::cache::{CacheContainer, Initializer, TableInfoCacheRef, TableNameCacheRef}; use common_meta::error::{Result as MetaResult, ValueNotExistSnafu}; use common_meta::instruction::CacheIdent; -use common_meta::table_name::TableName; use futures::future::BoxFuture; use moka::future::Cache; use snafu::OptionExt; use table::dist_table::DistTable; +use table::table_name::TableName; use table::TableRef; pub type TableCacheRef = Arc; diff --git a/src/catalog/src/table_source.rs b/src/catalog/src/table_source.rs index 58813a460e33..7399dca550da 100644 --- a/src/catalog/src/table_source.rs +++ b/src/catalog/src/table_source.rs @@ -15,15 +15,25 @@ use std::collections::HashMap; use std::sync::Arc; +use bytes::Bytes; use common_catalog::format_full_table_name; +use common_query::logical_plan::SubstraitPlanDecoderRef; use datafusion::common::{ResolvedTableReference, TableReference}; -use datafusion::datasource::provider_as_source; +use datafusion::datasource::view::ViewTable; +use datafusion::datasource::{provider_as_source, TableProvider}; use datafusion::logical_expr::TableSource; use session::context::QueryContext; -use snafu::{ensure, OptionExt}; +use snafu::{ensure, OptionExt, ResultExt}; +use table::metadata::TableType; use table::table::adapter::DfTableProviderAdapter; +mod dummy_catalog; +use dummy_catalog::DummyCatalogList; -use crate::error::{QueryAccessDeniedSnafu, Result, TableNotExistSnafu}; +use crate::error::{ + CastManagerSnafu, DatafusionSnafu, DecodePlanSnafu, GetViewCacheSnafu, QueryAccessDeniedSnafu, + Result, TableNotExistSnafu, ViewInfoNotFoundSnafu, +}; +use crate::kvbackend::KvBackendCatalogManager; use crate::CatalogManagerRef; pub struct DfTableSourceProvider { @@ -32,6 +42,7 @@ pub struct DfTableSourceProvider { disallow_cross_catalog_query: bool, default_catalog: String, default_schema: String, + plan_decoder: SubstraitPlanDecoderRef, } impl DfTableSourceProvider { @@ -39,6 +50,7 @@ impl DfTableSourceProvider { catalog_manager: CatalogManagerRef, disallow_cross_catalog_query: bool, 
query_ctx: &QueryContext, + plan_decoder: SubstraitPlanDecoderRef, ) -> Self { Self { catalog_manager, @@ -46,6 +58,7 @@ impl DfTableSourceProvider { resolved_tables: HashMap::new(), default_catalog: query_ctx.current_catalog().to_owned(), default_schema: query_ctx.current_schema().to_owned(), + plan_decoder, } } @@ -94,8 +107,39 @@ impl DfTableSourceProvider { table: format_full_table_name(catalog_name, schema_name, table_name), })?; - let provider = DfTableProviderAdapter::new(table); - let source = provider_as_source(Arc::new(provider)); + let provider: Arc = if table.table_info().table_type == TableType::View { + let catalog_manager = self + .catalog_manager + .as_any() + .downcast_ref::() + .context(CastManagerSnafu)?; + + let view_info = catalog_manager + .view_info_cache()? + .get(table.table_info().ident.table_id) + .await + .context(GetViewCacheSnafu)? + .context(ViewInfoNotFoundSnafu { + name: &table.table_info().name, + })?; + + // Build the catalog list provider for deserialization. + let catalog_list = Arc::new(DummyCatalogList::new(self.catalog_manager.clone())); + let logical_plan = self + .plan_decoder + .decode(Bytes::from(view_info.view_info.clone()), catalog_list, true) + .await + .context(DecodePlanSnafu { + name: &table.table_info().name, + })?; + + Arc::new(ViewTable::try_new(logical_plan, None).context(DatafusionSnafu)?) + } else { + Arc::new(DfTableProviderAdapter::new(table)) + }; + + let source = provider_as_source(provider); + let _ = self.resolved_tables.insert(resolved_name, source.clone()); Ok(source) } @@ -103,6 +147,7 @@ impl DfTableSourceProvider { #[cfg(test)] mod tests { + use common_query::test_util::DummyDecoder; use session::context::QueryContext; use super::*; @@ -112,8 +157,12 @@ mod tests { fn test_validate_table_ref() { let query_ctx = &QueryContext::with("greptime", "public"); - let table_provider = - DfTableSourceProvider::new(MemoryCatalogManager::with_default_setup(), true, query_ctx); + let table_provider = DfTableSourceProvider::new( + MemoryCatalogManager::with_default_setup(), + true, + query_ctx, + DummyDecoder::arc(), + ); let table_ref = TableReference::bare("table_name"); let result = table_provider.resolve_table_ref(table_ref); @@ -148,4 +197,99 @@ mod tests { let table_ref = TableReference::full("greptime", "greptime_private", "columns"); assert!(table_provider.resolve_table_ref(table_ref).is_ok()); } + + use std::collections::HashSet; + + use arrow::datatypes::{DataType, Field, Schema, SchemaRef}; + use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry}; + use common_config::Mode; + use common_meta::cache::{CacheRegistryBuilder, LayeredCacheRegistryBuilder}; + use common_meta::key::TableMetadataManager; + use common_meta::kv_backend::memory::MemoryKvBackend; + use common_query::error::Result as QueryResult; + use common_query::logical_plan::SubstraitPlanDecoder; + use datafusion::catalog::CatalogProviderList; + use datafusion::logical_expr::builder::LogicalTableSource; + use datafusion::logical_expr::{col, lit, LogicalPlan, LogicalPlanBuilder}; + + struct MockDecoder; + impl MockDecoder { + pub fn arc() -> Arc { + Arc::new(MockDecoder) + } + } + + #[async_trait::async_trait] + impl SubstraitPlanDecoder for MockDecoder { + async fn decode( + &self, + _message: bytes::Bytes, + _catalog_list: Arc, + _optimize: bool, + ) -> QueryResult { + Ok(mock_plan()) + } + } + + fn mock_plan() -> LogicalPlan { + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", 
DataType::Utf8, true), + ]); + let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + + let projection = None; + + let builder = + LogicalPlanBuilder::scan("person", Arc::new(table_source), projection).unwrap(); + + builder + .filter(col("id").gt(lit(500))) + .unwrap() + .build() + .unwrap() + } + + #[tokio::test] + async fn test_resolve_view() { + let query_ctx = &QueryContext::with("greptime", "public"); + let backend = Arc::new(MemoryKvBackend::default()); + let layered_cache_builder = LayeredCacheRegistryBuilder::default() + .add_cache_registry(CacheRegistryBuilder::default().build()); + let fundamental_cache_registry = build_fundamental_cache_registry(backend.clone()); + let layered_cache_registry = Arc::new( + with_default_composite_cache_registry( + layered_cache_builder.add_cache_registry(fundamental_cache_registry), + ) + .unwrap() + .build(), + ); + + let catalog_manager = KvBackendCatalogManager::new( + Mode::Standalone, + None, + backend.clone(), + layered_cache_registry, + ); + let table_metadata_manager = TableMetadataManager::new(backend); + let mut view_info = common_meta::key::test_utils::new_test_table_info(1024, vec![]); + view_info.table_type = TableType::View; + let logical_plan = vec![1, 2, 3]; + // Create view metadata + table_metadata_manager + .create_view_metadata(view_info.clone().into(), logical_plan, HashSet::new()) + .await + .unwrap(); + + let mut table_provider = + DfTableSourceProvider::new(catalog_manager, true, query_ctx, MockDecoder::arc()); + + // View not found + let table_ref = TableReference::bare("not_exists_view"); + assert!(table_provider.resolve_table(table_ref).await.is_err()); + + let table_ref = TableReference::bare(view_info.name); + let source = table_provider.resolve_table(table_ref).await.unwrap(); + assert_eq!(*source.get_logical_plan().unwrap(), mock_plan()); + } } diff --git a/src/catalog/src/table_source/dummy_catalog.rs b/src/catalog/src/table_source/dummy_catalog.rs new file mode 100644 index 000000000000..602a5c9cbe0f --- /dev/null +++ b/src/catalog/src/table_source/dummy_catalog.rs @@ -0,0 +1,129 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! Dummy catalog for region server. + +use std::any::Any; +use std::sync::Arc; + +use async_trait::async_trait; +use common_catalog::format_full_table_name; +use datafusion::catalog::schema::SchemaProvider; +use datafusion::catalog::{CatalogProvider, CatalogProviderList}; +use datafusion::datasource::TableProvider; +use snafu::OptionExt; +use table::table::adapter::DfTableProviderAdapter; + +use crate::error::TableNotExistSnafu; +use crate::CatalogManagerRef; + +/// Delegate the resolving requests to the `[CatalogManager]` unconditionally. +#[derive(Clone)] +pub struct DummyCatalogList { + catalog_manager: CatalogManagerRef, +} + +impl DummyCatalogList { + /// Creates a new catalog list with the given catalog manager. 
+ pub fn new(catalog_manager: CatalogManagerRef) -> Self { + Self { catalog_manager } + } +} + +impl CatalogProviderList for DummyCatalogList { + fn as_any(&self) -> &dyn Any { + self + } + + fn register_catalog( + &self, + _name: String, + _catalog: Arc, + ) -> Option> { + None + } + + fn catalog_names(&self) -> Vec { + vec![] + } + + fn catalog(&self, catalog_name: &str) -> Option> { + Some(Arc::new(DummyCatalogProvider { + catalog_name: catalog_name.to_string(), + catalog_manager: self.catalog_manager.clone(), + })) + } +} + +/// A dummy catalog provider for [DummyCatalogList]. +#[derive(Clone)] +struct DummyCatalogProvider { + catalog_name: String, + catalog_manager: CatalogManagerRef, +} + +impl CatalogProvider for DummyCatalogProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema_names(&self) -> Vec { + vec![] + } + + fn schema(&self, schema_name: &str) -> Option> { + Some(Arc::new(DummySchemaProvider { + catalog_name: self.catalog_name.clone(), + schema_name: schema_name.to_string(), + catalog_manager: self.catalog_manager.clone(), + })) + } +} + +/// A dummy schema provider for [DummyCatalogList]. +#[derive(Clone)] +struct DummySchemaProvider { + catalog_name: String, + schema_name: String, + catalog_manager: CatalogManagerRef, +} + +#[async_trait] +impl SchemaProvider for DummySchemaProvider { + fn as_any(&self) -> &dyn Any { + self + } + + fn table_names(&self) -> Vec { + vec![] + } + + async fn table(&self, name: &str) -> datafusion::error::Result>> { + let table = self + .catalog_manager + .table(&self.catalog_name, &self.schema_name, name) + .await? + .with_context(|| TableNotExistSnafu { + table: format_full_table_name(&self.catalog_name, &self.schema_name, name), + })?; + + let table_provider: Arc = Arc::new(DfTableProviderAdapter::new(table)); + + Ok(Some(table_provider)) + } + + fn table_exist(&self, _name: &str) -> bool { + true + } +} diff --git a/src/cmd/src/cli/bench.rs b/src/cmd/src/cli/bench.rs index 7f0acfe378bf..bf5a6825f014 100644 --- a/src/cmd/src/cli/bench.rs +++ b/src/cmd/src/cli/bench.rs @@ -23,13 +23,13 @@ use common_meta::key::{TableMetadataManager, TableMetadataManagerRef}; use common_meta::kv_backend::etcd::EtcdStore; use common_meta::peer::Peer; use common_meta::rpc::router::{Region, RegionRoute}; -use common_meta::table_name::TableName; use common_telemetry::info; use datatypes::data_type::ConcreteDataType; use datatypes::schema::{ColumnSchema, RawSchema}; use rand::Rng; use store_api::storage::RegionNumber; use table::metadata::{RawTableInfo, RawTableMeta, TableId, TableIdent, TableType}; +use table::table_name::TableName; use tracing_appender::non_blocking::WorkerGuard; use self::metadata::TableMetadataBencher; diff --git a/src/cmd/src/cli/bench/metadata.rs b/src/cmd/src/cli/bench/metadata.rs index a1009cfe8d6c..9229b0342e88 100644 --- a/src/cmd/src/cli/bench/metadata.rs +++ b/src/cmd/src/cli/bench/metadata.rs @@ -16,7 +16,7 @@ use std::time::Instant; use common_meta::key::table_route::TableRouteValue; use common_meta::key::TableMetadataManagerRef; -use common_meta::table_name::TableName; +use table::table_name::TableName; use crate::cli::bench::{ bench_self_recorded, create_region_routes, create_region_wal_options, create_table_info, diff --git a/src/cmd/src/cli/repl.rs b/src/cmd/src/cli/repl.rs index a9e2e21967f9..74184d523985 100644 --- a/src/cmd/src/cli/repl.rs +++ b/src/cmd/src/cli/repl.rs @@ -37,7 +37,7 @@ use query::datafusion::DatafusionQueryEngine; use query::logical_optimizer::LogicalOptimizer; use 
query::parser::QueryLanguageParser; use query::plan::LogicalPlan; -use query::query_engine::QueryEngineState; +use query::query_engine::{DefaultSerializer, QueryEngineState}; use query::QueryEngine; use rustyline::error::ReadlineError; use rustyline::Editor; @@ -185,7 +185,7 @@ impl Repl { .context(PlanStatementSnafu)?; let plan = DFLogicalSubstraitConvertor {} - .encode(&plan) + .encode(&plan, DefaultSerializer) .context(SubstraitEncodeLogicalPlanSnafu)?; self.database.logical_plan(plan.to_vec()).await @@ -277,24 +277,12 @@ async fn create_query_engine(meta_addr: &str) -> Result { .build(), ); - let table_cache = layered_cache_registry - .get() - .context(error::CacheRequiredSnafu { - name: TABLE_CACHE_NAME, - })?; - let table_route_cache = layered_cache_registry - .get() - .context(error::CacheRequiredSnafu { - name: TABLE_ROUTE_CACHE_NAME, - })?; let catalog_manager = KvBackendCatalogManager::new( Mode::Distributed, Some(meta_client.clone()), cached_meta_backend.clone(), - table_cache, - table_route_cache, - ) - .await; + layered_cache_registry, + ); let plugins: Plugins = Default::default(); let state = Arc::new(QueryEngineState::new( catalog_manager, diff --git a/src/cmd/src/error.rs b/src/cmd/src/error.rs index a2a880fa6c1d..fa5371545fcb 100644 --- a/src/cmd/src/error.rs +++ b/src/cmd/src/error.rs @@ -375,11 +375,11 @@ impl ErrorExt for Error { Error::SerdeJson { .. } | Error::FileIo { .. } => StatusCode::Unexpected, - Error::CacheRequired { .. } | Error::BuildCacheRegistry { .. } => StatusCode::Internal, - Error::Other { source, .. } => source.status_code(), Error::BuildRuntime { source, .. } => source.status_code(), + + Error::CacheRequired { .. } | Error::BuildCacheRegistry { .. } => StatusCode::Internal, } } diff --git a/src/cmd/src/frontend.rs b/src/cmd/src/frontend.rs index a2dc2c6fd9ae..a3e744e9c7ec 100644 --- a/src/cmd/src/frontend.rs +++ b/src/cmd/src/frontend.rs @@ -16,10 +16,7 @@ use std::sync::Arc; use std::time::Duration; use async_trait::async_trait; -use cache::{ - build_fundamental_cache_registry, with_default_composite_cache_registry, TABLE_CACHE_NAME, - TABLE_ROUTE_CACHE_NAME, -}; +use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry}; use catalog::kvbackend::{CachedMetaKvBackendBuilder, KvBackendCatalogManager, MetaKvBackend}; use clap::Parser; use client::client_manager::DatanodeClients; @@ -302,25 +299,12 @@ impl StartCommand { .build(), ); - let table_cache = layered_cache_registry - .get() - .context(error::CacheRequiredSnafu { - name: TABLE_CACHE_NAME, - })?; - let table_route_cache = - layered_cache_registry - .get() - .context(error::CacheRequiredSnafu { - name: TABLE_ROUTE_CACHE_NAME, - })?; let catalog_manager = KvBackendCatalogManager::new( opts.mode, Some(meta_client.clone()), cached_meta_backend.clone(), - table_cache, - table_route_cache, - ) - .await; + layered_cache_registry.clone(), + ); let executor = HandlerGroupExecutor::new(vec![ Arc::new(ParseMailboxMessageHandler), diff --git a/src/cmd/src/standalone.rs b/src/cmd/src/standalone.rs index 01fe22d64ad2..90958baf1048 100644 --- a/src/cmd/src/standalone.rs +++ b/src/cmd/src/standalone.rs @@ -16,10 +16,7 @@ use std::sync::Arc; use std::{fs, path}; use async_trait::async_trait; -use cache::{ - build_fundamental_cache_registry, with_default_composite_cache_registry, TABLE_CACHE_NAME, - TABLE_ROUTE_CACHE_NAME, -}; +use cache::{build_fundamental_cache_registry, with_default_composite_cache_registry}; use catalog::kvbackend::KvBackendCatalogManager; use clap::Parser; use 
common_catalog::consts::{MIN_USER_FLOW_ID, MIN_USER_TABLE_ID}; @@ -61,14 +58,14 @@ use servers::export_metrics::ExportMetricsOption; use servers::http::HttpOptions; use servers::tls::{TlsMode, TlsOption}; use servers::Mode; -use snafu::{OptionExt, ResultExt}; +use snafu::ResultExt; use tracing_appender::non_blocking::WorkerGuard; use crate::error::{ - BuildCacheRegistrySnafu, CacheRequiredSnafu, CreateDirSnafu, IllegalConfigSnafu, - InitDdlManagerSnafu, InitMetadataSnafu, InitTimezoneSnafu, LoadLayeredConfigSnafu, Result, - ShutdownDatanodeSnafu, ShutdownFrontendSnafu, StartDatanodeSnafu, StartFrontendSnafu, - StartProcedureManagerSnafu, StartWalOptionsAllocatorSnafu, StopProcedureManagerSnafu, + BuildCacheRegistrySnafu, CreateDirSnafu, IllegalConfigSnafu, InitDdlManagerSnafu, + InitMetadataSnafu, InitTimezoneSnafu, LoadLayeredConfigSnafu, Result, ShutdownDatanodeSnafu, + ShutdownFrontendSnafu, StartDatanodeSnafu, StartFrontendSnafu, StartProcedureManagerSnafu, + StartWalOptionsAllocatorSnafu, StopProcedureManagerSnafu, }; use crate::options::GlobalOptions; use crate::{log_versions, App}; @@ -421,20 +418,12 @@ impl StartCommand { .build(), ); - let table_cache = layered_cache_registry.get().context(CacheRequiredSnafu { - name: TABLE_CACHE_NAME, - })?; - let table_route_cache = layered_cache_registry.get().context(CacheRequiredSnafu { - name: TABLE_ROUTE_CACHE_NAME, - })?; let catalog_manager = KvBackendCatalogManager::new( dn_opts.mode, None, kv_backend.clone(), - table_cache, - table_route_cache, - ) - .await; + layered_cache_registry.clone(), + ); let table_metadata_manager = Self::create_table_metadata_manager(kv_backend.clone()).await?; diff --git a/src/common/meta/src/cache.rs b/src/common/meta/src/cache.rs index b7d13a6f0ec0..52dae1a094af 100644 --- a/src/common/meta/src/cache.rs +++ b/src/common/meta/src/cache.rs @@ -24,7 +24,7 @@ pub use registry::{ LayeredCacheRegistryBuilder, LayeredCacheRegistryRef, }; pub use table::{ - new_table_info_cache, new_table_name_cache, new_table_route_cache, TableInfoCache, - TableInfoCacheRef, TableNameCache, TableNameCacheRef, TableRoute, TableRouteCache, - TableRouteCacheRef, + new_table_info_cache, new_table_name_cache, new_table_route_cache, new_view_info_cache, + TableInfoCache, TableInfoCacheRef, TableNameCache, TableNameCacheRef, TableRoute, + TableRouteCache, TableRouteCacheRef, ViewInfoCache, ViewInfoCacheRef, }; diff --git a/src/common/meta/src/cache/flow/table_flownode.rs b/src/common/meta/src/cache/flow/table_flownode.rs index eeaa88128628..faf62b8c36f6 100644 --- a/src/common/meta/src/cache/flow/table_flownode.rs +++ b/src/common/meta/src/cache/flow/table_flownode.rs @@ -145,13 +145,13 @@ mod tests { use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; use moka::future::CacheBuilder; + use table::table_name::TableName; use crate::cache::flow::table_flownode::new_table_flownode_set_cache; use crate::instruction::{CacheIdent, CreateFlow, DropFlow}; use crate::key::flow::flow_info::FlowInfoValue; use crate::key::flow::FlowMetadataManager; use crate::kv_backend::memory::MemoryKvBackend; - use crate::table_name::TableName; #[tokio::test] async fn test_cache_empty_set() { diff --git a/src/common/meta/src/cache/table.rs b/src/common/meta/src/cache/table.rs index fa3bcbd30994..82a3ad98df33 100644 --- a/src/common/meta/src/cache/table.rs +++ b/src/common/meta/src/cache/table.rs @@ -15,6 +15,9 @@ mod table_info; mod table_name; mod table_route; +mod view_info; + pub use table_info::{new_table_info_cache, TableInfoCache, 
TableInfoCacheRef}; pub use table_name::{new_table_name_cache, TableNameCache, TableNameCacheRef}; pub use table_route::{new_table_route_cache, TableRoute, TableRouteCache, TableRouteCacheRef}; +pub use view_info::{new_view_info_cache, ViewInfoCache, ViewInfoCacheRef}; diff --git a/src/common/meta/src/cache/table/table_name.rs b/src/common/meta/src/cache/table/table_name.rs index 0ec88a2d6e9d..926e4de66f63 100644 --- a/src/common/meta/src/cache/table/table_name.rs +++ b/src/common/meta/src/cache/table/table_name.rs @@ -18,6 +18,7 @@ use futures::future::BoxFuture; use moka::future::Cache; use snafu::OptionExt; use table::metadata::TableId; +use table::table_name::TableName; use crate::cache::{CacheContainer, Initializer}; use crate::error; @@ -25,7 +26,6 @@ use crate::error::Result; use crate::instruction::CacheIdent; use crate::key::table_name::{TableNameKey, TableNameManager, TableNameManagerRef}; use crate::kv_backend::KvBackendRef; -use crate::table_name::TableName; /// [TableNameCache] caches the [TableName] to [TableId] mapping. pub type TableNameCache = CacheContainer; diff --git a/src/common/meta/src/cache/table/view_info.rs b/src/common/meta/src/cache/table/view_info.rs new file mode 100644 index 000000000000..cd9d29b2ca4d --- /dev/null +++ b/src/common/meta/src/cache/table/view_info.rs @@ -0,0 +1,143 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use futures::future::BoxFuture; +use moka::future::Cache; +use snafu::OptionExt; +use store_api::storage::TableId; + +use crate::cache::{CacheContainer, Initializer}; +use crate::error; +use crate::error::Result; +use crate::instruction::CacheIdent; +use crate::key::view_info::{ViewInfoManager, ViewInfoManagerRef, ViewInfoValue}; +use crate::kv_backend::KvBackendRef; + +/// [ViewInfoCache] caches the [TableId] to [ViewInfoValue] mapping. +pub type ViewInfoCache = CacheContainer, CacheIdent>; + +pub type ViewInfoCacheRef = Arc; + +/// Constructs a [ViewInfoCache]. +pub fn new_view_info_cache( + name: String, + cache: Cache>, + kv_backend: KvBackendRef, +) -> ViewInfoCache { + let view_info_manager = Arc::new(ViewInfoManager::new(kv_backend)); + let init = init_factory(view_info_manager); + + CacheContainer::new(name, cache, Box::new(invalidator), init, Box::new(filter)) +} + +fn init_factory(view_info_manager: ViewInfoManagerRef) -> Initializer> { + Arc::new(move |view_id| { + let view_info_manager = view_info_manager.clone(); + Box::pin(async move { + let view_info = view_info_manager + .get(*view_id) + .await? + .context(error::ValueNotExistSnafu {})? 
+ .into_inner(); + + Ok(Some(Arc::new(view_info))) + }) + }) +} + +fn invalidator<'a>( + cache: &'a Cache>, + ident: &'a CacheIdent, +) -> BoxFuture<'a, Result<()>> { + Box::pin(async move { + if let CacheIdent::TableId(table_id) = ident { + cache.invalidate(table_id).await + } + Ok(()) + }) +} + +fn filter(ident: &CacheIdent) -> bool { + matches!(ident, CacheIdent::TableId(_)) +} + +#[cfg(test)] +mod tests { + use std::collections::HashSet; + use std::sync::Arc; + + use moka::future::CacheBuilder; + use table::table_name::TableName; + + use super::*; + use crate::ddl::tests::create_view::test_create_view_task; + use crate::key::TableMetadataManager; + use crate::kv_backend::memory::MemoryKvBackend; + + #[tokio::test] + async fn test_view_info_cache() { + let mem_kv = Arc::new(MemoryKvBackend::default()); + let table_metadata_manager = TableMetadataManager::new(mem_kv.clone()); + let cache = CacheBuilder::new(128).build(); + let cache = new_view_info_cache("test".to_string(), cache, mem_kv.clone()); + + let result = cache.get(1024).await.unwrap(); + assert!(result.is_none()); + let mut task = test_create_view_task("my_view"); + let table_names = { + let mut set = HashSet::new(); + set.insert(TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "a_table".to_string(), + }); + set.insert(TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "b_table".to_string(), + }); + set + }; + + task.view_info.ident.table_id = 1024; + table_metadata_manager + .create_view_metadata( + task.view_info.clone(), + task.create_view.logical_plan.clone(), + table_names, + ) + .await + .unwrap(); + + let view_info = cache.get(1024).await.unwrap().unwrap(); + assert_eq!(view_info.view_info, task.create_view.logical_plan); + assert_eq!( + view_info.table_names, + task.create_view + .table_names + .iter() + .map(|t| t.clone().into()) + .collect::>() + ); + + assert!(cache.contains_key(&1024)); + cache + .invalidate(&[CacheIdent::TableId(1024)]) + .await + .unwrap(); + assert!(!cache.contains_key(&1024)); + } +} diff --git a/src/common/meta/src/ddl.rs b/src/common/meta/src/ddl.rs index 8d8cb8d5a45d..c00b6df08e6b 100644 --- a/src/common/meta/src/ddl.rs +++ b/src/common/meta/src/ddl.rs @@ -48,7 +48,7 @@ pub mod table_meta; #[cfg(any(test, feature = "testing"))] pub mod test_util; #[cfg(test)] -mod tests; +pub(crate) mod tests; pub mod truncate_table; pub mod utils; diff --git a/src/common/meta/src/ddl/alter_logical_tables/table_cache_keys.rs b/src/common/meta/src/ddl/alter_logical_tables/table_cache_keys.rs index 23cf22e2c02c..15f6bfbd6f33 100644 --- a/src/common/meta/src/ddl/alter_logical_tables/table_cache_keys.rs +++ b/src/common/meta/src/ddl/alter_logical_tables/table_cache_keys.rs @@ -13,10 +13,10 @@ // limitations under the License. 
use table::metadata::RawTableInfo; +use table::table_name::TableName; use crate::ddl::alter_logical_tables::AlterLogicalTablesProcedure; use crate::instruction::CacheIdent; -use crate::table_name::TableName; impl AlterLogicalTablesProcedure { pub(crate) fn build_table_cache_keys_to_invalidate(&self) -> Vec { diff --git a/src/common/meta/src/ddl/create_logical_tables/update_metadata.rs b/src/common/meta/src/ddl/create_logical_tables/update_metadata.rs index 61ec611f850f..0309a046138f 100644 --- a/src/common/meta/src/ddl/create_logical_tables/update_metadata.rs +++ b/src/common/meta/src/ddl/create_logical_tables/update_metadata.rs @@ -18,13 +18,13 @@ use common_telemetry::{info, warn}; use itertools::Itertools; use snafu::OptionExt; use table::metadata::TableId; +use table::table_name::TableName; use crate::cache_invalidator::Context; use crate::ddl::create_logical_tables::CreateLogicalTablesProcedure; use crate::ddl::physical_table_metadata; use crate::error::{Result, TableInfoNotFoundSnafu}; use crate::instruction::CacheIdent; -use crate::table_name::TableName; impl CreateLogicalTablesProcedure { pub(crate) async fn update_physical_table_metadata(&mut self) -> Result<()> { diff --git a/src/common/meta/src/ddl/create_view.rs b/src/common/meta/src/ddl/create_view.rs index 5d364ba77417..fa7a115d4d8a 100644 --- a/src/common/meta/src/ddl/create_view.rs +++ b/src/common/meta/src/ddl/create_view.rs @@ -22,9 +22,11 @@ use strum::AsRefStr; use table::metadata::{RawTableInfo, TableId, TableType}; use table::table_reference::TableReference; +use crate::cache_invalidator::Context; use crate::ddl::utils::handle_retry_error; use crate::ddl::{DdlContext, TableMetadata, TableMetadataAllocatorContext}; use crate::error::{self, Result}; +use crate::instruction::CacheIdent; use crate::key::table_name::TableNameKey; use crate::lock_key::{CatalogLock, SchemaLock, TableNameLock}; use crate::rpc::ddl::CreateViewTask; @@ -157,6 +159,25 @@ impl CreateViewProcedure { Ok(Status::executing(true)) } + async fn invalidate_view_cache(&self) -> Result<()> { + let cache_invalidator = &self.context.cache_invalidator; + let ctx = Context { + subject: Some("Invalidate view cache by creating view".to_string()), + }; + + cache_invalidator + .invalidate( + &ctx, + &[ + CacheIdent::TableName(self.data.table_ref().into()), + CacheIdent::TableId(self.view_id()), + ], + ) + .await?; + + Ok(()) + } + /// Creates view metadata /// /// Abort(not-retry): @@ -175,15 +196,21 @@ impl CreateViewProcedure { view_name: self.data.table_ref().to_string(), })?; let new_logical_plan = self.data.task.raw_logical_plan().clone(); + let table_names = self.data.task.table_names(); + manager - .update_view_info(view_id, ¤t_view_info, new_logical_plan) + .update_view_info(view_id, ¤t_view_info, new_logical_plan, table_names) .await?; info!("Updated view metadata for view {view_id}"); } else { let raw_view_info = self.view_info().clone(); manager - .create_view_metadata(raw_view_info, self.data.task.raw_logical_plan()) + .create_view_metadata( + raw_view_info, + self.data.task.raw_logical_plan().clone(), + self.data.task.table_names(), + ) .await?; info!( @@ -191,6 +218,7 @@ impl CreateViewProcedure { ctx.procedure_id ); } + self.invalidate_view_cache().await?; Ok(Status::done_with_output(view_id)) } diff --git a/src/common/meta/src/ddl/drop_database/cursor.rs b/src/common/meta/src/ddl/drop_database/cursor.rs index 7e1cb05bb98d..c3dd8a582684 100644 --- a/src/common/meta/src/ddl/drop_database/cursor.rs +++ 
b/src/common/meta/src/ddl/drop_database/cursor.rs @@ -14,19 +14,23 @@ use std::any::Any; +use common_catalog::format_full_table_name; use common_procedure::Status; use futures::TryStreamExt; use serde::{Deserialize, Serialize}; -use table::metadata::TableId; +use snafu::OptionExt; +use table::metadata::{TableId, TableType}; +use table::table_name::TableName; use super::executor::DropDatabaseExecutor; use super::metadata::DropDatabaseRemoveMetadata; use super::DropTableTarget; +use crate::cache_invalidator::Context; use crate::ddl::drop_database::{DropDatabaseContext, State}; use crate::ddl::DdlContext; -use crate::error::Result; +use crate::error::{Result, TableInfoNotFoundSnafu}; +use crate::instruction::CacheIdent; use crate::key::table_route::TableRouteValue; -use crate::table_name::TableName; #[derive(Debug, Serialize, Deserialize)] pub(crate) struct DropDatabaseCursor { @@ -101,6 +105,40 @@ impl DropDatabaseCursor { )), } } + + async fn handle_view( + &self, + ddl_ctx: &DdlContext, + ctx: &mut DropDatabaseContext, + table_name: String, + table_id: TableId, + ) -> Result<(Box, Status)> { + let view_name = TableName::new(&ctx.catalog, &ctx.schema, &table_name); + ddl_ctx + .table_metadata_manager + .destroy_view_info(table_id, &view_name) + .await?; + + let cache_invalidator = &ddl_ctx.cache_invalidator; + let ctx = Context { + subject: Some("Invalidate table cache by dropping table".to_string()), + }; + + cache_invalidator + .invalidate( + &ctx, + &[ + CacheIdent::TableName(view_name), + CacheIdent::TableId(table_id), + ], + ) + .await?; + + Ok(( + Box::new(DropDatabaseCursor::new(self.target)), + Status::executing(false), + )) + } } #[async_trait::async_trait] @@ -122,6 +160,20 @@ impl State for DropDatabaseCursor { match ctx.tables.as_mut().unwrap().try_next().await? { Some((table_name, table_name_value)) => { let table_id = table_name_value.table_id(); + + let table_info_value = ddl_ctx + .table_metadata_manager + .table_info_manager() + .get(table_id) + .await? 
+ .with_context(|| TableInfoNotFoundSnafu { + table: format_full_table_name(&ctx.catalog, &ctx.schema, &table_name), + })?; + + if table_info_value.table_info.table_type == TableType::View { + return self.handle_view(ddl_ctx, ctx, table_name, table_id).await; + } + match ddl_ctx .table_metadata_manager .table_route_manager() diff --git a/src/common/meta/src/ddl/drop_database/executor.rs b/src/common/meta/src/ddl/drop_database/executor.rs index 48b840e8d9bf..f3a7f9a9fff5 100644 --- a/src/common/meta/src/ddl/drop_database/executor.rs +++ b/src/common/meta/src/ddl/drop_database/executor.rs @@ -19,6 +19,7 @@ use common_telemetry::info; use serde::{Deserialize, Serialize}; use snafu::OptionExt; use table::metadata::TableId; +use table::table_name::TableName; use super::cursor::DropDatabaseCursor; use super::{DropDatabaseContext, DropTableTarget}; @@ -29,7 +30,6 @@ use crate::error::{self, Result}; use crate::key::table_route::TableRouteValue; use crate::region_keeper::OperatingRegionGuard; use crate::rpc::router::{operating_leader_regions, RegionRoute}; -use crate::table_name::TableName; #[derive(Debug, Serialize, Deserialize)] pub(crate) struct DropDatabaseExecutor { @@ -135,6 +135,7 @@ mod tests { use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; use common_error::ext::BoxedError; use common_recordbatch::SendableRecordBatchStream; + use table::table_name::TableName; use crate::ddl::drop_database::cursor::DropDatabaseCursor; use crate::ddl::drop_database::executor::DropDatabaseExecutor; @@ -144,7 +145,6 @@ mod tests { use crate::key::datanode_table::DatanodeTableKey; use crate::peer::Peer; use crate::rpc::router::region_distribution; - use crate::table_name::TableName; use crate::test_util::{new_ddl_context, MockDatanodeHandler, MockDatanodeManager}; #[derive(Clone)] diff --git a/src/common/meta/src/ddl/drop_table/executor.rs b/src/common/meta/src/ddl/drop_table/executor.rs index aa41d03c6597..0783ce86ccaf 100644 --- a/src/common/meta/src/ddl/drop_table/executor.rs +++ b/src/common/meta/src/ddl/drop_table/executor.rs @@ -23,6 +23,7 @@ use futures::future::join_all; use snafu::ensure; use store_api::storage::RegionId; use table::metadata::TableId; +use table::table_name::TableName; use crate::cache_invalidator::Context; use crate::ddl::utils::add_peer_context_if_needed; @@ -32,7 +33,6 @@ use crate::instruction::CacheIdent; use crate::key::table_name::TableNameKey; use crate::key::table_route::TableRouteValue; use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute}; -use crate::table_name::TableName; /// [Control] indicated to the caller whether to go to the next step. 
#[derive(Debug)] @@ -224,6 +224,7 @@ mod tests { use api::v1::{ColumnDataType, SemanticType}; use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; use table::metadata::RawTableInfo; + use table::table_name::TableName; use super::*; use crate::ddl::test_util::columns::TestColumnDefBuilder; @@ -231,7 +232,6 @@ mod tests { build_raw_table_info_from_expr, TestCreateTableExprBuilder, }; use crate::key::table_route::TableRouteValue; - use crate::table_name::TableName; use crate::test_util::{new_ddl_context, MockDatanodeManager}; fn test_create_raw_table_info(name: &str) -> RawTableInfo { diff --git a/src/common/meta/src/ddl/tests.rs b/src/common/meta/src/ddl/tests.rs index 3c550883ffc2..9a0db96a37e0 100644 --- a/src/common/meta/src/ddl/tests.rs +++ b/src/common/meta/src/ddl/tests.rs @@ -17,7 +17,7 @@ mod alter_table; mod create_flow; mod create_logical_tables; mod create_table; -mod create_view; +pub(crate) mod create_view; mod drop_database; mod drop_flow; mod drop_table; diff --git a/src/common/meta/src/ddl/tests/create_flow.rs b/src/common/meta/src/ddl/tests/create_flow.rs index e79fe27b848f..a130e0590c47 100644 --- a/src/common/meta/src/ddl/tests/create_flow.rs +++ b/src/common/meta/src/ddl/tests/create_flow.rs @@ -19,6 +19,7 @@ use std::sync::Arc; use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; use common_procedure_test::execute_procedure_until_done; use session::context::QueryContext; +use table::table_name::TableName; use crate::ddl::create_flow::CreateFlowProcedure; use crate::ddl::test_util::create_table::test_create_table_task; @@ -27,7 +28,6 @@ use crate::ddl::DdlContext; use crate::key::table_route::TableRouteValue; use crate::key::FlowId; use crate::rpc::ddl::CreateFlowTask; -use crate::table_name::TableName; use crate::test_util::{new_ddl_context, MockFlownodeManager}; use crate::{error, ClusterId}; diff --git a/src/common/meta/src/ddl/tests/create_view.rs b/src/common/meta/src/ddl/tests/create_view.rs index 693faddeb3f3..4dc589dbb883 100644 --- a/src/common/meta/src/ddl/tests/create_view.rs +++ b/src/common/meta/src/ddl/tests/create_view.rs @@ -13,9 +13,10 @@ // limitations under the License. 
use std::assert_matches::assert_matches; +use std::collections::HashSet; use std::sync::Arc; -use api::v1::CreateViewExpr; +use api::v1::{CreateViewExpr, TableName}; use common_error::ext::ErrorExt; use common_error::status_code::StatusCode; use common_procedure::{Context as ProcedureContext, Procedure, ProcedureId, Status}; @@ -31,7 +32,35 @@ use crate::error::Error; use crate::rpc::ddl::CreateViewTask; use crate::test_util::{new_ddl_context, MockDatanodeManager}; -fn test_create_view_task(name: &str) -> CreateViewTask { +fn test_table_names() -> HashSet { + let mut set = HashSet::new(); + set.insert(table::table_name::TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "a_table".to_string(), + }); + set.insert(table::table_name::TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "b_table".to_string(), + }); + set +} + +pub(crate) fn test_create_view_task(name: &str) -> CreateViewTask { + let table_names = vec![ + TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "a_table".to_string(), + }, + TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "b_table".to_string(), + }, + ]; + let expr = CreateViewExpr { catalog_name: "greptime".to_string(), schema_name: "public".to_string(), @@ -39,6 +68,7 @@ fn test_create_view_task(name: &str) -> CreateViewTask { or_replace: false, create_if_not_exists: false, logical_plan: vec![1, 2, 3], + table_names, }; let view_info = RawTableInfo { @@ -70,7 +100,11 @@ async fn test_on_prepare_view_exists_err() { // Puts a value to table name key. ddl_context .table_metadata_manager - .create_view_metadata(task.view_info.clone(), &task.create_view.logical_plan) + .create_view_metadata( + task.view_info.clone(), + task.create_view.logical_plan.clone(), + test_table_names(), + ) .await .unwrap(); let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context); @@ -90,7 +124,11 @@ async fn test_on_prepare_with_create_if_view_exists() { // Puts a value to table name key. 
ddl_context .table_metadata_manager - .create_view_metadata(task.view_info.clone(), &task.create_view.logical_plan) + .create_view_metadata( + task.view_info.clone(), + task.create_view.logical_plan.clone(), + test_table_names(), + ) .await .unwrap(); let mut procedure = CreateViewProcedure::new(cluster_id, task, ddl_context); diff --git a/src/common/meta/src/ddl/tests/drop_flow.rs b/src/common/meta/src/ddl/tests/drop_flow.rs index b8b62b76cc61..97b4632a595a 100644 --- a/src/common/meta/src/ddl/tests/drop_flow.rs +++ b/src/common/meta/src/ddl/tests/drop_flow.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; use common_procedure_test::execute_procedure_until_done; +use table::table_name::TableName; use crate::ddl::drop_flow::DropFlowProcedure; use crate::ddl::test_util::create_table::test_create_table_task; @@ -26,7 +27,6 @@ use crate::ddl::tests::create_flow::create_test_flow; use crate::error; use crate::key::table_route::TableRouteValue; use crate::rpc::ddl::DropFlowTask; -use crate::table_name::TableName; use crate::test_util::{new_ddl_context, MockFlownodeManager}; fn test_drop_flow_task(flow_name: &str, flow_id: u32, drop_if_exists: bool) -> DropFlowTask { diff --git a/src/common/meta/src/ddl/truncate_table.rs b/src/common/meta/src/ddl/truncate_table.rs index ce1341c0add2..edc7321e091c 100644 --- a/src/common/meta/src/ddl/truncate_table.rs +++ b/src/common/meta/src/ddl/truncate_table.rs @@ -28,6 +28,7 @@ use snafu::{ensure, ResultExt}; use store_api::storage::RegionId; use strum::AsRefStr; use table::metadata::{RawTableInfo, TableId}; +use table::table_name::TableName; use table::table_reference::TableReference; use super::utils::handle_retry_error; @@ -40,7 +41,6 @@ use crate::key::DeserializedValueWithBytes; use crate::lock_key::{CatalogLock, SchemaLock, TableLock}; use crate::rpc::ddl::TruncateTableTask; use crate::rpc::router::{find_leader_regions, find_leaders, RegionRoute}; -use crate::table_name::TableName; use crate::{metrics, ClusterId}; pub struct TruncateTableProcedure { diff --git a/src/common/meta/src/instruction.rs b/src/common/meta/src/instruction.rs index 7820985b6571..aee1844b0f80 100644 --- a/src/common/meta/src/instruction.rs +++ b/src/common/meta/src/instruction.rs @@ -20,11 +20,11 @@ use serde::{Deserialize, Serialize}; use store_api::storage::{RegionId, RegionNumber}; use strum::Display; use table::metadata::TableId; +use table::table_name::TableName; use crate::flow_name::FlowName; use crate::key::schema_name::SchemaName; use crate::key::FlowId; -use crate::table_name::TableName; use crate::{ClusterId, DatanodeId, FlownodeId}; #[derive(Eq, Hash, PartialEq, Clone, Debug, Serialize, Deserialize)] diff --git a/src/common/meta/src/key.rs b/src/common/meta/src/key.rs index 9090eb075f3c..a8ef1e7eb781 100644 --- a/src/common/meta/src/key.rs +++ b/src/common/meta/src/key.rs @@ -119,6 +119,7 @@ use serde::{Deserialize, Serialize}; use snafu::{ensure, OptionExt, ResultExt}; use store_api::storage::RegionNumber; use table::metadata::{RawTableInfo, TableId}; +use table::table_name::TableName; use table_info::{TableInfoKey, TableInfoManager, TableInfoValue}; use table_name::{TableNameKey, TableNameManager, TableNameValue}; use view_info::{ViewInfoKey, ViewInfoManager, ViewInfoValue}; @@ -138,7 +139,6 @@ use crate::kv_backend::txn::{Txn, TxnOp}; use crate::kv_backend::KvBackendRef; use crate::rpc::router::{region_distribution, RegionRoute, RegionStatus}; use crate::rpc::store::BatchDeleteRequest; -use 
crate::table_name::TableName; use crate::DatanodeId; pub const NAME_PATTERN: &str = r"[a-zA-Z_:-][a-zA-Z0-9_:\-\.]*"; @@ -490,7 +490,8 @@ impl TableMetadataManager { pub async fn create_view_metadata( &self, view_info: RawTableInfo, - raw_logical_plan: &Vec<u8>, + raw_logical_plan: Vec<u8>, + table_names: HashSet<TableName>, ) -> Result<()> { let view_id = view_info.ident.table_id; @@ -512,7 +513,7 @@ impl TableMetadataManager { .build_create_txn(view_id, &table_info_value)?; // Creates view info - let view_info_value = ViewInfoValue::new(raw_logical_plan); + let view_info_value = ViewInfoValue::new(raw_logical_plan, table_names); let (create_view_info_txn, on_create_view_info_failure) = self .view_info_manager() .build_create_txn(view_id, &view_info_value)?; @@ -804,6 +805,33 @@ impl TableMetadataManager { Ok(()) } + fn view_info_keys(&self, view_id: TableId, view_name: &TableName) -> Result<Vec<Vec<u8>>> { + let mut keys = Vec::with_capacity(3); + let view_name = TableNameKey::new( + &view_name.catalog_name, + &view_name.schema_name, + &view_name.table_name, + ); + let table_info_key = TableInfoKey::new(view_id); + let view_info_key = ViewInfoKey::new(view_id); + keys.push(view_name.to_bytes()); + keys.push(table_info_key.to_bytes()); + keys.push(view_info_key.to_bytes()); + + Ok(keys) + } + + /// Deletes the metadata of a view **permanently**. + /// The caller MUST ensure it has the exclusive access to `ViewNameKey`. + pub async fn destroy_view_info(&self, view_id: TableId, view_name: &TableName) -> Result<()> { + let keys = self.view_info_keys(view_id, view_name)?; + let _ = self + .kv_backend + .batch_delete(BatchDeleteRequest::new().with_keys(keys)) + .await?; + Ok(()) + } + /// Renames the table name and returns an error if different metadata exists. /// The caller MUST ensure it has the exclusive access to old and new `TableNameKey`s, /// and the new `TableNameKey` MUST be empty. @@ -903,8 +931,9 @@ impl TableMetadataManager { view_id: TableId, current_view_info_value: &DeserializedValueWithBytes<ViewInfoValue>, new_view_info: Vec<u8>, + table_names: HashSet<TableName>, ) -> Result<()> { - let new_view_info_value = current_view_info_value.update(new_view_info); + let new_view_info_value = current_view_info_value.update(new_view_info, table_names); // Updates view info. let (update_view_info_txn, on_update_view_info_failure) = self @@ -1174,7 +1203,7 @@ impl_optional_meta_value!
{ #[cfg(test)] mod tests { - use std::collections::{BTreeMap, HashMap}; + use std::collections::{BTreeMap, HashMap, HashSet}; use std::sync::Arc; use bytes::Bytes; @@ -1183,6 +1212,7 @@ mod tests { use futures::TryStreamExt; use store_api::storage::RegionId; use table::metadata::{RawTableInfo, TableInfo}; + use table::table_name::TableName; use super::datanode_table::DatanodeTableKey; use super::test_utils; @@ -1197,7 +1227,6 @@ mod tests { use crate::kv_backend::memory::MemoryKvBackend; use crate::peer::Peer; use crate::rpc::router::{region_distribution, Region, RegionRoute, RegionStatus}; - use crate::table_name::TableName; #[test] fn test_deserialized_value_with_bytes() { @@ -1250,6 +1279,21 @@ mod tests { test_utils::new_test_table_info(10, region_numbers) } + fn new_test_table_names() -> HashSet<TableName> { + let mut set = HashSet::new(); + set.insert(TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "a_table".to_string(), + }); + set.insert(TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "b_table".to_string(), + }); + set + } + async fn create_physical_table_metadata( table_metadata_manager: &TableMetadataManager, table_info: RawTableInfo, @@ -1961,9 +2005,11 @@ mod tests { let logical_plan: Vec<u8> = vec![1, 2, 3]; + let table_names = new_test_table_names(); + // Create metadata table_metadata_manager - .create_view_metadata(view_info.clone(), &logical_plan) + .create_view_metadata(view_info.clone(), logical_plan.clone(), table_names.clone()) .await .unwrap(); @@ -1977,6 +2023,7 @@ mod tests { .unwrap() .into_inner(); assert_eq!(current_view_info.view_info, logical_plan); + assert_eq!(current_view_info.table_names, table_names); // assert table info let current_table_info = table_metadata_manager .table_info_manager() @@ -1989,16 +2036,43 @@ mod tests { } let new_logical_plan: Vec<u8> = vec![4, 5, 6]; - let current_view_info_value = - DeserializedValueWithBytes::from_inner(ViewInfoValue::new(&logical_plan)); + let new_table_names = { + let mut set = HashSet::new(); + set.insert(TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "b_table".to_string(), + }); + set.insert(TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "c_table".to_string(), + }); + set + }; + + let current_view_info_value = DeserializedValueWithBytes::from_inner(ViewInfoValue::new( + logical_plan.clone(), + table_names, + )); // should be ok. table_metadata_manager - .update_view_info(view_id, &current_view_info_value, new_logical_plan.clone()) + .update_view_info( + view_id, + &current_view_info_value, + new_logical_plan.clone(), + new_table_names.clone(), + ) .await .unwrap(); // if table info was updated, it should be ok.
table_metadata_manager - .update_view_info(view_id, &current_view_info_value, new_logical_plan.clone()) + .update_view_info( + view_id, + &current_view_info_value, + new_logical_plan.clone(), + new_table_names.clone(), + ) .await .unwrap(); @@ -2011,14 +2085,21 @@ mod tests { .unwrap() .into_inner(); assert_eq!(updated_view_info.view_info, new_logical_plan); + assert_eq!(updated_view_info.table_names, new_table_names); let wrong_view_info = logical_plan.clone(); - let wrong_view_info_value = - DeserializedValueWithBytes::from_inner(current_view_info_value.update(wrong_view_info)); + let wrong_view_info_value = DeserializedValueWithBytes::from_inner( + current_view_info_value.update(wrong_view_info, new_table_names.clone()), + ); // if the current_view_info_value is wrong, it should return an error. // The ABA problem. assert!(table_metadata_manager - .update_view_info(view_id, &wrong_view_info_value, new_logical_plan.clone()) + .update_view_info( + view_id, + &wrong_view_info_value, + new_logical_plan.clone(), + new_table_names.clone(), + ) .await .is_err()); @@ -2031,5 +2112,6 @@ mod tests { .unwrap() .into_inner(); assert_eq!(current_view_info.view_info, new_logical_plan); + assert_eq!(current_view_info.table_names, new_table_names); } } diff --git a/src/common/meta/src/key/flow.rs b/src/common/meta/src/key/flow.rs index 1f8db5585433..b2ce5d1cb24b 100644 --- a/src/common/meta/src/key/flow.rs +++ b/src/common/meta/src/key/flow.rs @@ -262,12 +262,12 @@ mod tests { use futures::TryStreamExt; use table::metadata::TableId; + use table::table_name::TableName; use super::*; use crate::key::flow::table_flow::TableFlowKey; use crate::key::FlowPartitionId; use crate::kv_backend::memory::MemoryKvBackend; - use crate::table_name::TableName; use crate::FlownodeId; #[derive(Debug)] diff --git a/src/common/meta/src/key/flow/flow_info.rs b/src/common/meta/src/key/flow/flow_info.rs index f08e7c5def56..c1ce1a1c994f 100644 --- a/src/common/meta/src/key/flow/flow_info.rs +++ b/src/common/meta/src/key/flow/flow_info.rs @@ -20,6 +20,7 @@ use regex::Regex; use serde::{Deserialize, Serialize}; use snafu::OptionExt; use table::metadata::TableId; +use table::table_name::TableName; use crate::error::{self, Result}; use crate::key::flow::FlowScoped; @@ -27,7 +28,6 @@ use crate::key::txn_helper::TxnOpGetResponseSet; use crate::key::{DeserializedValueWithBytes, FlowId, FlowPartitionId, MetaKey, TableMetaValue}; use crate::kv_backend::txn::Txn; use crate::kv_backend::KvBackendRef; -use crate::table_name::TableName; use crate::FlownodeId; const FLOW_INFO_KEY_PREFIX: &str = "info"; diff --git a/src/common/meta/src/key/table_info.rs b/src/common/meta/src/key/table_info.rs index b50d7bb6b037..a652b7caf0fd 100644 --- a/src/common/meta/src/key/table_info.rs +++ b/src/common/meta/src/key/table_info.rs @@ -19,6 +19,7 @@ use std::sync::Arc; use serde::{Deserialize, Serialize}; use snafu::OptionExt; use table::metadata::{RawTableInfo, TableId}; +use table::table_name::TableName; use table::table_reference::TableReference; use super::TABLE_INFO_KEY_PATTERN; @@ -28,7 +29,6 @@ use crate::key::{DeserializedValueWithBytes, MetaKey, TableMetaValue, TABLE_INFO use crate::kv_backend::txn::Txn; use crate::kv_backend::KvBackendRef; use crate::rpc::store::BatchGetRequest; -use crate::table_name::TableName; /// The key stores the metadata of the table.
/// diff --git a/src/common/meta/src/key/table_name.rs b/src/common/meta/src/key/table_name.rs index b337fe086662..8a44de7cc695 100644 --- a/src/common/meta/src/key/table_name.rs +++ b/src/common/meta/src/key/table_name.rs @@ -20,6 +20,7 @@ use futures_util::stream::BoxStream; use serde::{Deserialize, Serialize}; use snafu::OptionExt; use table::metadata::TableId; +use table::table_name::TableName; use super::{MetaKey, TableMetaValue, TABLE_NAME_KEY_PATTERN, TABLE_NAME_KEY_PREFIX}; use crate::error::{Error, InvalidTableMetadataSnafu, Result}; @@ -29,7 +30,6 @@ use crate::kv_backend::KvBackendRef; use crate::range_stream::{PaginationStream, DEFAULT_PAGE_SIZE}; use crate::rpc::store::{BatchGetRequest, RangeRequest}; use crate::rpc::KeyValue; -use crate::table_name::TableName; #[derive(Debug, Clone, Copy, Hash, PartialEq, Eq)] pub struct TableNameKey<'a> { diff --git a/src/common/meta/src/key/view_info.rs b/src/common/meta/src/key/view_info.rs index 98c8a1a73178..762acf9aa3d3 100644 --- a/src/common/meta/src/key/view_info.rs +++ b/src/common/meta/src/key/view_info.rs @@ -12,12 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fmt::Display; +use std::sync::Arc; use serde::{Deserialize, Serialize}; use snafu::OptionExt; use table::metadata::TableId; +use table::table_name::TableName; use super::VIEW_INFO_KEY_PATTERN; use crate::error::{InvalidViewInfoSnafu, Result}; @@ -80,21 +82,30 @@ impl<'a> MetaKey<'a, ViewInfoKey> for ViewInfoKey { /// The VIEW info value that keeps the metadata. #[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] pub struct ViewInfoValue { + /// The encoded logical plan pub view_info: RawViewLogicalPlan, + /// The resolved fully qualified table names in the logical plan + pub table_names: HashSet<TableName>, version: u64, } impl ViewInfoValue { - pub fn new(view_info: &RawViewLogicalPlan) -> Self { + pub fn new(view_info: RawViewLogicalPlan, table_names: HashSet<TableName>) -> Self { Self { - view_info: view_info.clone(), + view_info, + table_names, version: 0, } } - pub(crate) fn update(&self, new_view_info: RawViewLogicalPlan) -> Self { + pub(crate) fn update( + &self, + new_view_info: RawViewLogicalPlan, + table_names: HashSet<TableName>, + ) -> Self { Self { view_info: new_view_info, + table_names, version: self.version + 1, } } @@ -105,6 +116,8 @@ pub struct ViewInfoManager { kv_backend: KvBackendRef, } +pub type ViewInfoManagerRef = Arc<ViewInfoManager>; + impl ViewInfoManager { pub fn new(kv_backend: KvBackendRef) -> Self { Self { kv_backend } } @@ -254,9 +267,25 @@ mod tests { #[test] fn test_value_serialization() { + let table_names = { + let mut set = HashSet::new(); + set.insert(TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "a_table".to_string(), + }); + set.insert(TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "b_table".to_string(), + }); + set + }; + let value = ViewInfoValue { view_info: vec![1, 2, 3], version: 1, + table_names, }; let serialized = value.try_as_raw_value().unwrap(); let deserialized = ViewInfoValue::try_from_raw_value(&serialized).unwrap(); diff --git a/src/common/meta/src/lib.rs b/src/common/meta/src/lib.rs index 5398a62a6752..78d111c479a9 100644 --- a/src/common/meta/src/lib.rs +++ b/src/common/meta/src/lib.rs @@ -40,7 +40,6 @@ pub mod region_keeper; pub mod rpc; pub mod sequence; pub mod state_store; -pub mod table_name; #[cfg(any(test,
feature = "testing"))] pub mod test_util; pub mod util; diff --git a/src/common/meta/src/rpc/ddl.rs b/src/common/meta/src/rpc/ddl.rs index 8e977f1ca5bc..e6140cdaeff7 100644 --- a/src/common/meta/src/rpc/ddl.rs +++ b/src/common/meta/src/rpc/ddl.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::result; use api::v1::meta::ddl_task_request::Task; @@ -39,11 +39,11 @@ use serde_with::{serde_as, DefaultOnNull}; use session::context::QueryContextRef; use snafu::{OptionExt, ResultExt}; use table::metadata::{RawTableInfo, TableId}; +use table::table_name::TableName; use table::table_reference::TableReference; use crate::error::{self, Result}; use crate::key::FlowId; -use crate::table_name::TableName; /// DDL tasks #[derive(Debug, Clone)] @@ -332,6 +332,14 @@ impl CreateViewTask { pub fn raw_logical_plan(&self) -> &Vec { &self.create_view.logical_plan } + + pub fn table_names(&self) -> HashSet { + self.create_view + .table_names + .iter() + .map(|t| t.clone().into()) + .collect() + } } impl TryFrom for CreateViewTask { diff --git a/src/common/meta/src/rpc/router.rs b/src/common/meta/src/rpc/router.rs index 31be66f64954..3e609e4af4d8 100644 --- a/src/common/meta/src/rpc/router.rs +++ b/src/common/meta/src/rpc/router.rs @@ -25,11 +25,11 @@ use serde::{Deserialize, Deserializer, Serialize, Serializer}; use snafu::OptionExt; use store_api::storage::{RegionId, RegionNumber}; use strum::AsRefStr; +use table::table_name::TableName; use crate::error::{self, Result}; use crate::key::RegionDistribution; use crate::peer::Peer; -use crate::table_name::TableName; use crate::DatanodeId; pub fn region_distribution(region_routes: &[RegionRoute]) -> RegionDistribution { diff --git a/src/common/query/Cargo.toml b/src/common/query/Cargo.toml index 443640016488..d7a0361965bd 100644 --- a/src/common/query/Cargo.toml +++ b/src/common/query/Cargo.toml @@ -4,12 +4,16 @@ version.workspace = true edition.workspace = true license.workspace = true +[features] +testing = [] + [lints] workspace = true [dependencies] api.workspace = true async-trait.workspace = true +bytes.workspace = true common-error.workspace = true common-macro.workspace = true common-recordbatch.workspace = true diff --git a/src/common/query/src/error.rs b/src/common/query/src/error.rs index 6756c58a8449..d544e6166cdf 100644 --- a/src/common/query/src/error.rs +++ b/src/common/query/src/error.rs @@ -206,6 +206,13 @@ pub enum Error { location: Location, }, + #[snafu(display("Failed to decode logical plan: {source}"))] + DecodePlan { + #[snafu(implicit)] + location: Location, + source: BoxedError, + }, + #[snafu(display("Failed to do table mutation"))] TableMutation { source: BoxedError, @@ -282,11 +289,12 @@ impl ErrorExt for Error { | Error::InvalidFuncArgs { .. } => StatusCode::InvalidArguments, Error::ConvertDfRecordBatchStream { source, .. } => source.status_code(), - Error::ExecutePhysicalPlan { source, .. } => source.status_code(), - Error::Execute { source, .. } => source.status_code(), - Error::ProcedureService { source, .. } | Error::TableMutation { source, .. } => { - source.status_code() - } + + Error::DecodePlan { source, .. } + | Error::Execute { source, .. } + | Error::ExecutePhysicalPlan { source, .. } + | Error::ProcedureService { source, .. } + | Error::TableMutation { source, .. } => source.status_code(), Error::PermissionDenied { .. 
} => StatusCode::PermissionDenied, } } diff --git a/src/common/query/src/lib.rs b/src/common/query/src/lib.rs index 49aff8d9a4dd..68c7c2568cbc 100644 --- a/src/common/query/src/lib.rs +++ b/src/common/query/src/lib.rs @@ -18,7 +18,8 @@ mod function; pub mod logical_plan; pub mod prelude; mod signature; - +#[cfg(any(test, feature = "testing"))] +pub mod test_util; use std::fmt::{Debug, Display, Formatter}; use std::sync::Arc; diff --git a/src/common/query/src/logical_plan.rs b/src/common/query/src/logical_plan.rs index 6705a63e4251..3598001d67f7 100644 --- a/src/common/query/src/logical_plan.rs +++ b/src/common/query/src/logical_plan.rs @@ -19,12 +19,15 @@ mod udf; use std::sync::Arc; +use datafusion::catalog::CatalogProviderList; +use datafusion::logical_expr::LogicalPlan; use datatypes::prelude::ConcreteDataType; pub use expr::build_filter_from_timestamp; pub use self::accumulator::{Accumulator, AggregateFunctionCreator, AggregateFunctionCreatorRef}; pub use self::udaf::AggregateFunction; pub use self::udf::ScalarUdf; +use crate::error::Result; use crate::function::{ReturnTypeFunction, ScalarFunctionImplementation}; use crate::logical_plan::accumulator::*; use crate::signature::{Signature, Volatility}; @@ -68,6 +71,25 @@ pub fn create_aggregate_function( ) } +/// The datafusion [`LogicalPlan`] decoder. +#[async_trait::async_trait] +pub trait SubstraitPlanDecoder { + /// Decode the [`LogicalPlan`] from bytes with the [`CatalogProviderList`]. + /// When `optimize` is true, it will optimize the decoded plan. + /// + /// TODO(dennis): It's not a good design for an API to do many things. + /// The `optimize` flag was introduced because of the cyclic dependency between `query` and `catalog`. + /// I am happy to refactor it once we have a better solution. + async fn decode( + &self, + message: bytes::Bytes, + catalog_list: Arc<dyn CatalogProviderList>, + optimize: bool, + ) -> Result<LogicalPlan>; +} + +pub type SubstraitPlanDecoderRef = Arc<dyn SubstraitPlanDecoder>; + #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/src/common/query/src/test_util.rs b/src/common/query/src/test_util.rs new file mode 100644 index 000000000000..141c284a7baf --- /dev/null +++ b/src/common/query/src/test_util.rs @@ -0,0 +1,42 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use datafusion::catalog::CatalogProviderList; +use datafusion::logical_expr::LogicalPlan; + +use crate::error::Result; +use crate::logical_plan::SubstraitPlanDecoder; + +/// A dummy [`SubstraitPlanDecoder`] for tests.
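+///
+/// Its `decode` implementation is `unreachable!()`, so it only satisfies the trait
+/// bound in tests that never actually decode a plan.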
+pub struct DummyDecoder; + +impl DummyDecoder { + pub fn arc() -> Arc { + Arc::new(DummyDecoder) + } +} + +#[async_trait::async_trait] +impl SubstraitPlanDecoder for DummyDecoder { + async fn decode( + &self, + _message: bytes::Bytes, + _catalog_list: Arc, + _optimize: bool, + ) -> Result { + unreachable!() + } +} diff --git a/src/common/substrait/Cargo.toml b/src/common/substrait/Cargo.toml index a2fb0e272594..3da8b6310017 100644 --- a/src/common/substrait/Cargo.toml +++ b/src/common/substrait/Cargo.toml @@ -10,19 +10,15 @@ workspace = true [dependencies] async-trait.workspace = true bytes.workspace = true -catalog.workspace = true common-error.workspace = true -common-function.workspace = true common-macro.workspace = true common-telemetry.workspace = true datafusion.workspace = true datafusion-common.workspace = true datafusion-expr.workspace = true datafusion-substrait.workspace = true -datatypes.workspace = true promql.workspace = true prost.workspace = true -session.workspace = true snafu.workspace = true [dependencies.substrait_proto] diff --git a/src/common/substrait/src/df_substrait.rs b/src/common/substrait/src/df_substrait.rs index 0730f0773b32..9217b60cc5b6 100644 --- a/src/common/substrait/src/df_substrait.rs +++ b/src/common/substrait/src/df_substrait.rs @@ -16,26 +16,19 @@ use std::sync::Arc; use async_trait::async_trait; use bytes::{Buf, Bytes, BytesMut}; -use common_function::function_registry::FUNCTION_REGISTRY; -use common_function::scalars::udf::create_udf; use datafusion::catalog::CatalogProviderList; use datafusion::execution::context::SessionState; use datafusion::execution::runtime_env::RuntimeEnv; -use datafusion::execution::FunctionRegistry; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion_expr::LogicalPlan; use datafusion_substrait::logical_plan::consumer::from_substrait_plan; use datafusion_substrait::logical_plan::producer::to_substrait_plan; use datafusion_substrait::substrait::proto::Plan; use prost::Message; -use session::context::QueryContextRef; use snafu::ResultExt; -use crate::error::{ - DFInternalSnafu, DecodeDfPlanSnafu, DecodeRelSnafu, EncodeDfPlanSnafu, EncodeRelSnafu, Error, -}; -use crate::extension_serializer::ExtensionSerializer; -use crate::SubstraitPlan; +use crate::error::{DecodeDfPlanSnafu, DecodeRelSnafu, EncodeDfPlanSnafu, EncodeRelSnafu, Error}; +use crate::{SerializerRegistry, SubstraitPlan}; pub struct DFLogicalSubstraitConvertor; @@ -49,15 +42,8 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor { &self, message: B, catalog_list: Arc, - mut state: SessionState, - query_ctx: QueryContextRef, + state: SessionState, ) -> Result { - // substrait decoder will look up the UDFs in SessionState, so we need to register them - for func in FUNCTION_REGISTRY.functions() { - let udf = Arc::new(create_udf(func, query_ctx.clone(), Default::default()).into()); - state.register_udf(udf).context(DFInternalSnafu)?; - } - let mut context = SessionContext::new_with_state(state); context.register_catalog_list(catalog_list); let plan = Plan::decode(message).context(DecodeRelSnafu)?; @@ -67,10 +53,13 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor { Ok(df_plan) } - fn encode(&self, plan: &Self::Plan) -> Result { + fn encode( + &self, + plan: &Self::Plan, + serializer: impl SerializerRegistry + 'static, + ) -> Result { let mut buf = BytesMut::new(); - - let substrait_plan = self.to_sub_plan(plan)?; + let substrait_plan = self.to_sub_plan(plan, serializer)?; substrait_plan.encode(&mut buf).context(EncodeRelSnafu)?; 
Ok(buf.freeze()) @@ -78,10 +67,14 @@ impl SubstraitPlan for DFLogicalSubstraitConvertor { } impl DFLogicalSubstraitConvertor { - pub fn to_sub_plan(&self, plan: &LogicalPlan) -> Result, Error> { + pub fn to_sub_plan( + &self, + plan: &LogicalPlan, + serializer: impl SerializerRegistry + 'static, + ) -> Result, Error> { let session_state = SessionState::new_with_config_rt(SessionConfig::new(), Arc::new(RuntimeEnv::default())) - .with_serializer_registry(Arc::new(ExtensionSerializer)); + .with_serializer_registry(Arc::new(serializer)); let context = SessionContext::new_with_state(session_state); to_substrait_plan(plan, &context).context(EncodeDfPlanSnafu) diff --git a/src/common/substrait/src/error.rs b/src/common/substrait/src/error.rs index 07cc310d3934..5a39a1a6d1d0 100644 --- a/src/common/substrait/src/error.rs +++ b/src/common/substrait/src/error.rs @@ -18,7 +18,6 @@ use common_error::ext::{BoxedError, ErrorExt}; use common_error::status_code::StatusCode; use common_macro::stack_trace_debug; use datafusion::error::DataFusionError; -use datatypes::prelude::ConcreteDataType; use prost::{DecodeError, EncodeError}; use snafu::{Location, Snafu}; @@ -26,34 +25,6 @@ use snafu::{Location, Snafu}; #[snafu(visibility(pub))] #[stack_trace_debug] pub enum Error { - #[snafu(display("Unsupported physical plan: {}", name))] - UnsupportedPlan { - name: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Unsupported expr: {}", name))] - UnsupportedExpr { - name: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Unsupported concrete type: {:?}", ty))] - UnsupportedConcreteType { - ty: ConcreteDataType, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Unsupported substrait type: {}", ty))] - UnsupportedSubstraitType { - ty: String, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to decode substrait relation"))] DecodeRel { #[snafu(source)] @@ -70,33 +41,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Input plan is empty"))] - EmptyPlan { - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Input expression is empty"))] - EmptyExpr { - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Missing required field in protobuf, field: {}, plan: {}", field, plan))] - MissingField { - field: String, - plan: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Invalid parameters: {}", reason))] - InvalidParameters { - reason: String, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Internal error from DataFusion"))] DFInternal { #[snafu(source)] @@ -118,35 +62,6 @@ pub enum Error { location: Location, }, - #[snafu(display( - "Schema from Substrait proto doesn't match with the schema in storage. 
- Substrait schema: {:?} - Storage schema: {:?}", - substrait_schema, - storage_schema - ))] - SchemaNotMatch { - substrait_schema: datafusion::arrow::datatypes::SchemaRef, - storage_schema: datafusion::arrow::datatypes::SchemaRef, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Failed to convert DataFusion schema"))] - ConvertDfSchema { - #[snafu(implicit)] - location: Location, - source: datatypes::error::Error, - }, - - #[snafu(display("Unable to resolve table: {table_name}, error: "))] - ResolveTable { - table_name: String, - #[snafu(implicit)] - location: Location, - source: catalog::error::Error, - }, - #[snafu(display("Failed to encode DataFusion plan"))] EncodeDfPlan { #[snafu(source)] @@ -169,24 +84,13 @@ pub type Result = std::result::Result; impl ErrorExt for Error { fn status_code(&self) -> StatusCode { match self { - Error::UnsupportedConcreteType { .. } - | Error::UnsupportedPlan { .. } - | Error::UnsupportedExpr { .. } - | Error::UnsupportedSubstraitType { .. } => StatusCode::Unsupported, - Error::UnknownPlan { .. } - | Error::EncodeRel { .. } - | Error::DecodeRel { .. } - | Error::EmptyPlan { .. } - | Error::EmptyExpr { .. } - | Error::MissingField { .. } - | Error::InvalidParameters { .. } - | Error::SchemaNotMatch { .. } => StatusCode::InvalidArguments, + Error::UnknownPlan { .. } | Error::EncodeRel { .. } | Error::DecodeRel { .. } => { + StatusCode::InvalidArguments + } Error::DFInternal { .. } | Error::Internal { .. } | Error::EncodeDfPlan { .. } | Error::DecodeDfPlan { .. } => StatusCode::Internal, - Error::ConvertDfSchema { source, .. } => source.status_code(), - Error::ResolveTable { source, .. } => source.status_code(), } } diff --git a/src/common/substrait/src/extension_serializer.rs b/src/common/substrait/src/extension_serializer.rs index 89944db508f9..a8179437687e 100644 --- a/src/common/substrait/src/extension_serializer.rs +++ b/src/common/substrait/src/extension_serializer.rs @@ -67,7 +67,6 @@ impl SerializerRegistry for ExtensionSerializer { name if name == EmptyMetric::name() => Err(DataFusionError::Substrait( "EmptyMetric should not be serialized".to_string(), )), - "MergeScan" => Ok(vec![]), other => Err(DataFusionError::NotImplemented(format!( "Serizlize logical plan for {}", other diff --git a/src/common/substrait/src/lib.rs b/src/common/substrait/src/lib.rs index 8a03dd7308ed..756e701c489a 100644 --- a/src/common/substrait/src/lib.rs +++ b/src/common/substrait/src/lib.rs @@ -23,11 +23,11 @@ use async_trait::async_trait; use bytes::{Buf, Bytes}; use datafusion::catalog::CatalogProviderList; use datafusion::execution::context::SessionState; +pub use datafusion::execution::registry::SerializerRegistry; /// Re-export the Substrait module of datafusion, /// note this is a different version of the `substrait_proto` crate pub use datafusion_substrait::substrait as substrait_proto_df; pub use datafusion_substrait::{logical_plan as df_logical_plan, variation_const}; -use session::context::QueryContextRef; pub use substrait_proto; pub use crate::df_substrait::DFLogicalSubstraitConvertor; @@ -42,8 +42,11 @@ pub trait SubstraitPlan { message: B, catalog_list: Arc, state: SessionState, - query_ctx: QueryContextRef, ) -> Result; - fn encode(&self, plan: &Self::Plan) -> Result; + fn encode( + &self, + plan: &Self::Plan, + serializer: impl SerializerRegistry + 'static, + ) -> Result; } diff --git a/src/datanode/Cargo.toml b/src/datanode/Cargo.toml index 26a7ccb67563..a5408b0c3246 100644 --- a/src/datanode/Cargo.toml +++ b/src/datanode/Cargo.toml @@ 
-57,7 +57,6 @@ servers.workspace = true session.workspace = true snafu.workspace = true store-api.workspace = true -substrait.workspace = true table.workspace = true tokio.workspace = true toml.workspace = true diff --git a/src/datanode/src/error.rs b/src/datanode/src/error.rs index 945d03422731..919a921ec349 100644 --- a/src/datanode/src/error.rs +++ b/src/datanode/src/error.rs @@ -64,11 +64,18 @@ pub enum Error { source: query::error::Error, }, + #[snafu(display("Failed to create plan decoder"))] + NewPlanDecoder { + #[snafu(implicit)] + location: Location, + source: query::error::Error, + }, + #[snafu(display("Failed to decode logical plan"))] DecodeLogicalPlan { #[snafu(implicit)] location: Location, - source: substrait::error::Error, + source: common_query::error::Error, }, #[snafu(display("Incorrect internal state: {}", state))] @@ -388,7 +395,9 @@ impl ErrorExt for Error { fn status_code(&self) -> StatusCode { use Error::*; match self { - ExecuteLogicalPlan { source, .. } => source.status_code(), + NewPlanDecoder { source, .. } | ExecuteLogicalPlan { source, .. } => { + source.status_code() + } BuildRegionRequests { source, .. } => source.status_code(), HandleHeartbeatResponse { source, .. } | GetMetadata { source, .. } => { diff --git a/src/datanode/src/region_server.rs b/src/datanode/src/region_server.rs index 5a8236a6d15c..13b10c497cef 100644 --- a/src/datanode/src/region_server.rs +++ b/src/datanode/src/region_server.rs @@ -51,13 +51,13 @@ use store_api::metric_engine_consts::{ use store_api::region_engine::{RegionEngineRef, RegionRole, SetReadonlyResponse}; use store_api::region_request::{AffectedRows, RegionCloseRequest, RegionRequest}; use store_api::storage::RegionId; -use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan}; use tonic::{Request, Response, Result as TonicResult}; use crate::error::{ self, BuildRegionRequestsSnafu, DecodeLogicalPlanSnafu, ExecuteLogicalPlanSnafu, - FindLogicalRegionsSnafu, HandleRegionRequestSnafu, RegionEngineNotFoundSnafu, - RegionNotFoundSnafu, Result, StopRegionEngineSnafu, UnexpectedSnafu, UnsupportedOutputSnafu, + FindLogicalRegionsSnafu, HandleRegionRequestSnafu, NewPlanDecoderSnafu, + RegionEngineNotFoundSnafu, RegionNotFoundSnafu, Result, StopRegionEngineSnafu, UnexpectedSnafu, + UnsupportedOutputSnafu, }; use crate::event_listener::RegionServerEventListenerRef; @@ -653,14 +653,13 @@ impl RegionServerInner { let catalog_list = Arc::new(DummyCatalogList::with_table_provider(table_provider)); let query_engine_ctx = self.query_engine.engine_context(ctx.clone()); + let plan_decoder = query_engine_ctx + .new_plan_decoder() + .context(NewPlanDecoderSnafu)?; + // decode substrait plan to logical plan and execute it - let logical_plan = DFLogicalSubstraitConvertor - .decode( - Bytes::from(plan), - catalog_list, - query_engine_ctx.state().clone(), - ctx.clone(), - ) + let logical_plan = plan_decoder + .decode(Bytes::from(plan), catalog_list, false) .await .context(DecodeLogicalPlanSnafu)?; diff --git a/src/flow/src/transform.rs b/src/flow/src/transform.rs index 6f93e36e9682..bb28c8630b33 100644 --- a/src/flow/src/transform.rs +++ b/src/flow/src/transform.rs @@ -23,6 +23,7 @@ use literal::{from_substrait_literal, from_substrait_type}; use prost::Message; use query::parser::QueryLanguageParser; use query::plan::LogicalPlan; +use query::query_engine::DefaultSerializer; use query::QueryEngine; use session::context::QueryContext; use snafu::{OptionExt, ResultExt}; @@ -121,7 +122,7 @@ pub async fn sql_to_flow_plan( .context(ExternalSnafu)?; let 
LogicalPlan::DfPlan(plan) = plan; let sub_plan = DFLogicalSubstraitConvertor {} - .to_sub_plan(&plan) + .to_sub_plan(&plan, DefaultSerializer) .map_err(BoxedError::new) .context(ExternalSnafu)?; @@ -294,7 +295,9 @@ mod test { let LogicalPlan::DfPlan(plan) = plan; // encode then decode so to rely on the impl of conversion from logical plan to substrait plan - let bytes = DFLogicalSubstraitConvertor {}.encode(&plan).unwrap(); + let bytes = DFLogicalSubstraitConvertor {} + .encode(&plan, DefaultSerializer) + .unwrap(); proto::Plan::decode(bytes).unwrap() } diff --git a/src/frontend/src/instance/grpc.rs b/src/frontend/src/instance/grpc.rs index 7be2c09ec1d4..6597e049aa3b 100644 --- a/src/frontend/src/instance/grpc.rs +++ b/src/frontend/src/instance/grpc.rs @@ -18,7 +18,6 @@ use api::v1::query_request::Query; use api::v1::{DeleteRequests, DropFlowExpr, InsertRequests, RowDeleteRequests, RowInsertRequests}; use async_trait::async_trait; use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq}; -use common_meta::table_name::TableName; use common_query::Output; use common_telemetry::tracing; use query::parser::PromQuery; @@ -27,6 +26,7 @@ use servers::query_handler::grpc::GrpcQueryHandler; use servers::query_handler::sql::SqlQueryHandler; use session::context::QueryContextRef; use snafu::{ensure, OptionExt, ResultExt}; +use table::table_name::TableName; use crate::error::{ Error, IncompleteGrpcRequestSnafu, NotSupportedSnafu, PermissionSnafu, Result, diff --git a/src/frontend/src/script.rs b/src/frontend/src/script.rs index 68a7d780ae9e..5cda392d112f 100644 --- a/src/frontend/src/script.rs +++ b/src/frontend/src/script.rs @@ -72,12 +72,12 @@ mod python { use arc_swap::ArcSwap; use catalog::RegisterSystemTableRequest; use common_error::ext::BoxedError; - use common_meta::table_name::TableName; use common_telemetry::{error, info}; use script::manager::ScriptManager; use servers::query_handler::grpc::GrpcQueryHandler; use session::context::QueryContext; use snafu::{OptionExt, ResultExt}; + use table::table_name::TableName; use super::*; use crate::error::{CatalogSnafu, TableNotFoundSnafu}; diff --git a/src/meta-srv/src/procedure/region_failover.rs b/src/meta-srv/src/procedure/region_failover.rs index 7d82ad36d520..9ee017ad15a6 100644 --- a/src/meta-srv/src/procedure/region_failover.rs +++ b/src/meta-srv/src/procedure/region_failover.rs @@ -29,7 +29,6 @@ use common_meta::key::datanode_table::DatanodeTableKey; use common_meta::key::{TableMetadataManagerRef, MAINTENANCE_KEY}; use common_meta::kv_backend::{KvBackendRef, ResettableKvBackendRef}; use common_meta::lock_key::{CatalogLock, RegionLock, SchemaLock, TableLock}; -use common_meta::table_name::TableName; use common_meta::{ClusterId, RegionIdent}; use common_procedure::error::{ Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu, @@ -44,6 +43,7 @@ use serde::{Deserialize, Serialize}; use snafu::ResultExt; use store_api::storage::{RegionId, RegionNumber}; use table::metadata::TableId; +use table::table_name::TableName; use crate::error::{ self, KvBackendSnafu, RegisterProcedureLoaderSnafu, Result, TableMetadataManagerSnafu, diff --git a/src/meta-srv/src/procedure/region_migration/manager.rs b/src/meta-srv/src/procedure/region_migration/manager.rs index 7dde629cbdb9..871342fd4fef 100644 --- a/src/meta-srv/src/procedure/region_migration/manager.rs +++ b/src/meta-srv/src/procedure/region_migration/manager.rs @@ -22,12 +22,12 @@ use common_meta::key::table_info::TableInfoValue; use 
common_meta::key::table_route::TableRouteValue; use common_meta::peer::Peer; use common_meta::rpc::router::RegionRoute; -use common_meta::table_name::TableName; use common_meta::ClusterId; use common_procedure::{watcher, ProcedureId, ProcedureManagerRef, ProcedureWithId}; use common_telemetry::{error, info}; use snafu::{ensure, OptionExt, ResultExt}; use store_api::storage::RegionId; +use table::table_name::TableName; use crate::error::{self, Result}; use crate::procedure::region_migration::{ diff --git a/src/operator/src/error.rs b/src/operator/src/error.rs index 6e77b53d3eba..d7dcdb9d7057 100644 --- a/src/operator/src/error.rs +++ b/src/operator/src/error.rs @@ -22,7 +22,6 @@ use datafusion::parquet; use datatypes::arrow::error::ArrowError; use servers::define_into_tonic_status; use snafu::{Location, Snafu}; -use sql::ast::Value; #[derive(Snafu)] #[snafu(visibility(pub))] @@ -113,12 +112,11 @@ pub enum Error { error: datafusion::error::DataFusionError, }, - #[snafu(display("Failed to convert value to sql value: {}", value))] - ConvertSqlValue { - value: Value, + #[snafu(display("Failed to extract table names"))] + ExtractTableNames { #[snafu(implicit)] location: Location, - source: sql::error::Error, + source: query::error::Error, }, #[snafu(display("Column datatype error"))] @@ -542,13 +540,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Failed to prepare immutable table"))] - PrepareImmutableTable { - #[snafu(implicit)] - location: Location, - source: query::error::Error, - }, - #[snafu(display("Invalid COPY parameter, key: {}, value: {}", key, value))] InvalidCopyParameter { key: String, @@ -571,20 +562,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Failed to read record batch"))] - ReadRecordBatch { - source: common_recordbatch::error::Error, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Failed to build column vectors"))] - BuildColumnVectors { - source: common_recordbatch::error::Error, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Missing insert body"))] MissingInsertBody { source: sql::error::Error, @@ -738,7 +715,6 @@ impl ErrorExt for Error { | Error::ColumnNotFound { .. } | Error::BuildRegex { .. } | Error::InvalidSchema { .. } - | Error::PrepareImmutableTable { .. } | Error::BuildCsvConfig { .. } | Error::ProjectSchema { .. } | Error::UnsupportedFormat { .. } @@ -762,9 +738,7 @@ impl ErrorExt for Error { Error::TableMetadataManager { source, .. } => source.status_code(), - Error::ConvertSqlValue { source, .. } | Error::ParseSql { source, .. } => { - source.status_code() - } + Error::ParseSql { source, .. } => source.status_code(), Error::InvalidateTableCache { source, .. } => source.status_code(), @@ -814,6 +788,7 @@ impl ErrorExt for Error { | Error::FindNewColumnsOnInsertion { source, .. } => source.status_code(), Error::ExecuteStatement { source, .. } + | Error::ExtractTableNames { source, .. } | Error::PlanStatement { source, .. } | Error::ParseQuery { source, .. } | Error::ExecLogicalPlan { source, .. } @@ -843,10 +818,6 @@ impl ErrorExt for Error { StatusCode::InvalidArguments } - Error::ReadRecordBatch { source, .. } | Error::BuildColumnVectors { source, .. } => { - source.status_code() - } - Error::ColumnDefaultValue { source, .. } => source.status_code(), Error::DdlWithMultiCatalogs { .. 
} diff --git a/src/operator/src/expr_factory.rs b/src/operator/src/expr_factory.rs index 6715a06e0363..8f9aab097f63 100644 --- a/src/operator/src/expr_factory.rs +++ b/src/operator/src/expr_factory.rs @@ -519,6 +519,7 @@ pub(crate) fn to_alter_expr( pub fn to_create_view_expr( stmt: CreateView, logical_plan: Vec, + table_names: Vec, query_ctx: QueryContextRef, ) -> Result { let (catalog_name, schema_name, view_name) = table_idents_to_full_name(&stmt.name, &query_ctx) @@ -532,6 +533,7 @@ pub fn to_create_view_expr( logical_plan, create_if_not_exists: stmt.if_not_exists, or_replace: stmt.or_replace, + table_names, }; Ok(expr) @@ -789,6 +791,21 @@ mod tests { assert!(change_column_type.target_type_extension.is_none()); } + fn new_test_table_names() -> Vec { + vec![ + TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "a_table".to_string(), + }, + TableName { + catalog_name: "greptime".to_string(), + schema_name: "public".to_string(), + table_name: "b_table".to_string(), + }, + ] + } + #[test] fn test_to_create_view_expr() { let sql = "CREATE VIEW test AS SELECT * FROM NUMBERS"; @@ -803,8 +820,15 @@ mod tests { }; let logical_plan = vec![1, 2, 3]; + let table_names = new_test_table_names(); - let expr = to_create_view_expr(stmt, logical_plan.clone(), QueryContext::arc()).unwrap(); + let expr = to_create_view_expr( + stmt, + logical_plan.clone(), + table_names.clone(), + QueryContext::arc(), + ) + .unwrap(); assert_eq!("greptime", expr.catalog_name); assert_eq!("public", expr.schema_name); @@ -812,6 +836,7 @@ mod tests { assert!(!expr.create_if_not_exists); assert!(!expr.or_replace); assert_eq!(logical_plan, expr.logical_plan); + assert_eq!(table_names, expr.table_names); } #[test] @@ -828,8 +853,15 @@ mod tests { }; let logical_plan = vec![1, 2, 3]; + let table_names = new_test_table_names(); - let expr = to_create_view_expr(stmt, logical_plan.clone(), QueryContext::arc()).unwrap(); + let expr = to_create_view_expr( + stmt, + logical_plan.clone(), + table_names.clone(), + QueryContext::arc(), + ) + .unwrap(); assert_eq!("greptime", expr.catalog_name); assert_eq!("test", expr.schema_name); @@ -837,5 +869,6 @@ mod tests { assert!(expr.create_if_not_exists); assert!(expr.or_replace); assert_eq!(logical_plan, expr.logical_plan); + assert_eq!(table_names, expr.table_names); } } diff --git a/src/operator/src/statement.rs b/src/operator/src/statement.rs index e9b6f4b282c0..649af286a4bb 100644 --- a/src/operator/src/statement.rs +++ b/src/operator/src/statement.rs @@ -32,7 +32,6 @@ use common_meta::ddl::ProcedureExecutorRef; use common_meta::key::flow::{FlowMetadataManager, FlowMetadataManagerRef}; use common_meta::key::{TableMetadataManager, TableMetadataManagerRef}; use common_meta::kv_backend::KvBackendRef; -use common_meta::table_name::TableName; use common_query::Output; use common_telemetry::tracing; use common_time::range::TimestampRange; @@ -50,6 +49,7 @@ use sql::statements::OptionMap; use sql::util::format_raw_object_name; use sqlparser::ast::ObjectName; use table::requests::{CopyDatabaseRequest, CopyDirection, CopyTableRequest}; +use table::table_name::TableName; use table::table_reference::TableReference; use table::TableRef; diff --git a/src/operator/src/statement/ddl.rs b/src/operator/src/statement/ddl.rs index 67c4a4251bf0..2cfe71fd0d24 100644 --- a/src/operator/src/statement/ddl.rs +++ b/src/operator/src/statement/ddl.rs @@ -32,7 +32,6 @@ use common_meta::rpc::ddl::{ CreateFlowTask, DdlTask, DropFlowTask, SubmitDdlTaskRequest, 
SubmitDdlTaskResponse, }; use common_meta::rpc::router::{Partition, Partition as MetaPartition}; -use common_meta::table_name::TableName; use common_query::Output; use common_telemetry::{debug, info, tracing}; use common_time::Timezone; @@ -43,6 +42,8 @@ use lazy_static::lazy_static; use partition::expr::{Operand, PartitionExpr, RestrictedOp}; use partition::partition::{PartitionBound, PartitionDef}; use query::parser::QueryStatement; +use query::plan::extract_and_rewrite_full_table_names; +use query::query_engine::DefaultSerializer; use query::sql::create_table_stmt; use regex::Regex; use session::context::QueryContextRef; @@ -60,17 +61,19 @@ use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan}; use table::dist_table::DistTable; use table::metadata::{self, RawTableInfo, RawTableMeta, TableId, TableInfo, TableType}; use table::requests::{AlterKind, AlterTableRequest, TableOptions, COMMENT_KEY}; +use table::table_name::TableName; use table::TableRef; use super::StatementExecutor; use crate::error::{ self, AlterExprToRequestSnafu, CatalogSnafu, ColumnDataTypeSnafu, ColumnNotFoundSnafu, CreateLogicalTablesSnafu, CreateTableInfoSnafu, DdlWithMultiCatalogsSnafu, - DdlWithMultiSchemasSnafu, DeserializePartitionSnafu, EmptyDdlExprSnafu, FlowNotFoundSnafu, - InvalidPartitionColumnsSnafu, InvalidPartitionRuleSnafu, InvalidTableNameSnafu, - InvalidViewNameSnafu, InvalidViewStmtSnafu, ParseSqlValueSnafu, Result, SchemaInUseSnafu, - SchemaNotFoundSnafu, SubstraitCodecSnafu, TableAlreadyExistsSnafu, TableMetadataManagerSnafu, - TableNotFoundSnafu, UnrecognizedTableOptionSnafu, ViewAlreadyExistsSnafu, + DdlWithMultiSchemasSnafu, DeserializePartitionSnafu, EmptyDdlExprSnafu, ExtractTableNamesSnafu, + FlowNotFoundSnafu, InvalidPartitionColumnsSnafu, InvalidPartitionRuleSnafu, + InvalidTableNameSnafu, InvalidViewNameSnafu, InvalidViewStmtSnafu, ParseSqlValueSnafu, Result, + SchemaInUseSnafu, SchemaNotFoundSnafu, SubstraitCodecSnafu, TableAlreadyExistsSnafu, + TableMetadataManagerSnafu, TableNotFoundSnafu, UnrecognizedTableOptionSnafu, + ViewAlreadyExistsSnafu, }; use crate::expr_factory; use crate::statement::show::create_partitions_stmt; @@ -398,16 +401,33 @@ impl StatementExecutor { return InvalidViewStmtSnafu {}.fail(); } }; - let optimized_plan = self.optimize_logical_plan(logical_plan)?; + + // Extract the table names from the original plan + // and rewrite them as fully qualified names. + let (table_names, plan) = + extract_and_rewrite_full_table_names(logical_plan.unwrap_df_plan(), ctx.clone()) + .context(ExtractTableNamesSnafu)?; + + let table_names = table_names.into_iter().map(|t| t.into()).collect(); + + // TODO(dennis): we don't save the optimized plan yet, + // because there are some serialization issues with our self-defined plan nodes (such as `MergeScanLogicalPlan`). + // When the issues are fixed, we can use the `optimized_plan` instead.
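+ // Until then, we encode the unoptimized plan (with fully qualified table names) produced above.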
+ // let optimized_plan = self.optimize_logical_plan(logical_plan)?.unwrap_df_plan(); // encode logical plan let encoded_plan = DFLogicalSubstraitConvertor - .encode(&optimized_plan.unwrap_df_plan()) + .encode(&plan, DefaultSerializer) .context(SubstraitCodecSnafu)?; - let expr = - expr_factory::to_create_view_expr(create_view, encoded_plan.to_vec(), ctx.clone())?; + let expr = expr_factory::to_create_view_expr( + create_view, + encoded_plan.to_vec(), + table_names, + ctx.clone(), + )?; + //TODO(dennis): validate the logical plan self.create_view_by_expr(expr, ctx).await } diff --git a/src/operator/src/statement/show.rs b/src/operator/src/statement/show.rs index ca1a500c2a38..a89df5985206 100644 --- a/src/operator/src/statement/show.rs +++ b/src/operator/src/statement/show.rs @@ -12,7 +12,6 @@ // See the License for the specific language governing permissions and // limitations under the License. -use common_meta::table_name::TableName; use common_query::Output; use common_telemetry::tracing; use partition::manager::PartitionInfo; @@ -24,6 +23,7 @@ use sql::statements::create::Partitions; use sql::statements::show::{ ShowColumns, ShowDatabases, ShowIndex, ShowKind, ShowTables, ShowVariables, }; +use table::table_name::TableName; use table::TableRef; use crate::error::{self, ExecuteStatementSnafu, Result}; diff --git a/src/promql/Cargo.toml b/src/promql/Cargo.toml index 0bc9d6187485..4039328528c3 100644 --- a/src/promql/Cargo.toml +++ b/src/promql/Cargo.toml @@ -9,30 +9,22 @@ workspace = true [dependencies] ahash.workspace = true -async-recursion = "1.0" async-trait.workspace = true bytemuck.workspace = true -catalog.workspace = true -common-catalog.workspace = true common-error.workspace = true common-macro.workspace = true -common-query.workspace = true common-recordbatch.workspace = true common-telemetry.workspace = true datafusion.workspace = true datafusion-expr.workspace = true -datafusion-functions.workspace = true datatypes.workspace = true futures = "0.3" greptime-proto.workspace = true -itertools.workspace = true lazy_static.workspace = true prometheus.workspace = true promql-parser.workspace = true prost.workspace = true -session.workspace = true snafu.workspace = true -table.workspace = true [dev-dependencies] query.workspace = true diff --git a/src/promql/src/error.rs b/src/promql/src/error.rs index a9598904f036..3f3c216acd54 100644 --- a/src/promql/src/error.rs +++ b/src/promql/src/error.rs @@ -18,35 +18,12 @@ use common_error::ext::ErrorExt; use common_error::status_code::StatusCode; use common_macro::stack_trace_debug; use datafusion::error::DataFusionError; -use promql_parser::parser::token::TokenType; -use promql_parser::parser::{Expr as PromExpr, VectorMatchCardinality}; use snafu::{Location, Snafu}; #[derive(Snafu)] #[snafu(visibility(pub))] #[stack_trace_debug] pub enum Error { - #[snafu(display("Unsupported expr type: {}", name))] - UnsupportedExpr { - name: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Unsupported vector matches: {:?}", name))] - UnsupportedVectorMatch { - name: VectorMatchCardinality, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Unexpected token: {:?}", token))] - UnexpectedToken { - token: TokenType, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Internal error during building DataFusion plan"))] DataFusionPlanning { #[snafu(source)] @@ -55,49 +32,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Unexpected plan or expression: {}", desc))] - UnexpectedPlanExpr { 
- desc: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Unknown table type, downcast failed"))] - UnknownTable { - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Cannot find time index column in table {}", table))] - TimeIndexNotFound { - table: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Cannot find value columns in table {}", table))] - ValueNotFound { - table: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display( - "Cannot accept multiple vector as function input, PromQL expr: {:?}", - expr, - ))] - MultipleVector { - expr: PromExpr, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Expect a PromQL expr but not found, input expr: {:?}", expr))] - ExpectExpr { - expr: PromExpr, - #[snafu(implicit)] - location: Location, - }, #[snafu(display( "Illegal range: offset {}, length {}, array len {}", offset, @@ -126,117 +60,24 @@ pub enum Error { location: Location, }, - #[snafu(display( - "Table (metric) name not found, this indicates a procedure error in PromQL planner" - ))] - TableNameNotFound { - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("General catalog error: "))] - Catalog { - #[snafu(implicit)] - location: Location, - source: catalog::error::Error, - }, - - #[snafu(display("Expect a range selector, but not found"))] - ExpectRangeSelector { - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Zero range in range selector"))] - ZeroRangeSelector { - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Cannot find column {col}"))] ColumnNotFound { col: String, #[snafu(implicit)] location: Location, }, - - #[snafu(display("Found multiple metric matchers in selector"))] - MultipleMetricMatchers { - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Expect a metric matcher, but not found"))] - NoMetricMatcher { - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Invalid function argument for {}", fn_name))] - FunctionInvalidArgument { - fn_name: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display( - "Attempt to combine two tables with different column sets, left: {:?}, right: {:?}", - left, - right - ))] - CombineTableColumnMismatch { - left: Vec, - right: Vec, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Multi fields calculation is not supported in {}", operator))] - MultiFieldsNotSupported { - operator: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Matcher operator {matcher_op} is not supported for {matcher}"))] - UnsupportedMatcherOp { - matcher_op: String, - matcher: String, - #[snafu(implicit)] - location: Location, - }, } impl ErrorExt for Error { fn status_code(&self) -> StatusCode { use Error::*; match self { - TimeIndexNotFound { .. } - | ValueNotFound { .. } - | UnsupportedExpr { .. } - | UnexpectedToken { .. } - | MultipleVector { .. } - | ExpectExpr { .. } - | ExpectRangeSelector { .. } - | ZeroRangeSelector { .. } - | ColumnNotFound { .. } - | Deserialize { .. } - | FunctionInvalidArgument { .. } - | UnsupportedVectorMatch { .. } - | CombineTableColumnMismatch { .. } - | DataFusionPlanning { .. } - | MultiFieldsNotSupported { .. } - | UnexpectedPlanExpr { .. } - | UnsupportedMatcherOp { .. } - | IllegalRange { .. } => StatusCode::InvalidArguments, - - UnknownTable { .. } | EmptyRange { .. } => StatusCode::Internal, - - TableNameNotFound { .. 
} => StatusCode::TableNotFound, - - MultipleMetricMatchers { .. } | NoMetricMatcher { .. } => StatusCode::InvalidSyntax, + Deserialize { .. } => StatusCode::Unexpected, + IllegalRange { .. } | ColumnNotFound { .. } | EmptyRange { .. } => { + StatusCode::InvalidArguments + } - Catalog { source, .. } => source.status_code(), + DataFusionPlanning { .. } => StatusCode::PlanQuery, } } diff --git a/src/promql/src/extension_plan.rs b/src/promql/src/extension_plan.rs index f8e32fc4dcdf..eba327c1bf64 100644 --- a/src/promql/src/extension_plan.rs +++ b/src/promql/src/extension_plan.rs @@ -35,4 +35,4 @@ pub use scalar_calculate::ScalarCalculate; pub use series_divide::{SeriesDivide, SeriesDivideExec, SeriesDivideStream}; pub use union_distinct_on::{UnionDistinctOn, UnionDistinctOnExec, UnionDistinctOnStream}; -pub(crate) type Millisecond = ::Native; +pub type Millisecond = ::Native; diff --git a/src/promql/src/lib.rs b/src/promql/src/lib.rs index 127bf45d5f1a..a29fc032e957 100644 --- a/src/promql/src/lib.rs +++ b/src/promql/src/lib.rs @@ -20,5 +20,4 @@ pub mod error; pub mod extension_plan; pub mod functions; mod metrics; -pub mod planner; pub mod range_array; diff --git a/src/query/Cargo.toml b/src/query/Cargo.toml index 23fe20944c71..00bfa6621a87 100644 --- a/src/query/Cargo.toml +++ b/src/query/Cargo.toml @@ -16,6 +16,7 @@ arrow-schema.workspace = true async-recursion = "1.0" async-stream.workspace = true async-trait = "0.1" +bytes.workspace = true catalog.workspace = true chrono.workspace = true common-base.workspace = true @@ -28,11 +29,13 @@ common-meta.workspace = true common-plugins.workspace = true common-query.workspace = true common-recordbatch.workspace = true +common-runtime.workspace = true common-telemetry.workspace = true common-time.workspace = true datafusion.workspace = true datafusion-common.workspace = true datafusion-expr.workspace = true +datafusion-functions.workspace = true datafusion-optimizer.workspace = true datafusion-physical-expr.workspace = true datafusion-sql.workspace = true @@ -41,6 +44,7 @@ futures = "0.3" futures-util.workspace = true greptime-proto.workspace = true humantime.workspace = true +itertools.workspace = true lazy_static.workspace = true meter-core.workspace = true meter-macros.workspace = true @@ -49,6 +53,7 @@ once_cell.workspace = true prometheus.workspace = true promql.workspace = true promql-parser.workspace = true +prost.workspace = true regex.workspace = true session.workspace = true snafu.workspace = true @@ -63,6 +68,7 @@ approx_eq = "0.1" arrow.workspace = true catalog = { workspace = true, features = ["testing"] } common-macro.workspace = true +common-query = { workspace = true, features = ["testing"] } format_num = "0.1" num = "0.4" num-traits = "0.2" diff --git a/src/query/src/datafusion/planner.rs b/src/query/src/datafusion/planner.rs index 1ff9770a56c9..0e98247398dd 100644 --- a/src/query/src/datafusion/planner.rs +++ b/src/query/src/datafusion/planner.rs @@ -35,7 +35,7 @@ use session::context::QueryContextRef; use snafu::ResultExt; use crate::error::{CatalogSnafu, DataFusionSnafu, Result}; -use crate::query_engine::QueryEngineState; +use crate::query_engine::{DefaultPlanDecoder, QueryEngineState}; pub struct DfContextProviderAdapter { engine_state: Arc, @@ -64,6 +64,7 @@ impl DfContextProviderAdapter { engine_state.catalog_manager().clone(), engine_state.disallow_cross_catalog_query(), query_ctx.as_ref(), + Arc::new(DefaultPlanDecoder::new(session_state.clone(), &query_ctx)?), ); let tables = resolve_tables(table_names, &mut 
table_provider).await?; diff --git a/src/query/src/dist_plan/analyzer.rs b/src/query/src/dist_plan/analyzer.rs index 870b92633981..bbb3e5ddd9cc 100644 --- a/src/query/src/dist_plan/analyzer.rs +++ b/src/query/src/dist_plan/analyzer.rs @@ -31,6 +31,7 @@ use crate::dist_plan::commutativity::{ partial_commutative_transformer, Categorizer, Commutativity, }; use crate::dist_plan::merge_scan::MergeScanLogicalPlan; +use crate::query_engine::DefaultSerializer; pub struct DistPlannerAnalyzer; @@ -150,7 +151,10 @@ impl PlanRewriter { /// Return true if should stop and expand. The input plan is the parent node of current node fn should_expand(&mut self, plan: &LogicalPlan) -> bool { - if DFLogicalSubstraitConvertor.encode(plan).is_err() { + if DFLogicalSubstraitConvertor + .encode(plan, DefaultSerializer) + .is_err() + { return true; } diff --git a/src/query/src/dist_plan/merge_scan.rs b/src/query/src/dist_plan/merge_scan.rs index 36f106b87319..1af2516d233a 100644 --- a/src/query/src/dist_plan/merge_scan.rs +++ b/src/query/src/dist_plan/merge_scan.rs @@ -21,7 +21,6 @@ use async_stream::stream; use common_base::bytes::Bytes; use common_catalog::parse_catalog_and_schema_from_db_string; use common_error::ext::BoxedError; -use common_meta::table_name::TableName; use common_plugins::GREPTIME_EXEC_READ_COST; use common_recordbatch::adapter::{DfRecordBatchStreamAdapter, RecordBatchMetrics}; use common_recordbatch::error::ExternalSnafu; @@ -48,6 +47,7 @@ use meter_macros::read_meter; use session::context::QueryContextRef; use snafu::ResultExt; use store_api::storage::RegionId; +use table::table_name::TableName; use tokio::time::Instant; use crate::error::ConvertSchemaSnafu; @@ -118,7 +118,6 @@ impl MergeScanLogicalPlan { &self.input } } - pub struct MergeScanExec { table: TableName, regions: Vec, diff --git a/src/query/src/dist_plan/planner.rs b/src/query/src/dist_plan/planner.rs index 1d29fe7aba29..4bb0cccc020c 100644 --- a/src/query/src/dist_plan/planner.rs +++ b/src/query/src/dist_plan/planner.rs @@ -19,7 +19,6 @@ use std::sync::Arc; use async_trait::async_trait; use catalog::CatalogManagerRef; use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; -use common_meta::table_name::TableName; use datafusion::common::Result; use datafusion::datasource::DefaultTableSource; use datafusion::execution::context::SessionState; @@ -35,10 +34,12 @@ use store_api::storage::RegionId; use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan}; pub use table::metadata::TableType; use table::table::adapter::DfTableProviderAdapter; +use table::table_name::TableName; use crate::dist_plan::merge_scan::{MergeScanExec, MergeScanLogicalPlan}; use crate::error; use crate::error::{CatalogSnafu, TableNotFoundSnafu}; +use crate::query_engine::DefaultSerializer; use crate::region_query::RegionQueryHandlerRef; pub struct DistExtensionPlanner { @@ -101,7 +102,7 @@ impl ExtensionPlanner for DistExtensionPlanner { // Pass down the original plan, allow execution nodes to do their optimization let amended_plan = Self::plan_with_full_table_name(input_plan.clone(), &table_name)?; let substrait_plan = DFLogicalSubstraitConvertor - .encode(&amended_plan) + .encode(&amended_plan, DefaultSerializer) .context(error::EncodeSubstraitLogicalPlanSnafu)? 
.into(); diff --git a/src/query/src/error.rs b/src/query/src/error.rs index 7c0160d96042..35d3fbdb17b9 100644 --- a/src/query/src/error.rs +++ b/src/query/src/error.rs @@ -56,20 +56,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Catalog not found: {}", catalog))] - CatalogNotFound { - catalog: String, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Schema not found: {}", schema))] - SchemaNotFound { - schema: String, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Table not found: {}", table))] TableNotFound { table: String, @@ -137,13 +123,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Invalid timestamp `{}`", raw))] - InvalidTimestamp { - raw: String, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to parse float number `{}`", raw))] ParseFloat { raw: String, @@ -347,13 +326,10 @@ impl ErrorExt for Error { } UnsupportedExpr { .. } | Unimplemented { .. } - | CatalogNotFound { .. } - | SchemaNotFound { .. } | TableNotFound { .. } | UnknownTable { .. } | TimeIndexNotFound { .. } | ParseTimestamp { .. } - | InvalidTimestamp { .. } | ParseFloat { .. } | MissingRequiredField { .. } | BuildRegex { .. } diff --git a/src/query/src/lib.rs b/src/query/src/lib.rs index 9b6413e4ed92..4ac5a7c10aa9 100644 --- a/src/query/src/lib.rs +++ b/src/query/src/lib.rs @@ -14,6 +14,7 @@ #![feature(let_chains)] #![feature(int_roundings)] +#![feature(option_get_or_insert_default)] mod analyze; pub mod dataframe; @@ -31,6 +32,7 @@ pub mod physical_planner; pub mod physical_wrapper; pub mod plan; pub mod planner; +pub mod promql; pub mod query_engine; mod range_select; pub mod region_query; diff --git a/src/query/src/optimizer.rs b/src/query/src/optimizer.rs index e6a971417c23..1cb54c7126c3 100644 --- a/src/query/src/optimizer.rs +++ b/src/query/src/optimizer.rs @@ -17,7 +17,7 @@ pub mod order_hint; pub mod remove_duplicate; pub mod string_normalization; #[cfg(test)] -mod test_util; +pub(crate) mod test_util; pub mod type_conversion; use datafusion_common::config::ConfigOptions; diff --git a/src/query/src/plan.rs b/src/query/src/plan.rs index 34495dee989a..ea9dae3770da 100644 --- a/src/query/src/plan.rs +++ b/src/query/src/plan.rs @@ -12,15 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::fmt::{Debug, Display}; use common_query::prelude::ScalarValue; -use datafusion_common::ParamValues; +use datafusion::datasource::DefaultTableSource; +use datafusion_common::tree_node::{Transformed, TreeNode, TreeNodeRewriter}; +use datafusion_common::{ParamValues, TableReference}; use datafusion_expr::LogicalPlan as DfLogicalPlan; use datatypes::data_type::ConcreteDataType; use datatypes::schema::Schema; +use session::context::QueryContextRef; use snafu::ResultExt; +pub use table::metadata::TableType; +use table::table::adapter::DfTableProviderAdapter; +use table::table_name::TableName; use crate::error::{ConvertDatafusionSchemaSnafu, DataFusionSnafu, Result}; @@ -94,6 +100,13 @@ impl LogicalPlan { LogicalPlan::DfPlan(plan) => plan, } } + + /// Returns a reference to the underlying DataFusion logical plan. + pub fn df_plan(&self) -> &DfLogicalPlan { + match self { + LogicalPlan::DfPlan(plan) => plan, + } + } } impl From<DfLogicalPlan> for LogicalPlan { @@ -101,3 +114,156 @@ impl From<DfLogicalPlan> for LogicalPlan { Self::DfPlan(plan) } } + +struct TableNamesExtractAndRewriter { + pub(crate) table_names: HashSet<TableName>, + query_ctx: QueryContextRef, +} + +impl TreeNodeRewriter for TableNamesExtractAndRewriter { + type Node = DfLogicalPlan; + + /// descend + fn f_down<'a>( + &mut self, + node: Self::Node, + ) -> datafusion::error::Result<Transformed<Self::Node>> { + match node { + DfLogicalPlan::TableScan(mut scan) => { + if let Some(source) = scan.source.as_any().downcast_ref::<DefaultTableSource>() { + if let Some(provider) = source + .table_provider + .as_any() + .downcast_ref::<DfTableProviderAdapter>() + { + if provider.table().table_type() == TableType::Base { + let info = provider.table().table_info(); + self.table_names.insert(TableName::new( + info.catalog_name.clone(), + info.schema_name.clone(), + info.name.clone(), + )); + } + } + } + match &scan.table_name { + TableReference::Full { + catalog, + schema, + table, + } => { + self.table_names.insert(TableName::new( + catalog.to_string(), + schema.to_string(), + table.to_string(), + )); + } + TableReference::Partial { schema, table } => { + self.table_names.insert(TableName::new( + self.query_ctx.current_catalog(), + schema.to_string(), + table.to_string(), + )); + + scan.table_name = TableReference::Full { + catalog: self.query_ctx.current_catalog().into(), + schema: schema.clone(), + table: table.clone(), + }; + } + TableReference::Bare { table } => { + self.table_names.insert(TableName::new( + self.query_ctx.current_catalog(), + self.query_ctx.current_schema(), + table.to_string(), + )); + + scan.table_name = TableReference::Full { + catalog: self.query_ctx.current_catalog().into(), + schema: self.query_ctx.current_schema().into(), + table: table.clone(), + }; + } + } + Ok(Transformed::yes(DfLogicalPlan::TableScan(scan))) + } + node => Ok(Transformed::no(node)), + } + } +} + +impl TableNamesExtractAndRewriter { + fn new(query_ctx: QueryContextRef) -> Self { + Self { + query_ctx, + table_names: HashSet::new(), + } + } +} + +/// Extracts and rewrites the table names in the plan into the fully qualified style, +/// returning the table names and the new plan.
+pub fn extract_and_rewrite_full_table_names( + plan: DfLogicalPlan, + query_ctx: QueryContextRef, +) -> Result<(HashSet, DfLogicalPlan)> { + let mut extractor = TableNamesExtractAndRewriter::new(query_ctx); + let plan = plan.rewrite(&mut extractor).context(DataFusionSnafu)?; + Ok((extractor.table_names, plan.data)) +} + +#[cfg(test)] +pub(crate) mod tests { + + use std::sync::Arc; + + use arrow::datatypes::{DataType, Field, Schema, SchemaRef, TimeUnit}; + use common_catalog::consts::DEFAULT_CATALOG_NAME; + use datafusion::logical_expr::builder::LogicalTableSource; + use datafusion::logical_expr::{col, lit, LogicalPlan, LogicalPlanBuilder}; + use session::context::QueryContextBuilder; + + use super::*; + + pub(crate) fn mock_plan() -> LogicalPlan { + let schema = Schema::new(vec![ + Field::new("id", DataType::Int32, true), + Field::new("name", DataType::Utf8, true), + Field::new("ts", DataType::Timestamp(TimeUnit::Millisecond, None), true), + ]); + let table_source = LogicalTableSource::new(SchemaRef::new(schema)); + + let projection = None; + + let builder = + LogicalPlanBuilder::scan("devices", Arc::new(table_source), projection).unwrap(); + + builder + .filter(col("id").gt(lit(500))) + .unwrap() + .build() + .unwrap() + } + + #[test] + fn test_extract_full_table_names() { + let ctx = QueryContextBuilder::default() + .current_schema("test".to_string()) + .build(); + + let (table_names, plan) = + extract_and_rewrite_full_table_names(mock_plan(), Arc::new(ctx)).unwrap(); + + assert_eq!(1, table_names.len()); + assert!(table_names.contains(&TableName::new( + DEFAULT_CATALOG_NAME.to_string(), + "test".to_string(), + "devices".to_string() + ))); + + assert_eq!( + "Filter: devices.id > Int32(500)\n TableScan: greptime.test.devices", + format!("{:?}", plan) + ); + } +} diff --git a/src/query/src/planner.rs b/src/query/src/planner.rs index 5f350a638d7c..9643e63ef496 100644 --- a/src/query/src/planner.rs +++ b/src/query/src/planner.rs @@ -24,7 +24,6 @@ use datafusion::execution::context::SessionState; use datafusion::sql::planner::PlannerContext; use datafusion_expr::Expr as DfExpr; use datafusion_sql::planner::{ParserOptions, SqlToRel}; -use promql::planner::PromPlanner; use promql_parser::parser::EvalStmt; use session::context::QueryContextRef; use snafu::ResultExt; @@ -34,7 +33,8 @@ use sql::statements::statement::Statement; use crate::error::{DataFusionSnafu, PlanSqlSnafu, QueryPlanSnafu, Result, SqlSnafu}; use crate::parser::QueryStatement; use crate::plan::LogicalPlan; -use crate::query_engine::QueryEngineState; +use crate::promql::planner::PromPlanner; +use crate::query_engine::{DefaultPlanDecoder, QueryEngineState}; use crate::range_select::plan_rewrite::RangePlanRewriter; use crate::{DfContextProviderAdapter, QueryEngineContext}; @@ -69,6 +69,10 @@ impl DfLogicalPlanner { self.engine_state.catalog_manager().clone(), self.engine_state.disallow_cross_catalog_query(), query_ctx.as_ref(), + Arc::new(DefaultPlanDecoder::new( + self.session_state.clone(), + &query_ctx, + )?), ); let context_provider = DfContextProviderAdapter::try_new( @@ -140,6 +144,10 @@ impl DfLogicalPlanner { self.engine_state.catalog_manager().clone(), self.engine_state.disallow_cross_catalog_query(), query_ctx.as_ref(), + Arc::new(DefaultPlanDecoder::new( + self.session_state.clone(), + &query_ctx, + )?), ); PromPlanner::stmt_to_plan(table_provider, stmt) .await diff --git a/src/query/src/promql.rs b/src/query/src/promql.rs new file mode 100644 index 000000000000..06d2bbd21ae0 --- /dev/null +++ b/src/query/src/promql.rs 
@@ -0,0 +1,16 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +pub(crate) mod error; +pub mod planner; diff --git a/src/query/src/promql/error.rs b/src/query/src/promql/error.rs new file mode 100644 index 000000000000..f204cdbd7b76 --- /dev/null +++ b/src/query/src/promql/error.rs @@ -0,0 +1,229 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::any::Any; + +use common_error::ext::ErrorExt; +use common_error::status_code::StatusCode; +use common_macro::stack_trace_debug; +use datafusion::error::DataFusionError; +use promql::error::Error as PromqlError; +use promql_parser::parser::token::TokenType; +use promql_parser::parser::{Expr as PromExpr, VectorMatchCardinality}; +use snafu::{Location, Snafu}; + +#[derive(Snafu)] +#[snafu(visibility(pub))] +#[stack_trace_debug] +pub enum Error { + #[snafu(display("Unsupported expr type: {}", name))] + UnsupportedExpr { + name: String, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Unsupported vector matches: {:?}", name))] + UnsupportedVectorMatch { + name: VectorMatchCardinality, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Unexpected token: {:?}", token))] + UnexpectedToken { + token: TokenType, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Internal error during building DataFusion plan"))] + DataFusionPlanning { + #[snafu(source)] + error: datafusion::error::DataFusionError, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Unexpected plan or expression: {}", desc))] + UnexpectedPlanExpr { + desc: String, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Unknown table type, downcast failed"))] + UnknownTable { + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Cannot find time index column in table {}", table))] + TimeIndexNotFound { + table: String, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Cannot find value columns in table {}", table))] + ValueNotFound { + table: String, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Failed to create PromQL plan node"))] + PromqlPlanNode { + #[snafu(source)] + source: PromqlError, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display( + "Cannot accept multiple vector as function input, PromQL expr: {:?}", + expr, + ))] + MultipleVector { + expr: PromExpr, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display( + 
"Table (metric) name not found, this indicates a procedure error in PromQL planner" + ))] + TableNameNotFound { + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("General catalog error: "))] + Catalog { + #[snafu(implicit)] + location: Location, + source: catalog::error::Error, + }, + + #[snafu(display("Expect a range selector, but not found"))] + ExpectRangeSelector { + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Zero range in range selector"))] + ZeroRangeSelector { + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Cannot find column {col}"))] + ColumnNotFound { + col: String, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Found multiple metric matchers in selector"))] + MultipleMetricMatchers { + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Expect a metric matcher, but not found"))] + NoMetricMatcher { + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Invalid function argument for {}", fn_name))] + FunctionInvalidArgument { + fn_name: String, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display( + "Attempt to combine two tables with different column sets, left: {:?}, right: {:?}", + left, + right + ))] + CombineTableColumnMismatch { + left: Vec, + right: Vec, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Multi fields calculation is not supported in {}", operator))] + MultiFieldsNotSupported { + operator: String, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Matcher operator {matcher_op} is not supported for {matcher}"))] + UnsupportedMatcherOp { + matcher_op: String, + matcher: String, + #[snafu(implicit)] + location: Location, + }, +} + +impl ErrorExt for Error { + fn status_code(&self) -> StatusCode { + use Error::*; + match self { + TimeIndexNotFound { .. } + | ValueNotFound { .. } + | UnsupportedExpr { .. } + | UnexpectedToken { .. } + | MultipleVector { .. } + | ExpectRangeSelector { .. } + | ZeroRangeSelector { .. } + | ColumnNotFound { .. } + | FunctionInvalidArgument { .. } + | UnsupportedVectorMatch { .. } + | CombineTableColumnMismatch { .. } + | UnexpectedPlanExpr { .. } + | UnsupportedMatcherOp { .. } => StatusCode::InvalidArguments, + + UnknownTable { .. } => StatusCode::Internal, + + PromqlPlanNode { source, .. } => source.status_code(), + + DataFusionPlanning { .. } => StatusCode::PlanQuery, + + TableNameNotFound { .. } => StatusCode::TableNotFound, + + MultipleMetricMatchers { .. } | NoMetricMatcher { .. } => StatusCode::InvalidSyntax, + + MultiFieldsNotSupported { .. } => StatusCode::Unsupported, + Catalog { source, .. 
} => source.status_code(), + } + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +pub type Result = std::result::Result; + +impl From for DataFusionError { + fn from(err: Error) -> Self { + DataFusionError::External(Box::new(err)) + } +} diff --git a/src/promql/src/planner.rs b/src/query/src/promql/planner.rs similarity index 99% rename from src/promql/src/planner.rs rename to src/query/src/promql/planner.rs index 79100187fe58..9d9fd66a31c8 100644 --- a/src/promql/src/planner.rs +++ b/src/query/src/promql/planner.rs @@ -38,6 +38,15 @@ use datafusion_expr::utils::conjunction; use datatypes::arrow::datatypes::{DataType as ArrowDataType, TimeUnit as ArrowTimeUnit}; use datatypes::data_type::ConcreteDataType; use itertools::Itertools; +use promql::extension_plan::{ + build_special_time_expr, EmptyMetric, HistogramFold, InstantManipulate, Millisecond, + RangeManipulate, ScalarCalculate, SeriesDivide, SeriesNormalize, UnionDistinctOn, +}; +use promql::functions::{ + AbsentOverTime, AvgOverTime, Changes, CountOverTime, Delta, Deriv, HoltWinters, IDelta, + Increase, LastOverTime, MaxOverTime, MinOverTime, PredictLinear, PresentOverTime, + QuantileOverTime, Rate, Resets, StddevOverTime, StdvarOverTime, SumOverTime, +}; use promql_parser::label::{MatchOp, Matcher, Matchers, METRIC_NAME}; use promql_parser::parser::token::TokenType; use promql_parser::parser::{ @@ -49,23 +58,14 @@ use promql_parser::parser::{ use snafu::{ensure, OptionExt, ResultExt}; use table::table::adapter::DfTableProviderAdapter; -use crate::error::{ +use crate::promql::error::{ CatalogSnafu, ColumnNotFoundSnafu, CombineTableColumnMismatchSnafu, DataFusionPlanningSnafu, ExpectRangeSelectorSnafu, FunctionInvalidArgumentSnafu, MultiFieldsNotSupportedSnafu, - MultipleMetricMatchersSnafu, MultipleVectorSnafu, NoMetricMatcherSnafu, Result, - TableNameNotFoundSnafu, TimeIndexNotFoundSnafu, UnexpectedPlanExprSnafu, UnexpectedTokenSnafu, - UnknownTableSnafu, UnsupportedExprSnafu, UnsupportedMatcherOpSnafu, + MultipleMetricMatchersSnafu, MultipleVectorSnafu, NoMetricMatcherSnafu, PromqlPlanNodeSnafu, + Result, TableNameNotFoundSnafu, TimeIndexNotFoundSnafu, UnexpectedPlanExprSnafu, + UnexpectedTokenSnafu, UnknownTableSnafu, UnsupportedExprSnafu, UnsupportedMatcherOpSnafu, UnsupportedVectorMatchSnafu, ValueNotFoundSnafu, ZeroRangeSelectorSnafu, }; -use crate::extension_plan::{ - build_special_time_expr, EmptyMetric, HistogramFold, InstantManipulate, Millisecond, - RangeManipulate, ScalarCalculate, SeriesDivide, SeriesNormalize, UnionDistinctOn, -}; -use crate::functions::{ - AbsentOverTime, AvgOverTime, Changes, CountOverTime, Delta, Deriv, HoltWinters, IDelta, - Increase, LastOverTime, MaxOverTime, MinOverTime, PredictLinear, PresentOverTime, - QuantileOverTime, Rate, Resets, StddevOverTime, StdvarOverTime, SumOverTime, -}; /// `time()` function in PromQL. 
const SPECIAL_TIME_FUNCTION: &str = "time"; @@ -1522,16 +1522,19 @@ impl PromPlanner { }, ); let scalar_plan = LogicalPlan::Extension(Extension { - node: Arc::new(ScalarCalculate::new( - self.ctx.start, - self.ctx.end, - self.ctx.interval, - input, - self.ctx.time_index_column.as_ref().unwrap(), - &self.ctx.tag_columns, - &self.ctx.field_columns[0], - self.ctx.table_name.as_deref(), - )?), + node: Arc::new( + ScalarCalculate::new( + self.ctx.start, + self.ctx.end, + self.ctx.interval, + input, + self.ctx.time_index_column.as_ref().unwrap(), + &self.ctx.tag_columns, + &self.ctx.field_columns[0], + self.ctx.table_name.as_deref(), + ) + .context(PromqlPlanNodeSnafu)?, + ), }); // scalar plan have no tag columns self.ctx.tag_columns.clear(); @@ -2183,6 +2186,7 @@ mod test { use catalog::memory::MemoryCatalogManager; use catalog::RegisterTableRequest; use common_catalog::consts::{DEFAULT_CATALOG_NAME, DEFAULT_SCHEMA_NAME}; + use common_query::test_util::DummyDecoder; use datatypes::prelude::ConcreteDataType; use datatypes::schema::{ColumnSchema, Schema}; use promql_parser::label::Labels; @@ -2249,7 +2253,12 @@ mod test { .is_ok()); } - DfTableSourceProvider::new(catalog_list, false, QueryContext::arc().as_ref()) + DfTableSourceProvider::new( + catalog_list, + false, + QueryContext::arc().as_ref(), + DummyDecoder::arc(), + ) } // { @@ -3072,7 +3081,12 @@ mod test { .is_ok()); let plan = PromPlanner::stmt_to_plan( - DfTableSourceProvider::new(catalog_list.clone(), false, QueryContext::arc().as_ref()), + DfTableSourceProvider::new( + catalog_list.clone(), + false, + QueryContext::arc().as_ref(), + DummyDecoder::arc(), + ), EvalStmt { expr: parser::parse("metrics{tag = \"1\"}").unwrap(), start: UNIX_EPOCH, @@ -3095,7 +3109,12 @@ mod test { \n TableScan: metrics [tag:Utf8, timestamp:Timestamp(Nanosecond, None), field:Float64;N]" ); let plan = PromPlanner::stmt_to_plan( - DfTableSourceProvider::new(catalog_list.clone(), false, QueryContext::arc().as_ref()), + DfTableSourceProvider::new( + catalog_list.clone(), + false, + QueryContext::arc().as_ref(), + DummyDecoder::arc(), + ), EvalStmt { expr: parser::parse("avg_over_time(metrics{tag = \"1\"}[5s])").unwrap(), start: UNIX_EPOCH, diff --git a/src/query/src/query_engine.rs b/src/query/src/query_engine.rs index 18923f3b96ad..1beea2a1c2d2 100644 --- a/src/query/src/query_engine.rs +++ b/src/query/src/query_engine.rs @@ -13,9 +13,9 @@ // limitations under the License. 
mod context; +mod default_serializer; pub mod options; mod state; - use std::any::Any; use std::sync::Arc; @@ -29,6 +29,7 @@ use common_function::scalars::aggregate::AggregateFunctionMetaRef; use common_query::prelude::ScalarUdf; use common_query::Output; use datatypes::schema::Schema; +pub use default_serializer::{DefaultPlanDecoder, DefaultSerializer}; use session::context::QueryContextRef; use table::TableRef; diff --git a/src/query/src/query_engine/context.rs b/src/query/src/query_engine/context.rs index f76332cde2b4..c527e9d40557 100644 --- a/src/query/src/query_engine/context.rs +++ b/src/query/src/query_engine/context.rs @@ -14,10 +14,13 @@ use std::sync::Arc; +use common_query::logical_plan::SubstraitPlanDecoderRef; use common_telemetry::tracing_context::TracingContext; use datafusion::execution::context::{SessionState, TaskContext}; use session::context::QueryContextRef; +use crate::query_engine::default_serializer::DefaultPlanDecoder; + #[derive(Debug)] pub struct QueryEngineContext { state: SessionState, @@ -58,6 +61,14 @@ impl QueryEngineContext { )) } + /// Creates a [`LogicalPlan`] decoder + pub fn new_plan_decoder(&self) -> crate::error::Result { + Ok(Arc::new(DefaultPlanDecoder::new( + self.state.clone(), + &self.query_ctx, + )?)) + } + /// Mock an engine context for unit tests. #[cfg(any(test, feature = "test"))] pub fn mock() -> Self { diff --git a/src/query/src/query_engine/default_serializer.rs b/src/query/src/query_engine/default_serializer.rs new file mode 100644 index 000000000000..ff341a26ed82 --- /dev/null +++ b/src/query/src/query_engine/default_serializer.rs @@ -0,0 +1,171 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use common_error::ext::BoxedError; +use common_function::function_registry::FUNCTION_REGISTRY; +use common_function::scalars::udf::create_udf; +use common_query::logical_plan::SubstraitPlanDecoder; +use datafusion::catalog::CatalogProviderList; +use datafusion::common::DataFusionError; +use datafusion::error::Result; +use datafusion::execution::context::SessionState; +use datafusion::execution::registry::SerializerRegistry; +use datafusion::execution::FunctionRegistry; +use datafusion::logical_expr::LogicalPlan; +use datafusion_expr::UserDefinedLogicalNode; +use greptime_proto::substrait_extension::MergeScan as PbMergeScan; +use prost::Message; +use session::context::QueryContextRef; +use snafu::ResultExt; +use substrait::extension_serializer::ExtensionSerializer; +use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan}; + +use crate::dist_plan::MergeScanLogicalPlan; +use crate::error::DataFusionSnafu; + +/// Extended [`substrait::extension_serializer::ExtensionSerializer`] but supports [`MergeScanLogicalPlan`] serialization. 
+pub struct DefaultSerializer; + +impl SerializerRegistry for DefaultSerializer { + fn serialize_logical_plan(&self, node: &dyn UserDefinedLogicalNode) -> Result<Vec<u8>> { + if node.name() == MergeScanLogicalPlan::name() { + let merge_scan = node + .as_any() + .downcast_ref::<MergeScanLogicalPlan>() + .expect("Failed to downcast to MergeScanLogicalPlan"); + + let input = merge_scan.input(); + let is_placeholder = merge_scan.is_placeholder(); + let input = DFLogicalSubstraitConvertor + .encode(input, DefaultSerializer) + .map_err(|e| DataFusionError::External(Box::new(e)))? + .to_vec(); + + Ok(PbMergeScan { + is_placeholder, + input, + } + .encode_to_vec()) + } else { + ExtensionSerializer.serialize_logical_plan(node) + } + } + + fn deserialize_logical_plan( + &self, + name: &str, + bytes: &[u8], + ) -> Result<Arc<dyn UserDefinedLogicalNode>> { + if name == MergeScanLogicalPlan::name() { + // TODO(dennis): missing `session_state` to decode the logical plan in `MergeScanLogicalPlan`, + // so we only save the unoptimized logical plan for view currently. + Err(DataFusionError::Substrait(format!( + "Unsupported plan node: {name}" + ))) + } else { + ExtensionSerializer.deserialize_logical_plan(name, bytes) + } + } +} + +/// The DataFusion [`LogicalPlan`] decoder. +pub struct DefaultPlanDecoder { + session_state: SessionState, +} + +impl DefaultPlanDecoder { + pub fn new( + mut session_state: SessionState, + query_ctx: &QueryContextRef, + ) -> crate::error::Result<Self> { + // The Substrait decoder looks up UDFs in the SessionState, so we need to register them. + // Note: the query context must be passed to set the timezone. + for func in FUNCTION_REGISTRY.functions() { + let udf = Arc::new(create_udf(func, query_ctx.clone(), Default::default()).into()); + session_state.register_udf(udf).context(DataFusionSnafu)?; + } + + Ok(Self { session_state }) + } +} + +#[async_trait::async_trait] +impl SubstraitPlanDecoder for DefaultPlanDecoder { + async fn decode( + &self, + message: bytes::Bytes, + catalog_list: Arc<dyn CatalogProviderList>, + optimize: bool, + ) -> common_query::error::Result<LogicalPlan> { + // The session_state already has the `DefaultSerializer` as `SerializerRegistry`.
+ let logical_plan = DFLogicalSubstraitConvertor + .decode(message, catalog_list.clone(), self.session_state.clone()) + .await + .map_err(BoxedError::new) + .context(common_query::error::DecodePlanSnafu)?; + + if optimize { + self.session_state + .optimize(&logical_plan) + .context(common_query::error::GeneralDataFusionSnafu) + } else { + Ok(logical_plan) + } + } +} + +#[cfg(test)] +mod tests { + use session::context::QueryContext; + + use super::*; + use crate::dummy_catalog::DummyCatalogList; + use crate::optimizer::test_util::mock_table_provider; + use crate::plan::tests::mock_plan; + use crate::QueryEngineFactory; + + #[tokio::test] + async fn test_serializer_decode_plan() { + let catalog_list = catalog::memory::new_memory_catalog_manager().unwrap(); + let factory = QueryEngineFactory::new(catalog_list, None, None, None, false); + + let engine = factory.query_engine(); + + let plan = mock_plan(); + + let bytes = DFLogicalSubstraitConvertor + .encode(&plan, DefaultSerializer) + .unwrap(); + + let plan_decoder = engine + .engine_context(QueryContext::arc()) + .new_plan_decoder() + .unwrap(); + let table_provider = Arc::new(mock_table_provider(1.into())); + let catalog_list = Arc::new(DummyCatalogList::with_table_provider(table_provider)); + + let decode_plan = plan_decoder + .decode(bytes, catalog_list, false) + .await + .unwrap(); + + assert_eq!( + "Filter: devices.k0 > Int32(500) + TableScan: devices projection=[k0, ts, v0]", + format!("{:?}", decode_plan), + ); + } +} diff --git a/src/query/src/query_engine/state.rs b/src/query/src/query_engine/state.rs index 9fdee8fc0e36..51b3f82ef228 100644 --- a/src/query/src/query_engine/state.rs +++ b/src/query/src/query_engine/state.rs @@ -37,7 +37,6 @@ use datafusion_optimizer::analyzer::count_wildcard_rule::CountWildcardRule; use datafusion_optimizer::analyzer::{Analyzer, AnalyzerRule}; use datafusion_optimizer::optimizer::Optimizer; use promql::extension_plan::PromExtensionPlanner; -use substrait::extension_serializer::ExtensionSerializer; use table::table::adapter::DfTableProviderAdapter; use table::TableRef; @@ -49,6 +48,7 @@ use crate::optimizer::string_normalization::StringNormalizationRule; use crate::optimizer::type_conversion::TypeConversionRule; use crate::optimizer::ExtensionAnalyzerRule; use crate::query_engine::options::QueryOptions; +use crate::query_engine::DefaultSerializer; use crate::range_select::planner::RangeSelectPlanner; use crate::region_query::RegionQueryHandlerRef; use crate::QueryEngineContext; @@ -115,8 +115,8 @@ impl QueryEngineState { physical_optimizer.rules.push(Arc::new(RemoveDuplicate)); let session_state = SessionState::new_with_config_rt(session_config, runtime_env) - .with_serializer_registry(Arc::new(ExtensionSerializer)) .with_analyzer_rules(analyzer.rules) + .with_serializer_registry(Arc::new(DefaultSerializer)) .with_query_planner(Arc::new(DfQueryPlanner::new( catalog_list.clone(), region_query_handler, diff --git a/src/table/Cargo.toml b/src/table/Cargo.toml index 9463b1809fe7..b33f4757c66e 100644 --- a/src/table/Cargo.toml +++ b/src/table/Cargo.toml @@ -11,6 +11,7 @@ testing = [] workspace = true [dependencies] +api.workspace = true async-trait = "0.1" chrono.workspace = true common-base.workspace = true diff --git a/src/table/src/lib.rs b/src/table/src/lib.rs index 857d529e8add..f4eb68cc85c7 100644 --- a/src/table/src/lib.rs +++ b/src/table/src/lib.rs @@ -21,6 +21,7 @@ pub mod predicate; pub mod requests; pub mod stats; pub mod table; +pub mod table_name; pub mod table_reference; pub mod test_util; 
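For reference, a minimal usage sketch of the `table_name` module that the hunk above adds to `table`'s lib.rs (the `TableName` move itself follows below). It assumes `TableName::new` accepts anything convertible into `String` and that its `Display` impl renders the fully qualified `catalog.schema.table` form, as the call sites and tests in this diff suggest:

use table::table_name::TableName;

fn main() {
    // Assumption: `new` takes `impl Into<String>` arguments, per the call sites in plan.rs.
    let name = TableName::new("greptime", "public", "numbers");
    // Assumption: Display renders the fully qualified `catalog.schema.table` form.
    assert_eq!("greptime.public.numbers", name.to_string());
}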
diff --git a/src/common/meta/src/table_name.rs b/src/table/src/table_name.rs similarity index 98% rename from src/common/meta/src/table_name.rs rename to src/table/src/table_name.rs index 645e6386df02..f999e013f243 100644 --- a/src/common/meta/src/table_name.rs +++ b/src/table/src/table_name.rs @@ -16,7 +16,8 @@ use std::fmt::{Display, Formatter}; use api::v1::TableName as PbTableName; use serde::{Deserialize, Serialize}; -use table::table_reference::TableReference; + +use crate::table_reference::TableReference; #[derive(Debug, Clone, Hash, Eq, PartialEq, Deserialize, Serialize)] pub struct TableName { diff --git a/tests-integration/src/cluster.rs b/tests-integration/src/cluster.rs index bfa59966ae8e..7c0bb2f1d0ba 100644 --- a/tests-integration/src/cluster.rs +++ b/tests-integration/src/cluster.rs @@ -364,16 +364,12 @@ impl GreptimeDbClusterBuilder { .build(), ); - let table_cache = cache_registry.get().unwrap(); - let table_route_cache = cache_registry.get().unwrap(); let catalog_manager = KvBackendCatalogManager::new( Mode::Distributed, Some(meta_client.clone()), cached_meta_backend.clone(), - table_cache, - table_route_cache, - ) - .await; + cache_registry.clone(), + ); let handlers_executor = HandlerGroupExecutor::new(vec![ Arc::new(ParseMailboxMessageHandler), diff --git a/tests-integration/src/grpc.rs b/tests-integration/src/grpc.rs index 6d7179b18da1..ed2d6425a439 100644 --- a/tests-integration/src/grpc.rs +++ b/tests-integration/src/grpc.rs @@ -34,6 +34,7 @@ mod test { use frontend::instance::Instance; use query::parser::QueryLanguageParser; use query::plan::LogicalPlan; + use query::query_engine::DefaultSerializer; use servers::query_handler::grpc::GrpcQueryHandler; use session::context::QueryContext; use store_api::storage::RegionId; @@ -544,7 +545,9 @@ CREATE TABLE {table_name} ( .plan(stmt, QueryContext::arc()) .await .unwrap(); - let plan = DFLogicalSubstraitConvertor.encode(&plan).unwrap(); + let plan = DFLogicalSubstraitConvertor + .encode(&plan, DefaultSerializer) + .unwrap(); for (region, dn) in region_to_dn_map.iter() { let region_server = instance.datanodes().get(dn).unwrap().region_server(); diff --git a/tests-integration/src/instance.rs b/tests-integration/src/instance.rs index 1e52162ef3ca..feff39e136c0 100644 --- a/tests-integration/src/instance.rs +++ b/tests-integration/src/instance.rs @@ -32,6 +32,7 @@ mod tests { use frontend::instance::Instance; use query::parser::QueryLanguageParser; use query::plan::LogicalPlan; + use query::query_engine::DefaultSerializer; use servers::interceptor::{SqlQueryInterceptor, SqlQueryInterceptorRef}; use servers::query_handler::sql::SqlQueryHandler; use session::context::{QueryContext, QueryContextRef}; @@ -238,7 +239,9 @@ mod tests { .plan(stmt, QueryContext::arc()) .await .unwrap(); - let plan = DFLogicalSubstraitConvertor.encode(&plan).unwrap(); + let plan = DFLogicalSubstraitConvertor + .encode(&plan, DefaultSerializer) + .unwrap(); for (region, dn) in region_to_dn_map.iter() { let region_server = instance.datanodes().get(dn).unwrap().region_server(); diff --git a/tests-integration/src/standalone.rs b/tests-integration/src/standalone.rs index 5cbc46c69305..35a14e261260 100644 --- a/tests-integration/src/standalone.rs +++ b/tests-integration/src/standalone.rs @@ -146,10 +146,8 @@ impl GreptimeDbStandaloneBuilder { Mode::Standalone, None, kv_backend.clone(), - cache_registry.get().unwrap(), - cache_registry.get().unwrap(), - ) - .await; + cache_registry.clone(), + ); let flow_builder = FlownodeBuilder::new( 1, // for standalone 
mode this value is default to one diff --git a/tests/cases/standalone/common/view/create.result b/tests/cases/standalone/common/view/create.result index dbcd435a7424..855eb08bb7ed 100644 --- a/tests/cases/standalone/common/view/create.result +++ b/tests/cases/standalone/common/view/create.result @@ -1,9 +1,9 @@ --- test CREATE VIEW --- -CREATE DATABASE for_test_view; +CREATE DATABASE schema_for_view_test; Affected Rows: 1 -USE for_test_view; +USE schema_for_view_test; Affected Rows: 0 @@ -22,17 +22,17 @@ Error: 2000(InvalidSyntax), sql parser error: Expected SELECT, VALUES, or a subq --- Table already exists --- CREATE VIEW test_table as SELECT * FROM public.numbers; -Error: 4000(TableAlreadyExists), Table already exists: `greptime.for_test_view.test_table` +Error: 4000(TableAlreadyExists), Table already exists: `greptime.schema_for_view_test.test_table` --- Table already exists even when create_if_not_exists --- CREATE VIEW IF NOT EXISTS test_table as SELECT * FROM public.numbers; -Error: 4000(TableAlreadyExists), Table already exists: `greptime.for_test_view.test_table` +Error: 4000(TableAlreadyExists), Table already exists: `greptime.schema_for_view_test.test_table` --- Table already exists even when or_replace --- CREATE OR REPLACE VIEW test_table as SELECT * FROM public.numbers; -Error: 4000(TableAlreadyExists), Table already exists: `greptime.for_test_view.test_table` +Error: 4000(TableAlreadyExists), Table already exists: `greptime.schema_for_view_test.test_table` CREATE VIEW test_view as SELECT * FROM public.numbers; @@ -41,7 +41,7 @@ Affected Rows: 0 --- View already exists ---- CREATE VIEW test_view as SELECT * FROM public.numbers; -Error: 4000(TableAlreadyExists), View already exists: `greptime.for_test_view.test_view` +Error: 4000(TableAlreadyExists), View already exists: `greptime.schema_for_view_test.test_view` CREATE VIEW IF NOT EXISTS test_view as SELECT * FROM public.numbers; @@ -72,51 +72,51 @@ SHOW FULL TABLES; -- SQLNESS REPLACE (\s\d+\s) ID SELECT * FROM INFORMATION_SCHEMA.TABLES ORDER BY TABLE_NAME, TABLE_TYPE; -+---------------+--------------------+---------------------------------------+-----------------+----------+-------------+ -| table_catalog | table_schema | table_name | table_type | table_id | engine | -+---------------+--------------------+---------------------------------------+-----------------+----------+-------------+ -| greptime | information_schema | build_info | LOCAL TEMPORARY |ID | | -| greptime | information_schema | character_sets | LOCAL TEMPORARY |ID | | -| greptime | information_schema | check_constraints | LOCAL TEMPORARY |ID | | -| greptime | information_schema | cluster_info | LOCAL TEMPORARY |ID | | -| greptime | information_schema | collation_character_set_applicability | LOCAL TEMPORARY |ID | | -| greptime | information_schema | collations | LOCAL TEMPORARY |ID | | -| greptime | information_schema | column_privileges | LOCAL TEMPORARY |ID | | -| greptime | information_schema | column_statistics | LOCAL TEMPORARY |ID | | -| greptime | information_schema | columns | LOCAL TEMPORARY |ID | | -| greptime | information_schema | engines | LOCAL TEMPORARY |ID | | -| greptime | information_schema | events | LOCAL TEMPORARY |ID | | -| greptime | information_schema | files | LOCAL TEMPORARY |ID | | -| greptime | information_schema | global_status | LOCAL TEMPORARY |ID | | -| greptime | information_schema | key_column_usage | LOCAL TEMPORARY |ID | | -| greptime | public | numbers | LOCAL TEMPORARY |ID | test_engine | -| greptime | information_schema | 
optimizer_trace | LOCAL TEMPORARY |ID | | -| greptime | information_schema | parameters | LOCAL TEMPORARY |ID | | -| greptime | information_schema | partitions | LOCAL TEMPORARY |ID | | -| greptime | information_schema | profiling | LOCAL TEMPORARY |ID | | -| greptime | information_schema | referential_constraints | LOCAL TEMPORARY |ID | | -| greptime | information_schema | region_peers | LOCAL TEMPORARY |ID | | -| greptime | information_schema | routines | LOCAL TEMPORARY |ID | | -| greptime | information_schema | runtime_metrics | LOCAL TEMPORARY |ID | | -| greptime | information_schema | schema_privileges | LOCAL TEMPORARY |ID | | -| greptime | information_schema | schemata | LOCAL TEMPORARY |ID | | -| greptime | information_schema | session_status | LOCAL TEMPORARY |ID | | -| greptime | information_schema | table_constraints | LOCAL TEMPORARY |ID | | -| greptime | information_schema | table_privileges | LOCAL TEMPORARY |ID | | -| greptime | information_schema | tables | LOCAL TEMPORARY |ID | | -| greptime | for_test_view | test_table | BASE TABLE |ID | mito | -| greptime | for_test_view | test_view | VIEW |ID | | -| greptime | information_schema | triggers | LOCAL TEMPORARY |ID | | -+---------------+--------------------+---------------------------------------+-----------------+----------+-------------+ ++---------------+----------------------+---------------------------------------+-----------------+----------+-------------+ +| table_catalog | table_schema | table_name | table_type | table_id | engine | ++---------------+----------------------+---------------------------------------+-----------------+----------+-------------+ +| greptime | information_schema | build_info | LOCAL TEMPORARY |ID | | +| greptime | information_schema | character_sets | LOCAL TEMPORARY |ID | | +| greptime | information_schema | check_constraints | LOCAL TEMPORARY |ID | | +| greptime | information_schema | cluster_info | LOCAL TEMPORARY |ID | | +| greptime | information_schema | collation_character_set_applicability | LOCAL TEMPORARY |ID | | +| greptime | information_schema | collations | LOCAL TEMPORARY |ID | | +| greptime | information_schema | column_privileges | LOCAL TEMPORARY |ID | | +| greptime | information_schema | column_statistics | LOCAL TEMPORARY |ID | | +| greptime | information_schema | columns | LOCAL TEMPORARY |ID | | +| greptime | information_schema | engines | LOCAL TEMPORARY |ID | | +| greptime | information_schema | events | LOCAL TEMPORARY |ID | | +| greptime | information_schema | files | LOCAL TEMPORARY |ID | | +| greptime | information_schema | global_status | LOCAL TEMPORARY |ID | | +| greptime | information_schema | key_column_usage | LOCAL TEMPORARY |ID | | +| greptime | public | numbers | LOCAL TEMPORARY |ID | test_engine | +| greptime | information_schema | optimizer_trace | LOCAL TEMPORARY |ID | | +| greptime | information_schema | parameters | LOCAL TEMPORARY |ID | | +| greptime | information_schema | partitions | LOCAL TEMPORARY |ID | | +| greptime | information_schema | profiling | LOCAL TEMPORARY |ID | | +| greptime | information_schema | referential_constraints | LOCAL TEMPORARY |ID | | +| greptime | information_schema | region_peers | LOCAL TEMPORARY |ID | | +| greptime | information_schema | routines | LOCAL TEMPORARY |ID | | +| greptime | information_schema | runtime_metrics | LOCAL TEMPORARY |ID | | +| greptime | information_schema | schema_privileges | LOCAL TEMPORARY |ID | | +| greptime | information_schema | schemata | LOCAL TEMPORARY |ID | | +| greptime | 
information_schema | session_status | LOCAL TEMPORARY |ID | | +| greptime | information_schema | table_constraints | LOCAL TEMPORARY |ID | | +| greptime | information_schema | table_privileges | LOCAL TEMPORARY |ID | | +| greptime | information_schema | tables | LOCAL TEMPORARY |ID | | +| greptime | schema_for_view_test | test_table | BASE TABLE |ID | mito | +| greptime | schema_for_view_test | test_view | VIEW |ID | | +| greptime | information_schema | triggers | LOCAL TEMPORARY |ID | | ++---------------+----------------------+---------------------------------------+-----------------+----------+-------------+ -- SQLNESS REPLACE (\s\d+\s) ID SELECT * FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_TYPE = 'VIEW'; -+---------------+---------------+------------+------------+----------+--------+ -| table_catalog | table_schema | table_name | table_type | table_id | engine | -+---------------+---------------+------------+------------+----------+--------+ -| greptime | for_test_view | test_view | VIEW |ID | | -+---------------+---------------+------------+------------+----------+--------+ ++---------------+----------------------+------------+------------+----------+--------+ +| table_catalog | table_schema | table_name | table_type | table_id | engine | ++---------------+----------------------+------------+------------+----------+--------+ +| greptime | schema_for_view_test | test_view | VIEW |ID | | ++---------------+----------------------+------------+------------+----------+--------+ SHOW COLUMNS FROM test_view; @@ -133,16 +133,28 @@ SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 'test_view'; ++ ++ ---- FIXED in the following PR --- -SELECT * FROM test_view; - -Error: 3001(EngineExecuteQuery), DataFusion error: Unsupported operation: get stream from a distributed table +SELECT * FROM test_view LIMIT 10; + ++--------+ +| number | ++--------+ +| 0 | +| 1 | +| 2 | +| 3 | +| 4 | +| 5 | +| 6 | +| 7 | +| 8 | +| 9 | ++--------+ USE public; Affected Rows: 0 -DROP DATABASE for_test_view; +DROP DATABASE schema_for_view_test; Affected Rows: 0 diff --git a/tests/cases/standalone/common/view/create.sql b/tests/cases/standalone/common/view/create.sql index a01741f9166f..a778180939a8 100644 --- a/tests/cases/standalone/common/view/create.sql +++ b/tests/cases/standalone/common/view/create.sql @@ -1,8 +1,8 @@ --- test CREATE VIEW --- -CREATE DATABASE for_test_view; +CREATE DATABASE schema_for_view_test; -USE for_test_view; +USE schema_for_view_test; CREATE TABLE test_table(a STRING, ts TIMESTAMP TIME INDEX); @@ -44,9 +44,8 @@ SHOW FULL COLUMNS FROM test_view; SELECT * FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = 'test_view'; ---- FIXED in the following PR --- -SELECT * FROM test_view; +SELECT * FROM test_view LIMIT 10; USE public; -DROP DATABASE for_test_view; +DROP DATABASE schema_for_view_test; diff --git a/tests/cases/standalone/common/view/view.result b/tests/cases/standalone/common/view/view.result new file mode 100644 index 000000000000..132ec48033b1 --- /dev/null +++ b/tests/cases/standalone/common/view/view.result @@ -0,0 +1,62 @@ +-- From: https://github.com/duckdb/duckdb/blob/main/test/sql/catalog/view/test_view.test -- +CREATE DATABASE schema_for_view_test; + +Affected Rows: 1 + +USE schema_for_view_test; + +Affected Rows: 0 + +CREATE TABLE t1(i TIMESTAMP TIME INDEX); + +Affected Rows: 0 + +INSERT INTO t1 VALUES (41), (42), (43); + +Affected Rows: 3 + +CREATE VIEW v1 AS SELECT + i AS j +FROM t1 WHERE i < 43; + +Affected Rows: 0 + +SELECT * FROM v1; + ++-------------------------+ +| i 
| ++-------------------------+ +| 1970-01-01T00:00:00.041 | +| 1970-01-01T00:00:00.042 | ++-------------------------+ + +-- CREATE VIEW v1 AS SELECT 'whatever'; -- +SELECT j FROM v1 WHERE j > 41; + +Error: 3000(PlanQuery), Failed to plan SQL: No field named j. Valid fields are v1.i. + +-- FIXME(dennis):: name alias in view, not supported yet -- +--SELECT x FROM v1 t1(x) WHERE x > 41 -- +-- FIXME(dennis): DROP VIEW not supported yet-- +-- DROP VIEW v1 -- +-- SELECT j FROM v1 WHERE j > 41 -- +-- CREATE VIEW v1 AS SELECT 'whatever'; -- +-- SELECT * FROM v1; -- +-- CREATE OR REPLACE VIEW v1 AS SELECT 42; -- +-- SELECT * FROM v1; -- +INSERT INTO v1 VALUES (1); + +Error: 1004(InvalidArguments), Invalid SQL, error: column count mismatch, columns: 0, values: 1 + +CREATE VIEW v1 AS SELECT * FROM dontexist; + +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Table not found: greptime.schema_for_view_test.dontexist + +USE public; + +Affected Rows: 0 + +DROP DATABASE schema_for_view_test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/view/view.sql b/tests/cases/standalone/common/view/view.sql new file mode 100644 index 000000000000..3ca4cd7a7426 --- /dev/null +++ b/tests/cases/standalone/common/view/view.sql @@ -0,0 +1,45 @@ +-- From: https://github.com/duckdb/duckdb/blob/main/test/sql/catalog/view/test_view.test -- + +CREATE DATABASE schema_for_view_test; + +USE schema_for_view_test; + +CREATE TABLE t1(i TIMESTAMP TIME INDEX); + +INSERT INTO t1 VALUES (41), (42), (43); + +CREATE VIEW v1 AS SELECT + i AS j +FROM t1 WHERE i < 43; + +SELECT * FROM v1; + +-- CREATE VIEW v1 AS SELECT 'whatever'; -- + +SELECT j FROM v1 WHERE j > 41; + + +-- FIXME(dennis):: name alias in view, not supported yet -- +--SELECT x FROM v1 t1(x) WHERE x > 41 -- + +-- FIXME(dennis): DROP VIEW not supported yet-- +-- DROP VIEW v1 -- + +-- SELECT j FROM v1 WHERE j > 41 -- + +-- CREATE VIEW v1 AS SELECT 'whatever'; -- + +-- SELECT * FROM v1; -- + + +-- CREATE OR REPLACE VIEW v1 AS SELECT 42; -- + +-- SELECT * FROM v1; -- + +INSERT INTO v1 VALUES (1); + +CREATE VIEW v1 AS SELECT * FROM dontexist; + +USE public; + +DROP DATABASE schema_for_view_test;
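For context, a minimal sketch of the encode/decode round trip these changes enable for a view's logical plan — assuming a decoder obtained from `QueryEngineContext::new_plan_decoder` and a catalog provider list that can resolve the plan's tables; error handling is collapsed into `expect` for brevity:

use std::sync::Arc;

use common_query::logical_plan::SubstraitPlanDecoderRef;
use datafusion::catalog::CatalogProviderList;
use datafusion::logical_expr::LogicalPlan;
use query::query_engine::DefaultSerializer;
use substrait::{DFLogicalSubstraitConvertor, SubstraitPlan};

/// Encodes a plan to Substrait bytes and rebuilds it through the plan decoder.
async fn roundtrip_view_plan(
    plan: &LogicalPlan,
    decoder: SubstraitPlanDecoderRef,
    catalog_list: Arc<dyn CatalogProviderList>,
) -> LogicalPlan {
    // `DefaultSerializer` also handles the MergeScan extension node, unlike the plain
    // `ExtensionSerializer` it wraps.
    let bytes = DFLogicalSubstraitConvertor
        .encode(plan, DefaultSerializer)
        .expect("encode substrait plan");
    // The decoder resolves table references against `catalog_list`; `false` skips
    // re-optimization, matching how the unit test in default_serializer.rs exercises it.
    decoder
        .decode(bytes, catalog_list, false)
        .await
        .expect("decode substrait plan")
}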