From 4f54ca926ff9b28cb6532be2eabdd6fe4411683c Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Sat, 23 Mar 2024 04:28:46 -0400
Subject: [PATCH 01/28] Error when empty list of query variables passed

---
 CHANGELOG.md                                         | 2 ++
 crates/ndc-clickhouse/src/sql/query_builder.rs       | 3 +++
 crates/ndc-clickhouse/src/sql/query_builder/error.rs | 6 ++++++
 3 files changed, 11 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9b70f65..65b7a9d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+- Return an error if an empty list of query variables is passed. Variables should be omitted, or be a list with at least one member
+
 ## [0.2.1]
 
 ### CLI
diff --git a/crates/ndc-clickhouse/src/sql/query_builder.rs b/crates/ndc-clickhouse/src/sql/query_builder.rs
index 0d1e625..c0d8466 100644
--- a/crates/ndc-clickhouse/src/sql/query_builder.rs
+++ b/crates/ndc-clickhouse/src/sql/query_builder.rs
@@ -102,6 +102,9 @@ impl<'r, 'c> QueryBuilder<'r, 'c> {
         };
 
         let with = if let Some(variables) = &self.request.variables {
+            if variables.is_empty() {
+                return Err(QueryBuilderError::EmptyQueryVariablesList);
+            }
             let mut variable_values: IndexMap> = IndexMap::new();
 
             variable_values.insert(
diff --git a/crates/ndc-clickhouse/src/sql/query_builder/error.rs b/crates/ndc-clickhouse/src/sql/query_builder/error.rs
index b0d5cfb..5aedaef 100644
--- a/crates/ndc-clickhouse/src/sql/query_builder/error.rs
+++ b/crates/ndc-clickhouse/src/sql/query_builder/error.rs
@@ -20,6 +20,8 @@ pub enum QueryBuilderError {
     Unexpected(String),
     /// There was an issue creating typecasting strings
     Typecasting(String),
+    /// An empty list of variables was passed. If variables are passed, we expect at least one set.
+    EmptyQueryVariablesList,
 }
 
 impl fmt::Display for QueryBuilderError {
@@ -42,6 +44,10 @@ impl fmt::Display for QueryBuilderError {
             QueryBuilderError::NotSupported(e) => write!(f, "Not supported: {e}"),
             QueryBuilderError::Unexpected(e) => write!(f, "Unexpected: {e}"),
             QueryBuilderError::Typecasting(e) => write!(f, "Typecasting: {e}"),
+            QueryBuilderError::EmptyQueryVariablesList => write!(
+                f,
+                "Empty query variables list: we expect at least one set, or no list."
+ ), } } } From 2ff9d076819a2c8d4e5ad503ff93c823d9a059af Mon Sep 17 00:00:00 2001 From: Benoit Ranque Date: Sat, 30 Mar 2024 13:54:31 -0400 Subject: [PATCH 02/28] add db table comment as type and collection description in schema --- crates/config/src/lib.rs | 1 + .../src/database_introspection.rs | 1 + .../src/database_introspection.sql | 30 ++++++++++++------- .../src/connector/handler/schema.rs | 4 +-- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs index cd5be14..a21c77f 100644 --- a/crates/config/src/lib.rs +++ b/crates/config/src/lib.rs @@ -25,6 +25,7 @@ pub struct TableConfig { pub name: String, pub schema: String, pub alias: String, + pub comment: Option, pub primary_key: Option, pub columns: Vec, } diff --git a/crates/ndc-clickhouse-cli/src/database_introspection.rs b/crates/ndc-clickhouse-cli/src/database_introspection.rs index 9e2ba28..2c518b3 100644 --- a/crates/ndc-clickhouse-cli/src/database_introspection.rs +++ b/crates/ndc-clickhouse-cli/src/database_introspection.rs @@ -11,6 +11,7 @@ pub struct TableInfo { pub table_name: String, pub table_schema: String, pub table_catalog: String, + pub table_comment: Option, pub table_type: TableType, pub primary_key: Option, pub columns: Vec, diff --git a/crates/ndc-clickhouse-cli/src/database_introspection.sql b/crates/ndc-clickhouse-cli/src/database_introspection.sql index 0e5ed81..68799d9 100644 --- a/crates/ndc-clickhouse-cli/src/database_introspection.sql +++ b/crates/ndc-clickhouse-cli/src/database_introspection.sql @@ -1,15 +1,16 @@ -SELECT - t.table_name AS "table_name", +SELECT t.table_name AS "table_name", t.table_schema AS "table_schema", t.table_catalog AS "table_catalog", - if(empty(st.primary_key), null, st.primary_key) AS "primary_key", + t.table_comment AS "table_comment", + if(empty(st.primary_key), null, st.primary_key) AS "primary_key", toString(t.table_type) as "table_type", cast( - c.columns, - 'Array(Tuple(column_name String, data_type String, is_nullable Bool, is_in_primary_key Bool))' + c.columns, + 'Array(Tuple(column_name String, data_type String, is_nullable Bool, is_in_primary_key Bool))' ) AS "columns" FROM INFORMATION_SCHEMA.TABLES AS t - LEFT JOIN system.tables AS st ON st.database = t.table_schema AND st.name = t.table_name + LEFT JOIN system.tables AS st ON st.database = t.table_schema + AND st.name = t.table_name LEFT JOIN ( SELECT c.table_catalog, c.table_schema, @@ -23,10 +24,17 @@ FROM INFORMATION_SCHEMA.TABLES AS t ) ) AS "columns" FROM INFORMATION_SCHEMA.COLUMNS AS c - LEFT JOIN system.columns AS sc ON sc.database = c.table_schema AND sc.table = c.table_name AND sc.name = c.column_name - GROUP BY c .table_catalog, + LEFT JOIN system.columns AS sc ON sc.database = c.table_schema + AND sc.table = c.table_name + AND sc.name = c.column_name + GROUP BY c.table_catalog, c.table_schema, c.table_name - ) AS c ON t.table_catalog = c.table_catalog AND t.table_schema = c.table_schema AND t.table_name = c.table_name -WHERE t.table_catalog NOT IN ('system', 'INFORMATION_SCHEMA', 'information_schema') -FORMAT JSON; \ No newline at end of file + ) AS c ON t.table_catalog = c.table_catalog + AND t.table_schema = c.table_schema + AND t.table_name = c.table_name +WHERE t.table_catalog NOT IN ( + 'system', + 'INFORMATION_SCHEMA', + 'information_schema' + ) FORMAT JSON; \ No newline at end of file diff --git a/crates/ndc-clickhouse/src/connector/handler/schema.rs b/crates/ndc-clickhouse/src/connector/handler/schema.rs index 4d88085..38d0989 100644 --- 
a/crates/ndc-clickhouse/src/connector/handler/schema.rs +++ b/crates/ndc-clickhouse/src/connector/handler/schema.rs @@ -57,7 +57,7 @@ pub async fn schema(configuration: &ServerConfig) -> Result Result Date: Sat, 30 Mar 2024 16:37:26 -0400 Subject: [PATCH 03/28] cli: add table comment to config --- crates/ndc-clickhouse-cli/src/main.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/crates/ndc-clickhouse-cli/src/main.rs b/crates/ndc-clickhouse-cli/src/main.rs index ff7defc..9b1801c 100644 --- a/crates/ndc-clickhouse-cli/src/main.rs +++ b/crates/ndc-clickhouse-cli/src/main.rs @@ -149,6 +149,7 @@ pub async fn update_tables_config( name: table.table_name.to_owned(), schema: table.table_schema.to_owned(), alias: get_table_alias(table, &old_table_config), + comment: table.table_comment.to_owned(), primary_key: table.primary_key.as_ref().map(|primary_key| PrimaryKey { name: primary_key.to_owned(), columns: table From 9747adf8c06ba49cac3c15dc80532a0efac46746 Mon Sep 17 00:00:00 2001 From: Benoit Ranque Date: Sat, 30 Mar 2024 16:50:21 -0400 Subject: [PATCH 04/28] remove duplicate logic for converting a typestring to a typecasting string remove a bug where a scalar type string representation was assumed to be a valid typecast string --- .../src/connector/handler/schema.rs | 253 +----- crates/ndc-clickhouse/src/schema.rs | 4 +- .../src/schema/clickhouse_data_type.rs | 116 +-- .../ndc-clickhouse/src/schema/scalar_type.rs | 305 -------- .../single_column_aggregate_function.rs | 2 +- .../src/schema/type_definition.rs | 729 ++++++++++++++++++ .../ndc-clickhouse/src/sql/query_builder.rs | 3 +- .../src/sql/query_builder/typecasting.rs | 124 +-- 8 files changed, 852 insertions(+), 684 deletions(-) delete mode 100644 crates/ndc-clickhouse/src/schema/scalar_type.rs create mode 100644 crates/ndc-clickhouse/src/schema/type_definition.rs diff --git a/crates/ndc-clickhouse/src/connector/handler/schema.rs b/crates/ndc-clickhouse/src/connector/handler/schema.rs index 38d0989..fae1088 100644 --- a/crates/ndc-clickhouse/src/connector/handler/schema.rs +++ b/crates/ndc-clickhouse/src/connector/handler/schema.rs @@ -1,70 +1,54 @@ +use crate::schema::{ClickHouseDataType, ClickHouseTypeDefinition, Identifier, SingleQuotedString}; use config::{PrimaryKey, ServerConfig}; use ndc_sdk::{connector::SchemaError, models}; use std::{collections::BTreeMap, str::FromStr}; -use strum::IntoEnumIterator; - -use crate::schema::{ClickHouseScalarType, ClickhouseDataType, Identifier}; pub async fn schema(configuration: &ServerConfig) -> Result { - let scalar_types = BTreeMap::from_iter(ClickHouseScalarType::iter().map(|scalar_type| { - let aggregate_functions = - BTreeMap::from_iter(scalar_type.aggregate_functions().into_iter().map(|(f, r)| { - ( - f.to_string(), - models::AggregateFunctionDefinition { - result_type: models::Type::Named { - name: r.to_string(), - }, - }, - ) - })); - ( - scalar_type.to_string(), - models::ScalarType { - aggregate_functions, - comparison_operators: scalar_type.comparison_operators(), - }, - ) - })); - - let mut object_types = vec![]; + let mut scalar_type_definitions = BTreeMap::new(); + let mut object_type_definitions = vec![]; for table in &configuration.tables { let mut fields = vec![]; for column in &table.columns { - let column_type = ClickhouseDataType::from_str(column.data_type.as_str()) + let data_type = ClickHouseDataType::from_str(column.data_type.as_str()) .map_err(|err| SchemaError::Other(Box::new(err)))?; + let type_definition = ClickHouseTypeDefinition::from_table_column( + &data_type, + 
&column.alias,
+                &table.alias,
+            );
-            let object_type_name = format!("{}_{}", table.alias, column.alias);
-
-            let (column_type, additional_object_types) =
-                get_field_type(&object_type_name, &column_type);
+
+            let (scalars, objects) = type_definition.type_definitions();
 
-            for additional_object_type in additional_object_types {
-                object_types.push(additional_object_type);
+            for (name, definition) in objects {
+                object_type_definitions.push((name, definition));
+            }
+            for (name, definition) in scalars {
+                // silently dropping duplicate scalar definitions
+                // this could be an issue if somehow an enum has the same name as a primitive scalar
+                // there is the potential for name collisions resulting in dropped enum definitions
+                scalar_type_definitions.insert(name, definition);
             }
 
             fields.push((
                 column.alias.to_owned(),
                 models::ObjectField {
                     description: None,
-                    r#type: column_type,
+                    r#type: type_definition.type_identifier(),
                 },
             ));
         }
 
-        object_types.push((
+        object_type_definitions.push((
             table.alias.to_owned(),
             models::ObjectType {
                 description: table.comment.to_owned(),
-                fields: BTreeMap::from_iter(fields),
+                fields: fields.into_iter().collect(),
             },
         ));
     }
 
-    let object_types = BTreeMap::from_iter(object_types);
-
     let collections = configuration
         .tables
         .iter()
@@ -89,199 +73,12 @@ pub async fn schema(configuration: &ServerConfig) -> Result (models::Type, Vec<(String, models::ObjectType)>) {
-    use ClickHouseScalarType as SC;
-    use ClickhouseDataType as DT;
-    let scalar = |t: ClickHouseScalarType| {
-        (
-            models::Type::Named {
-                name: t.to_string(),
-            },
-            vec![],
-        )
-    };
-    match data_type {
-        DT::Nullable(inner) => {
-            let (underlying_type, additional_types) = get_field_type(type_name, inner);
-            (
-                models::Type::Nullable {
-                    underlying_type: Box::new(underlying_type),
-                },
-                additional_types,
-            )
-        }
-        DT::Bool => scalar(SC::Bool),
-        DT::String | DT::FixedString(_) => scalar(SC::String),
-        DT::UInt8 => scalar(SC::UInt8),
-        DT::UInt16 => scalar(SC::UInt16),
-        DT::UInt32 => scalar(SC::UInt32),
-        DT::UInt64 => scalar(SC::UInt64),
-        DT::UInt128 => scalar(SC::UInt128),
-        DT::UInt256 => scalar(SC::UInt256),
-        DT::Int8 => scalar(SC::Int8),
-        DT::Int16 => scalar(SC::Int16),
-        DT::Int32 => scalar(SC::Int32),
-        DT::Int64 => scalar(SC::Int64),
-        DT::Int128 => scalar(SC::Int128),
-        DT::Int256 => scalar(SC::Int256),
-        DT::Float32 => scalar(SC::Float32),
-        DT::Float64 => scalar(SC::Float64),
-        DT::Decimal { .. } => scalar(SC::Decimal),
-        DT::Decimal32 { .. } => scalar(SC::Decimal32),
-        DT::Decimal64 { .. } => scalar(SC::Decimal64),
-        DT::Decimal128 { .. } => scalar(SC::Decimal128),
-        DT::Decimal256 { .. } => scalar(SC::Decimal256),
-        DT::Date => scalar(SC::Date),
-        DT::Date32 => scalar(SC::Date32),
-        DT::DateTime { .. } => scalar(SC::DateTime),
-        DT::DateTime64 { ..
} => scalar(SC::DateTime64), - DT::Json => scalar(SC::Json), - DT::Uuid => scalar(SC::Uuid), - DT::IPv4 => scalar(SC::IPv4), - DT::IPv6 => scalar(SC::IPv6), - DT::LowCardinality(inner) => get_field_type(type_name, inner), - DT::Nested(entries) => { - let object_type_name = type_name; - - let mut object_type_fields = vec![]; - let mut additional_object_types = vec![]; - - for (name, data_type) in entries { - let field_name = match name { - Identifier::DoubleQuoted(n) => n, - Identifier::BacktickQuoted(n) => n, - Identifier::Unquoted(n) => n, - }; - - let type_name = format!("{}_{}", object_type_name, field_name); - - let (field_type, mut additional_types) = get_field_type(&type_name, data_type); - - additional_object_types.append(&mut additional_types); - - object_type_fields.push(( - field_name.to_owned(), - models::ObjectField { - description: None, - r#type: field_type, - }, - )); - } - - additional_object_types.push(( - object_type_name.to_string(), - models::ObjectType { - description: None, - fields: BTreeMap::from_iter(object_type_fields), - }, - )); - - ( - models::Type::Named { - name: object_type_name.to_owned(), - }, - additional_object_types, - ) - } - DT::Array(inner) => { - let (inner, object_types) = get_field_type(type_name, inner); - ( - models::Type::Array { - element_type: Box::new(inner), - }, - object_types, - ) - } - DT::Map { key: _, value: _ } => scalar(SC::Unknown), - DT::Tuple(entries) => { - let object_type_name = type_name; - - let mut object_type_fields = vec![]; - let mut additional_object_types = vec![]; - - for (name, data_type) in entries { - let field_name = if let Some(name) = name { - match name { - Identifier::DoubleQuoted(n) => n, - Identifier::BacktickQuoted(n) => n, - Identifier::Unquoted(n) => n, - } - } else { - return scalar(SC::Unknown); - }; - - let type_name = format!("{}_{}", object_type_name, field_name); - - let (field_type, mut additional_types) = get_field_type(&type_name, data_type); - - additional_object_types.append(&mut additional_types); - - object_type_fields.push(( - field_name.to_owned(), - models::ObjectField { - description: None, - r#type: field_type, - }, - )); - } - - additional_object_types.push(( - object_type_name.to_string(), - models::ObjectType { - description: None, - fields: BTreeMap::from_iter(object_type_fields), - }, - )); - - ( - models::Type::Named { - name: object_type_name.to_owned(), - }, - additional_object_types, - ) - } - DT::Enum(_) => scalar(SC::String), - DT::AggregateFunction { - function, - arguments, - } => { - let arg_len = arguments.len(); - let first = arguments.first(); - let agg_fn_name = match &function.name { - Identifier::DoubleQuoted(n) => n, - Identifier::BacktickQuoted(n) => n, - Identifier::Unquoted(n) => n, - }; - - if let (Some(data_type), 1) = (first, arg_len) { - get_field_type(type_name, data_type) - } else if let (Some(data_type), 2, "anyIf") = (first, arg_len, agg_fn_name.as_str()) { - get_field_type(type_name, data_type) - } else { - scalar(SC::Unknown) - } - } - DT::SimpleAggregateFunction { - function: _, - arguments, - } => { - if let (Some(data_type), 1) = (arguments.first(), arguments.len()) { - get_field_type(type_name, data_type) - } else { - scalar(SC::Unknown) - } - } - DT::Nothing => scalar(SC::Unknown), - } -} diff --git a/crates/ndc-clickhouse/src/schema.rs b/crates/ndc-clickhouse/src/schema.rs index 8387756..58949e1 100644 --- a/crates/ndc-clickhouse/src/schema.rs +++ b/crates/ndc-clickhouse/src/schema.rs @@ -1,9 +1,9 @@ mod binary_comparison_operator; mod 
clickhouse_data_type; -mod scalar_type; mod single_column_aggregate_function; +mod type_definition; pub use binary_comparison_operator::*; pub use clickhouse_data_type::*; -pub use scalar_type::*; pub use single_column_aggregate_function::*; +pub use type_definition::*; diff --git a/crates/ndc-clickhouse/src/schema/clickhouse_data_type.rs b/crates/ndc-clickhouse/src/schema/clickhouse_data_type.rs index 8b1b9ce..1a50d9c 100644 --- a/crates/ndc-clickhouse/src/schema/clickhouse_data_type.rs +++ b/crates/ndc-clickhouse/src/schema/clickhouse_data_type.rs @@ -3,7 +3,7 @@ use std::{fmt::Display, str::FromStr}; use serde::{Deserialize, Serialize}; #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] -pub struct SingleQuotedString(String); +pub struct SingleQuotedString(pub String); impl Display for SingleQuotedString { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { @@ -78,8 +78,8 @@ impl Display for AggregateFunctionParameter { /// To create one from a string slice, use try_from() #[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] #[serde(rename_all = "snake_case", tag = "type")] -pub enum ClickhouseDataType { - Nullable(Box), +pub enum ClickHouseDataType { + Nullable(Box), Bool, String, FixedString(u32), @@ -126,29 +126,29 @@ pub enum ClickhouseDataType { Uuid, IPv4, IPv6, - LowCardinality(Box), - Nested(Vec<(Identifier, ClickhouseDataType)>), - Array(Box), + LowCardinality(Box), + Nested(Vec<(Identifier, ClickHouseDataType)>), + Array(Box), Map { - key: Box, - value: Box, + key: Box, + value: Box, }, - Tuple(Vec<(Option, ClickhouseDataType)>), + Tuple(Vec<(Option, ClickHouseDataType)>), Enum(Vec<(SingleQuotedString, Option)>), SimpleAggregateFunction { function: AggregateFunctionDefinition, - arguments: Vec, + arguments: Vec, }, AggregateFunction { function: AggregateFunctionDefinition, - arguments: Vec, + arguments: Vec, }, Nothing, } -impl Display for ClickhouseDataType { +impl Display for ClickHouseDataType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - use ClickhouseDataType as DT; + use ClickHouseDataType as DT; match self { DT::Nullable(inner) => write!(f, "Nullable({inner})"), DT::Bool => write!(f, "Bool"), @@ -271,10 +271,10 @@ impl Display for ClickhouseDataType { } } -impl FromStr for ClickhouseDataType { +impl FromStr for ClickHouseDataType { type Err = peg::error::ParseError; - /// Attempt to create a ClickhouseDataType from a string representation of the type. + /// Attempt to create a ClickHouseDataType from a string representation of the type. /// May return a parse error if the type string is malformed, or if our implementation is out of date or incorrect fn from_str(s: &str) -> Result { clickhouse_parser::data_type(s) @@ -283,8 +283,8 @@ impl FromStr for ClickhouseDataType { peg::parser! { grammar clickhouse_parser() for str { - use ClickhouseDataType as CDT; - pub rule data_type() -> ClickhouseDataType = nullable() + use ClickHouseDataType as CDT; + pub rule data_type() -> ClickHouseDataType = nullable() / uint256() / uint128() / uint64() @@ -322,46 +322,46 @@ peg::parser! 
{ / tuple() / r#enum() / nothing() - rule nullable() -> ClickhouseDataType = "Nullable(" t:data_type() ")" { CDT::Nullable(Box::new(t)) } - rule uint8() -> ClickhouseDataType = "UInt8" { CDT::UInt8 } - rule uint16() -> ClickhouseDataType = "UInt16" { CDT::UInt16 } - rule uint32() -> ClickhouseDataType = "UInt32" { CDT::UInt32 } - rule uint64() -> ClickhouseDataType = "UInt64" { CDT::UInt64 } - rule uint128() -> ClickhouseDataType = "UInt128" { CDT::UInt128 } - rule uint256() -> ClickhouseDataType = "UInt256" { CDT::UInt256 } - rule int8() -> ClickhouseDataType = "Int8" { CDT::Int8 } - rule int16() -> ClickhouseDataType = "Int16" { CDT::Int16 } - rule int32() -> ClickhouseDataType = "Int32" { CDT::Int32 } - rule int64() -> ClickhouseDataType = "Int64" { CDT::Int64 } - rule int128() -> ClickhouseDataType = "Int128" { CDT::Int128 } - rule int256() -> ClickhouseDataType = "Int256" { CDT::Int256 } - rule float32() -> ClickhouseDataType = "Float32" { CDT::Float32 } - rule float64() -> ClickhouseDataType = "Float64" { CDT::Float64 } - rule decimal() -> ClickhouseDataType = "Decimal(" precision:integer_value() ", " scale:integer_value() ")" { CDT::Decimal { precision, scale } } - rule decimal32() -> ClickhouseDataType = "Decimal32(" scale:integer_value() ")" { CDT::Decimal32 { scale } } - rule decimal64() -> ClickhouseDataType = "Decimal64(" scale:integer_value() ")" { CDT::Decimal64 { scale } } - rule decimal128() -> ClickhouseDataType = "Decimal128(" scale:integer_value() ")" { CDT::Decimal128 { scale } } - rule decimal256() -> ClickhouseDataType = "Decimal256(" scale:integer_value() ")" { CDT::Decimal256 { scale } } - rule bool() -> ClickhouseDataType = "Bool" { CDT::Bool } - rule string() -> ClickhouseDataType = "String" { CDT::String } - rule fixed_string() -> ClickhouseDataType = "FixedString(" n:integer_value() ")" { CDT::FixedString(n) } - rule date() -> ClickhouseDataType = "Date" { CDT::Date } - rule date32() -> ClickhouseDataType = "Date32" { CDT::Date32 } - rule date_time() -> ClickhouseDataType = "DateTime" tz:("(" tz:single_quoted_string_value()? ")" { tz })? { CDT::DateTime { timezone: tz.flatten().map(|s| s.to_owned()) } } - rule date_time64() -> ClickhouseDataType = "DateTime64(" precision:integer_value() tz:(", " tz:single_quoted_string_value()? { tz })? ")" { CDT::DateTime64{ precision, timezone: tz.flatten().map(|s| s.to_owned())} } - rule json() -> ClickhouseDataType = "JSON" { CDT::Json } - rule uuid() -> ClickhouseDataType = "UUID" { CDT::Uuid } - rule ipv4() -> ClickhouseDataType = "IPv4" { CDT::IPv4 } - rule ipv6() -> ClickhouseDataType = "IPv6" { CDT::IPv6 } - rule low_cardinality() -> ClickhouseDataType = "LowCardinality(" t:data_type() ")" { CDT::LowCardinality(Box::new(t)) } - rule nested() -> ClickhouseDataType = "Nested(" e:(("\""? n:identifier() "\""? " " t:data_type() { (n, t)}) ** ", ") ")" { CDT::Nested(e) } - rule array() -> ClickhouseDataType = "Array(" t:data_type() ")" { CDT::Array(Box::new(t)) } - rule map() -> ClickhouseDataType = "Map(" k:data_type() ", " v:data_type() ")" { CDT::Map { key: Box::new(k), value: Box::new(v) } } - rule tuple() -> ClickhouseDataType = "Tuple(" e:((n:(n:identifier() " " { n })? t:data_type() { (n, t) }) ** ", ") ")" { CDT::Tuple(e) } - rule r#enum() -> ClickhouseDataType = "Enum" ("8" / "16")? "(" e:((n:single_quoted_string_value() i:(" = " i:integer_value() { i })? 
{ (n, i) }) ** ", ") ")" { CDT::Enum(e)} - rule aggregate_function() -> ClickhouseDataType = "AggregateFunction(" f:aggregate_function_definition() ", " a:(data_type() ** ", ") ")" { CDT::AggregateFunction { function: f, arguments: a }} - rule simple_aggregate_function() -> ClickhouseDataType = "SimpleAggregateFunction(" f:aggregate_function_definition() ", " a:(data_type() ** ", ") ")" { CDT::SimpleAggregateFunction { function: f, arguments: a }} - rule nothing() -> ClickhouseDataType = "Nothing" { CDT::Nothing } + rule nullable() -> ClickHouseDataType = "Nullable(" t:data_type() ")" { CDT::Nullable(Box::new(t)) } + rule uint8() -> ClickHouseDataType = "UInt8" { CDT::UInt8 } + rule uint16() -> ClickHouseDataType = "UInt16" { CDT::UInt16 } + rule uint32() -> ClickHouseDataType = "UInt32" { CDT::UInt32 } + rule uint64() -> ClickHouseDataType = "UInt64" { CDT::UInt64 } + rule uint128() -> ClickHouseDataType = "UInt128" { CDT::UInt128 } + rule uint256() -> ClickHouseDataType = "UInt256" { CDT::UInt256 } + rule int8() -> ClickHouseDataType = "Int8" { CDT::Int8 } + rule int16() -> ClickHouseDataType = "Int16" { CDT::Int16 } + rule int32() -> ClickHouseDataType = "Int32" { CDT::Int32 } + rule int64() -> ClickHouseDataType = "Int64" { CDT::Int64 } + rule int128() -> ClickHouseDataType = "Int128" { CDT::Int128 } + rule int256() -> ClickHouseDataType = "Int256" { CDT::Int256 } + rule float32() -> ClickHouseDataType = "Float32" { CDT::Float32 } + rule float64() -> ClickHouseDataType = "Float64" { CDT::Float64 } + rule decimal() -> ClickHouseDataType = "Decimal(" precision:integer_value() ", " scale:integer_value() ")" { CDT::Decimal { precision, scale } } + rule decimal32() -> ClickHouseDataType = "Decimal32(" scale:integer_value() ")" { CDT::Decimal32 { scale } } + rule decimal64() -> ClickHouseDataType = "Decimal64(" scale:integer_value() ")" { CDT::Decimal64 { scale } } + rule decimal128() -> ClickHouseDataType = "Decimal128(" scale:integer_value() ")" { CDT::Decimal128 { scale } } + rule decimal256() -> ClickHouseDataType = "Decimal256(" scale:integer_value() ")" { CDT::Decimal256 { scale } } + rule bool() -> ClickHouseDataType = "Bool" { CDT::Bool } + rule string() -> ClickHouseDataType = "String" { CDT::String } + rule fixed_string() -> ClickHouseDataType = "FixedString(" n:integer_value() ")" { CDT::FixedString(n) } + rule date() -> ClickHouseDataType = "Date" { CDT::Date } + rule date32() -> ClickHouseDataType = "Date32" { CDT::Date32 } + rule date_time() -> ClickHouseDataType = "DateTime" tz:("(" tz:single_quoted_string_value()? ")" { tz })? { CDT::DateTime { timezone: tz.flatten().map(|s| s.to_owned()) } } + rule date_time64() -> ClickHouseDataType = "DateTime64(" precision:integer_value() tz:(", " tz:single_quoted_string_value()? { tz })? ")" { CDT::DateTime64{ precision, timezone: tz.flatten().map(|s| s.to_owned())} } + rule json() -> ClickHouseDataType = "JSON" { CDT::Json } + rule uuid() -> ClickHouseDataType = "UUID" { CDT::Uuid } + rule ipv4() -> ClickHouseDataType = "IPv4" { CDT::IPv4 } + rule ipv6() -> ClickHouseDataType = "IPv6" { CDT::IPv6 } + rule low_cardinality() -> ClickHouseDataType = "LowCardinality(" t:data_type() ")" { CDT::LowCardinality(Box::new(t)) } + rule nested() -> ClickHouseDataType = "Nested(" e:(("\""? n:identifier() "\""? 
" " t:data_type() { (n, t)}) ** ", ") ")" { CDT::Nested(e) } + rule array() -> ClickHouseDataType = "Array(" t:data_type() ")" { CDT::Array(Box::new(t)) } + rule map() -> ClickHouseDataType = "Map(" k:data_type() ", " v:data_type() ")" { CDT::Map { key: Box::new(k), value: Box::new(v) } } + rule tuple() -> ClickHouseDataType = "Tuple(" e:((n:(n:identifier() " " { n })? t:data_type() { (n, t) }) ** ", ") ")" { CDT::Tuple(e) } + rule r#enum() -> ClickHouseDataType = "Enum" ("8" / "16")? "(" e:((n:single_quoted_string_value() i:(" = " i:integer_value() { i })? { (n, i) }) ** ", ") ")" { CDT::Enum(e)} + rule aggregate_function() -> ClickHouseDataType = "AggregateFunction(" f:aggregate_function_definition() ", " a:(data_type() ** ", ") ")" { CDT::AggregateFunction { function: f, arguments: a }} + rule simple_aggregate_function() -> ClickHouseDataType = "SimpleAggregateFunction(" f:aggregate_function_definition() ", " a:(data_type() ** ", ") ")" { CDT::SimpleAggregateFunction { function: f, arguments: a }} + rule nothing() -> ClickHouseDataType = "Nothing" { CDT::Nothing } @@ -385,7 +385,7 @@ peg::parser! { #[test] fn can_parse_clickhouse_data_type() { - use ClickhouseDataType as CDT; + use ClickHouseDataType as CDT; let data_types = vec![ ("Int32", CDT::Int32), ("Nullable(Int32)", CDT::Nullable(Box::new(CDT::Int32))), diff --git a/crates/ndc-clickhouse/src/schema/scalar_type.rs b/crates/ndc-clickhouse/src/schema/scalar_type.rs deleted file mode 100644 index 729fd11..0000000 --- a/crates/ndc-clickhouse/src/schema/scalar_type.rs +++ /dev/null @@ -1,305 +0,0 @@ -use std::collections::BTreeMap; - -use crate::schema::{ - binary_comparison_operator::ClickHouseBinaryComparisonOperator, - single_column_aggregate_function::ClickHouseSingleColumnAggregateFunction, -}; -use ndc_sdk::models; -use strum::{Display, EnumIter, EnumString}; - -#[derive(Debug, Clone, EnumString, Display, EnumIter)] -pub enum ClickHouseScalarType { - Bool, - String, - UInt8, - UInt16, - UInt32, - UInt64, - UInt128, - UInt256, - Int8, - Int16, - Int32, - Int64, - Int128, - Int256, - Float32, - Float64, - Decimal, - Decimal32, - Decimal64, - Decimal128, - Decimal256, - Date, - Date32, - DateTime, - DateTime64, - Json, - Uuid, - IPv4, - IPv6, - /// Stand-in for data types that either cannot be represented in graphql, - /// (such as maps or tuples with anonymous memebers) - /// or cannot be known ahead of time (such as the return type of aggregate function columns) - Unknown, -} - -impl ClickHouseScalarType { - pub fn aggregate_functions( - &self, - ) -> Vec<( - ClickHouseSingleColumnAggregateFunction, - ClickHouseScalarType, - )> { - use ClickHouseScalarType as ST; - use ClickHouseSingleColumnAggregateFunction as AF; - - match self { - ST::Bool => vec![], - ST::String => vec![], - ST::UInt8 => vec![ - (AF::Max, ST::UInt8), - (AF::Min, ST::UInt8), - (AF::Sum, ST::UInt64), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::UInt16 => vec![ - (AF::Max, ST::UInt16), - (AF::Min, ST::UInt16), - (AF::Sum, ST::UInt64), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::UInt32 => vec![ - (AF::Max, ST::UInt32), - (AF::Min, ST::UInt32), - (AF::Sum, ST::UInt64), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::UInt64 => 
vec![ - (AF::Max, ST::UInt64), - (AF::Min, ST::UInt64), - (AF::Sum, ST::UInt64), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::UInt128 => vec![ - (AF::Max, ST::UInt128), - (AF::Min, ST::UInt128), - (AF::Sum, ST::UInt128), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::UInt256 => vec![ - (AF::Max, ST::UInt256), - (AF::Min, ST::UInt256), - (AF::Sum, ST::UInt256), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::Int8 => vec![ - (AF::Max, ST::Int8), - (AF::Min, ST::Int8), - (AF::Sum, ST::Int64), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::Int16 => vec![ - (AF::Max, ST::Int16), - (AF::Min, ST::Int16), - (AF::Sum, ST::Int64), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::Int32 => vec![ - (AF::Max, ST::Int32), - (AF::Min, ST::Int32), - (AF::Sum, ST::Int64), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::Int64 => vec![ - (AF::Max, ST::Int64), - (AF::Min, ST::Int64), - (AF::Sum, ST::Int64), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::Int128 => vec![ - (AF::Max, ST::Int128), - (AF::Min, ST::Int128), - (AF::Sum, ST::Int128), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::Int256 => vec![ - (AF::Max, ST::Int256), - (AF::Min, ST::Int256), - (AF::Sum, ST::Int256), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::Float32 => vec![ - (AF::Max, ST::Float64), - (AF::Min, ST::Float32), - (AF::Sum, ST::Float32), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float32), - (AF::StddevSamp, ST::Float32), - (AF::VarPop, ST::Float32), - (AF::VarSamp, ST::Float32), - ], - ST::Float64 => vec![ - (AF::Max, ST::Float64), - (AF::Min, ST::Float64), - (AF::Sum, ST::Float64), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::Decimal => vec![ - (AF::Max, ST::Decimal), - (AF::Min, ST::Decimal), - (AF::Sum, ST::Decimal), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::Decimal32 => vec![ - (AF::Max, ST::Decimal32), - (AF::Min, ST::Decimal32), - (AF::Sum, ST::Decimal32), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::Decimal64 => vec![ - (AF::Max, ST::Decimal64), - (AF::Min, ST::Decimal64), - (AF::Sum, ST::Decimal64), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::Decimal128 
=> vec![ - (AF::Max, ST::Decimal128), - (AF::Min, ST::Decimal128), - (AF::Sum, ST::Decimal128), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::Decimal256 => vec![ - (AF::Max, ST::Decimal256), - (AF::Min, ST::Decimal256), - (AF::Sum, ST::Decimal256), - (AF::Avg, ST::Float64), - (AF::StddevPop, ST::Float64), - (AF::StddevSamp, ST::Float64), - (AF::VarPop, ST::Float64), - (AF::VarSamp, ST::Float64), - ], - ST::Date => vec![(AF::Max, ST::Date), (AF::Min, ST::Date)], - ST::Date32 => vec![(AF::Max, ST::Date32), (AF::Min, ST::Date32)], - ST::DateTime => vec![(AF::Max, ST::DateTime), (AF::Min, ST::DateTime)], - ST::DateTime64 => vec![(AF::Max, ST::DateTime64), (AF::Min, ST::DateTime64)], - ST::Json => vec![], - ST::Uuid => vec![], - ST::IPv4 => vec![], - ST::IPv6 => vec![], - ST::Unknown => vec![], - } - } - pub fn comparison_operators(&self) -> BTreeMap { - use ClickHouseBinaryComparisonOperator as BC; - use ClickHouseScalarType as ST; - let base_operators = vec![ - (BC::Eq, self.to_owned()), - (BC::Gt, self.to_owned()), - (BC::Lt, self.to_owned()), - (BC::GtEq, self.to_owned()), - (BC::LtEq, self.to_owned()), - (BC::NotEq, self.to_owned()), - (BC::In, self.to_owned()), - (BC::NotIn, self.to_owned()), - ]; - - BTreeMap::from_iter( - match self { - ST::String => vec![(BC::Like, ST::String)], - _ => vec![], - } - .into_iter() - .chain(base_operators) - .map(|(name, argument_type)| { - ( - name.to_string(), - match name { - BC::Eq => models::ComparisonOperatorDefinition::Equal, - BC::In => models::ComparisonOperatorDefinition::In, - BC::NotIn => models::ComparisonOperatorDefinition::Custom { - argument_type: models::Type::Array { - element_type: Box::new(models::Type::Named { - name: argument_type.to_string(), - }), - }, - }, - _ => models::ComparisonOperatorDefinition::Custom { - argument_type: models::Type::Named { - name: argument_type.to_string(), - }, - }, - }, - ) - }), - ) - } -} diff --git a/crates/ndc-clickhouse/src/schema/single_column_aggregate_function.rs b/crates/ndc-clickhouse/src/schema/single_column_aggregate_function.rs index 04b1718..3a13ced 100644 --- a/crates/ndc-clickhouse/src/schema/single_column_aggregate_function.rs +++ b/crates/ndc-clickhouse/src/schema/single_column_aggregate_function.rs @@ -2,7 +2,7 @@ use strum::{Display, EnumIter, EnumString}; use crate::sql::ast::{Expr, Function}; -#[derive(Debug, Clone, EnumString, Display, EnumIter)] +#[derive(Debug, Clone, EnumString, Display, EnumIter, PartialEq, Eq)] #[strum(serialize_all = "snake_case")] pub enum ClickHouseSingleColumnAggregateFunction { Max, diff --git a/crates/ndc-clickhouse/src/schema/type_definition.rs b/crates/ndc-clickhouse/src/schema/type_definition.rs new file mode 100644 index 0000000..61831ad --- /dev/null +++ b/crates/ndc-clickhouse/src/schema/type_definition.rs @@ -0,0 +1,729 @@ +use std::collections::BTreeMap; + +use ndc_sdk::models; + +use super::{ + ClickHouseBinaryComparisonOperator, ClickHouseDataType, + ClickHouseSingleColumnAggregateFunction, Identifier, +}; + +#[derive(Debug, Clone, strum::Display)] +pub enum ClickHouseScalar { + Bool, + String, + UInt8, + UInt16, + UInt32, + UInt64, + UInt128, + UInt256, + Int8, + Int16, + Int32, + Int64, + Int128, + Int256, + Float32, + Float64, + Decimal, + Decimal32, + Decimal64, + Decimal128, + Decimal256, + Date, + Date32, + DateTime, + DateTime64, + #[strum(to_string = "JSON")] + Json, + #[strum(to_string = "UUID")] + Uuid, + IPv4, + IPv6, + 
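// enum columns are exposed as dedicated generated scalar types; the strum attribute below makes Display render the generated type name +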
#[strum(to_string = "{name}")] + Enum { + name: String, + variants: Vec, + }, +} + +impl ClickHouseScalar { + fn type_name(&self) -> String { + self.to_string() + } + fn type_definition(&self) -> models::ScalarType { + models::ScalarType { + representation: Some(self.json_representation()), + aggregate_functions: self + .aggregate_functions() + .into_iter() + .map(|(function, result_type)| { + ( + function.to_string(), + models::AggregateFunctionDefinition { + result_type: models::Type::Named { + name: result_type.type_name(), + }, + }, + ) + }) + .collect(), + comparison_operators: self + .comparison_operators() + .into_iter() + .map(|operator| { + let definition = match operator { + ClickHouseBinaryComparisonOperator::Eq => { + models::ComparisonOperatorDefinition::Equal + } + ClickHouseBinaryComparisonOperator::In => { + models::ComparisonOperatorDefinition::In + } + ClickHouseBinaryComparisonOperator::NotIn => { + models::ComparisonOperatorDefinition::Custom { + argument_type: models::Type::Array { + element_type: Box::new(models::Type::Named { + name: self.type_name(), + }), + }, + } + } + ClickHouseBinaryComparisonOperator::Gt + | ClickHouseBinaryComparisonOperator::Lt + | ClickHouseBinaryComparisonOperator::GtEq + | ClickHouseBinaryComparisonOperator::LtEq + | ClickHouseBinaryComparisonOperator::NotEq + | ClickHouseBinaryComparisonOperator::Like + | ClickHouseBinaryComparisonOperator::NotLike + | ClickHouseBinaryComparisonOperator::ILike + | ClickHouseBinaryComparisonOperator::NotILike + | ClickHouseBinaryComparisonOperator::Match => { + models::ComparisonOperatorDefinition::Custom { + argument_type: models::Type::Named { + name: self.type_name(), + }, + } + } + }; + (operator.to_string(), definition) + }) + .collect(), + } + } + fn json_representation(&self) -> models::TypeRepresentation { + use models::TypeRepresentation as Rep; + use ClickHouseScalar as ST; + match self { + ST::Bool => Rep::Boolean, + ST::String => Rep::String, + ST::UInt8 => Rep::Integer, + ST::UInt16 => Rep::Integer, + ST::UInt32 => Rep::Integer, + ST::UInt64 => Rep::Integer, + ST::UInt128 => Rep::Integer, + ST::UInt256 => Rep::Integer, + ST::Int8 => Rep::Integer, + ST::Int16 => Rep::Integer, + ST::Int32 => Rep::Integer, + ST::Int64 => Rep::Integer, + ST::Int128 => Rep::Integer, + ST::Int256 => Rep::Integer, + ST::Float32 => Rep::Number, + ST::Float64 => Rep::Number, + ST::Decimal => Rep::Number, + ST::Decimal32 => Rep::String, + ST::Decimal64 => Rep::String, + ST::Decimal128 => Rep::String, + ST::Decimal256 => Rep::String, + ST::Date => Rep::String, + ST::Date32 => Rep::String, + ST::DateTime => Rep::String, + ST::DateTime64 => Rep::String, + ST::Json => Rep::String, + ST::Uuid => Rep::String, + ST::IPv4 => Rep::String, + ST::IPv6 => Rep::String, + ST::Enum { name: _, variants } => Rep::Enum { + one_of: variants.to_owned(), + }, + } + } + fn aggregate_functions( + &self, + ) -> Vec<(ClickHouseSingleColumnAggregateFunction, ClickHouseScalar)> { + use ClickHouseScalar as ST; + use ClickHouseSingleColumnAggregateFunction as AF; + + match self { + ST::Bool => vec![], + ST::String => vec![], + ST::UInt8 => vec![ + (AF::Max, ST::UInt8), + (AF::Min, ST::UInt8), + (AF::Sum, ST::UInt64), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::UInt16 => vec![ + (AF::Max, ST::UInt16), + (AF::Min, ST::UInt16), + (AF::Sum, ST::UInt64), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, 
ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::UInt32 => vec![ + (AF::Max, ST::UInt32), + (AF::Min, ST::UInt32), + (AF::Sum, ST::UInt64), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::UInt64 => vec![ + (AF::Max, ST::UInt64), + (AF::Min, ST::UInt64), + (AF::Sum, ST::UInt64), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::UInt128 => vec![ + (AF::Max, ST::UInt128), + (AF::Min, ST::UInt128), + (AF::Sum, ST::UInt128), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::UInt256 => vec![ + (AF::Max, ST::UInt256), + (AF::Min, ST::UInt256), + (AF::Sum, ST::UInt256), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::Int8 => vec![ + (AF::Max, ST::Int8), + (AF::Min, ST::Int8), + (AF::Sum, ST::Int64), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::Int16 => vec![ + (AF::Max, ST::Int16), + (AF::Min, ST::Int16), + (AF::Sum, ST::Int64), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::Int32 => vec![ + (AF::Max, ST::Int32), + (AF::Min, ST::Int32), + (AF::Sum, ST::Int64), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::Int64 => vec![ + (AF::Max, ST::Int64), + (AF::Min, ST::Int64), + (AF::Sum, ST::Int64), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::Int128 => vec![ + (AF::Max, ST::Int128), + (AF::Min, ST::Int128), + (AF::Sum, ST::Int128), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::Int256 => vec![ + (AF::Max, ST::Int256), + (AF::Min, ST::Int256), + (AF::Sum, ST::Int256), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::Float32 => vec![ + (AF::Max, ST::Float64), + (AF::Min, ST::Float32), + (AF::Sum, ST::Float32), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float32), + (AF::StddevSamp, ST::Float32), + (AF::VarPop, ST::Float32), + (AF::VarSamp, ST::Float32), + ], + ST::Float64 => vec![ + (AF::Max, ST::Float64), + (AF::Min, ST::Float64), + (AF::Sum, ST::Float64), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::Decimal => vec![ + (AF::Max, ST::Decimal), + (AF::Min, ST::Decimal), + (AF::Sum, ST::Decimal), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::Decimal32 => vec![ + (AF::Max, ST::Decimal32), + (AF::Min, ST::Decimal32), + (AF::Sum, ST::Decimal32), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), 
+ (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::Decimal64 => vec![ + (AF::Max, ST::Decimal64), + (AF::Min, ST::Decimal64), + (AF::Sum, ST::Decimal64), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::Decimal128 => vec![ + (AF::Max, ST::Decimal128), + (AF::Min, ST::Decimal128), + (AF::Sum, ST::Decimal128), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::Decimal256 => vec![ + (AF::Max, ST::Decimal256), + (AF::Min, ST::Decimal256), + (AF::Sum, ST::Decimal256), + (AF::Avg, ST::Float64), + (AF::StddevPop, ST::Float64), + (AF::StddevSamp, ST::Float64), + (AF::VarPop, ST::Float64), + (AF::VarSamp, ST::Float64), + ], + ST::Date => vec![(AF::Max, ST::Date), (AF::Min, ST::Date)], + ST::Date32 => vec![(AF::Max, ST::Date32), (AF::Min, ST::Date32)], + ST::DateTime => vec![(AF::Max, ST::DateTime), (AF::Min, ST::DateTime)], + ST::DateTime64 => vec![(AF::Max, ST::DateTime64), (AF::Min, ST::DateTime64)], + ST::Json => vec![], + ST::Uuid => vec![], + ST::IPv4 => vec![], + ST::IPv6 => vec![], + ST::Enum { .. } => vec![], + } + } + fn comparison_operators(&self) -> Vec { + use ClickHouseBinaryComparisonOperator as BC; + use ClickHouseScalar as ST; + + let equality_operators = vec![BC::Eq, BC::NotEq, BC::In, BC::NotIn]; + let ordering_operators = vec![BC::Gt, BC::Lt, BC::GtEq, BC::LtEq]; + let string_operators = vec![BC::Like, BC::NotLike, BC::ILike, BC::NotILike, BC::Match]; + + match self { + ST::Bool => equality_operators, + ST::String => [equality_operators, ordering_operators, string_operators].concat(), + ST::UInt8 | ST::UInt16 | ST::UInt32 | ST::UInt64 | ST::UInt128 | ST::UInt256 => { + [equality_operators, ordering_operators].concat() + } + ST::Int8 | ST::Int16 | ST::Int32 | ST::Int64 | ST::Int128 | ST::Int256 => { + [equality_operators, ordering_operators].concat() + } + ST::Float32 | ST::Float64 => [equality_operators, ordering_operators].concat(), + ST::Decimal | ST::Decimal32 | ST::Decimal64 | ST::Decimal128 | ST::Decimal256 => { + [equality_operators, ordering_operators].concat() + } + ST::Date | ST::Date32 => [equality_operators, ordering_operators].concat(), + ST::DateTime | ST::DateTime64 => [equality_operators, ordering_operators].concat(), + ST::Json => [equality_operators, ordering_operators].concat(), + ST::Uuid => [equality_operators, ordering_operators].concat(), + ST::IPv4 => [equality_operators, ordering_operators].concat(), + ST::IPv6 => [equality_operators, ordering_operators].concat(), + ST::Enum { .. 
} => equality_operators, + } + } + /// returns the type we can cast this type to + /// this may not be the same as the underlying real type + /// for examples, enums are cast to strings, and fixed strings cast to strings + fn cast_type(&self) -> ClickHouseDataType { + match self { + ClickHouseScalar::Bool => ClickHouseDataType::Bool, + ClickHouseScalar::String => ClickHouseDataType::String, + ClickHouseScalar::UInt8 => ClickHouseDataType::UInt8, + ClickHouseScalar::UInt16 => ClickHouseDataType::UInt16, + ClickHouseScalar::UInt32 => ClickHouseDataType::UInt32, + ClickHouseScalar::UInt64 => ClickHouseDataType::UInt64, + ClickHouseScalar::UInt128 => ClickHouseDataType::UInt128, + ClickHouseScalar::UInt256 => ClickHouseDataType::UInt256, + ClickHouseScalar::Int8 => ClickHouseDataType::Int8, + ClickHouseScalar::Int16 => ClickHouseDataType::Int16, + ClickHouseScalar::Int32 => ClickHouseDataType::Int32, + ClickHouseScalar::Int64 => ClickHouseDataType::Int64, + ClickHouseScalar::Int128 => ClickHouseDataType::Int128, + ClickHouseScalar::Int256 => ClickHouseDataType::Int256, + ClickHouseScalar::Float32 => ClickHouseDataType::Float32, + ClickHouseScalar::Float64 => ClickHouseDataType::Float64, + ClickHouseScalar::Decimal => ClickHouseDataType::String, + ClickHouseScalar::Decimal32 => ClickHouseDataType::String, + ClickHouseScalar::Decimal64 => ClickHouseDataType::String, + ClickHouseScalar::Decimal128 => ClickHouseDataType::String, + ClickHouseScalar::Decimal256 => ClickHouseDataType::String, + ClickHouseScalar::Date => ClickHouseDataType::String, + ClickHouseScalar::Date32 => ClickHouseDataType::String, + ClickHouseScalar::DateTime => ClickHouseDataType::String, + ClickHouseScalar::DateTime64 => ClickHouseDataType::String, + ClickHouseScalar::Json => ClickHouseDataType::Json, + ClickHouseScalar::Uuid => ClickHouseDataType::Uuid, + ClickHouseScalar::IPv4 => ClickHouseDataType::IPv4, + ClickHouseScalar::IPv6 => ClickHouseDataType::IPv6, + ClickHouseScalar::Enum { .. 
} => ClickHouseDataType::String,
+        }
+    }
+}
+
+pub enum ClickHouseTypeDefinition {
+    Scalar(ClickHouseScalar),
+    Nullable {
+        inner: Box<ClickHouseTypeDefinition>,
+    },
+    Array {
+        element_type: Box<ClickHouseTypeDefinition>,
+    },
+    Object {
+        name: String,
+        fields: BTreeMap<String, ClickHouseTypeDefinition>,
+    },
+    /// Stand-in for data types that either cannot be represented in graphql,
+    /// (such as maps or tuples with anonymous members)
+    /// or cannot be known ahead of time (such as the return type of aggregate function columns)
+    Unknown {
+        name: String,
+    },
+}
+
+impl ClickHouseTypeDefinition {
+    /// Table alias is guaranteed unique across the database, and by default includes the schema name for non-default schemas
+    pub fn from_table_column(
+        data_type: &ClickHouseDataType,
+        column_alias: &str,
+        table_alias: &str,
+    ) -> Self {
+        let namespace = format!("{table_alias}.{column_alias}");
+        Self::new(data_type, &namespace)
+    }
+    fn new(data_type: &ClickHouseDataType, namespace: &str) -> Self {
+        match data_type {
+            ClickHouseDataType::Nullable(inner) => Self::Nullable {
+                inner: Box::new(Self::new(inner, namespace)),
+            },
+            ClickHouseDataType::Bool => Self::Scalar(ClickHouseScalar::Bool),
+            ClickHouseDataType::String | ClickHouseDataType::FixedString(_) => {
+                Self::Scalar(ClickHouseScalar::String)
+            }
+            ClickHouseDataType::UInt8 => Self::Scalar(ClickHouseScalar::UInt8),
+            ClickHouseDataType::UInt16 => Self::Scalar(ClickHouseScalar::UInt16),
+            ClickHouseDataType::UInt32 => Self::Scalar(ClickHouseScalar::UInt32),
+            ClickHouseDataType::UInt64 => Self::Scalar(ClickHouseScalar::UInt64),
+            ClickHouseDataType::UInt128 => Self::Scalar(ClickHouseScalar::UInt128),
+            ClickHouseDataType::UInt256 => Self::Scalar(ClickHouseScalar::UInt256),
+            ClickHouseDataType::Int8 => Self::Scalar(ClickHouseScalar::Int8),
+            ClickHouseDataType::Int16 => Self::Scalar(ClickHouseScalar::Int16),
+            ClickHouseDataType::Int32 => Self::Scalar(ClickHouseScalar::Int32),
+            ClickHouseDataType::Int64 => Self::Scalar(ClickHouseScalar::Int64),
+            ClickHouseDataType::Int128 => Self::Scalar(ClickHouseScalar::Int128),
+            ClickHouseDataType::Int256 => Self::Scalar(ClickHouseScalar::Int256),
+            ClickHouseDataType::Float32 => Self::Scalar(ClickHouseScalar::Float32),
+            ClickHouseDataType::Float64 => Self::Scalar(ClickHouseScalar::Float64),
+            ClickHouseDataType::Decimal { .. } => Self::Scalar(ClickHouseScalar::Decimal),
+            ClickHouseDataType::Decimal32 { .. } => Self::Scalar(ClickHouseScalar::Decimal32),
+            ClickHouseDataType::Decimal64 { .. } => Self::Scalar(ClickHouseScalar::Decimal64),
+            ClickHouseDataType::Decimal128 { .. } => Self::Scalar(ClickHouseScalar::Decimal128),
+            ClickHouseDataType::Decimal256 { .. } => Self::Scalar(ClickHouseScalar::Decimal256),
+            ClickHouseDataType::Date => Self::Scalar(ClickHouseScalar::Date),
+            ClickHouseDataType::Date32 => Self::Scalar(ClickHouseScalar::Date32),
+            ClickHouseDataType::DateTime { .. } => Self::Scalar(ClickHouseScalar::DateTime),
+            ClickHouseDataType::DateTime64 { ..
} => Self::Scalar(ClickHouseScalar::DateTime64), + ClickHouseDataType::Json => Self::Scalar(ClickHouseScalar::Json), + ClickHouseDataType::Uuid => Self::Scalar(ClickHouseScalar::Uuid), + ClickHouseDataType::IPv4 => Self::Scalar(ClickHouseScalar::IPv4), + ClickHouseDataType::IPv6 => Self::Scalar(ClickHouseScalar::IPv6), + ClickHouseDataType::LowCardinality(inner) => Self::new(inner, namespace), + ClickHouseDataType::Nested(entries) => { + let mut fields = BTreeMap::new(); + + for (name, field_data_type) in entries { + let field_name = match name { + Identifier::DoubleQuoted(n) => n, + Identifier::BacktickQuoted(n) => n, + Identifier::Unquoted(n) => n, + }; + + let field_namespace = format!("{namespace}_{field_name}"); + + let field_definition = Self::new(field_data_type, &field_namespace); + + if fields + .insert(field_name.to_owned(), field_definition) + .is_some() + { + // on duplicate field names, fall back to unknown type + return Self::Unknown { + name: namespace.to_owned(), + }; + } + } + + Self::Object { + name: namespace.to_owned(), + fields, + } + } + ClickHouseDataType::Array(element) => Self::Array { + element_type: Box::new(Self::new(element, namespace)), + }, + ClickHouseDataType::Map { .. } => Self::Unknown { + name: namespace.to_owned(), + }, + ClickHouseDataType::Tuple(entries) => { + let mut fields = BTreeMap::new(); + + for (name, field_data_type) in entries { + let field_name = if let Some(name) = name { + match name { + Identifier::DoubleQuoted(n) => n, + Identifier::BacktickQuoted(n) => n, + Identifier::Unquoted(n) => n, + } + } else { + return Self::Unknown { + name: namespace.to_owned(), + }; + }; + + let field_namespace = format!("{namespace}.{field_name}"); + + let field_definition = Self::new(field_data_type, &field_namespace); + + if fields + .insert(field_name.to_owned(), field_definition) + .is_some() + { + // on duplicate field names, fall back to unknown type + return Self::Unknown { + name: namespace.to_owned(), + }; + } + } + + Self::Object { + name: namespace.to_owned(), + fields, + } + } + ClickHouseDataType::Enum(variants) => { + let name = namespace.to_owned(); + let variants = variants + .iter() + .map(|(super::SingleQuotedString(variant), _)| variant.to_owned()) + .collect(); + + Self::Scalar(ClickHouseScalar::Enum { name, variants }) + } + + ClickHouseDataType::SimpleAggregateFunction { + function: _, + arguments, + } => { + if let (Some(data_type), 1) = (arguments.first(), arguments.len()) { + Self::new(data_type, namespace) + } else { + Self::Unknown { + name: namespace.to_owned(), + } + } + } + ClickHouseDataType::AggregateFunction { + function, + arguments, + } => { + let arg_len = arguments.len(); + let first = arguments.first(); + let agg_fn_name = match &function.name { + Identifier::DoubleQuoted(n) => n, + Identifier::BacktickQuoted(n) => n, + Identifier::Unquoted(n) => n, + }; + + if let (Some(data_type), 1) = (first, arg_len) { + Self::new(data_type, namespace) + } else if let (Some(data_type), 2, "anyIf") = (first, arg_len, agg_fn_name.as_str()) + { + Self::new(data_type, namespace) + } else { + Self::Unknown { + name: namespace.to_owned(), + } + } + } + ClickHouseDataType::Nothing => Self::Unknown { + name: namespace.to_owned(), + }, + } + } + pub fn type_identifier(&self) -> models::Type { + match self { + ClickHouseTypeDefinition::Scalar(scalar) => models::Type::Named { + name: scalar.type_name(), + }, + ClickHouseTypeDefinition::Nullable { inner } => models::Type::Nullable { + underlying_type: Box::new(inner.type_identifier()), + }, + 
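// arrays wrap their element's type identifier; object and unknown types are referenced by their generated names +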
ClickHouseTypeDefinition::Array { element_type } => models::Type::Array { + element_type: Box::new(element_type.type_identifier()), + }, + ClickHouseTypeDefinition::Object { name, fields: _ } => models::Type::Named { + name: name.to_owned(), + }, + ClickHouseTypeDefinition::Unknown { name } => models::Type::Named { + name: name.to_owned(), + }, + } + } + /// returns the schema type definitions for this type + /// note that ScalarType definitions may be duplicated + pub fn type_definitions( + &self, + ) -> ( + Vec<(String, models::ScalarType)>, + Vec<(String, models::ObjectType)>, + ) { + match self { + ClickHouseTypeDefinition::Scalar(scalar) => { + (vec![(scalar.type_name(), scalar.type_definition())], vec![]) + } + ClickHouseTypeDefinition::Nullable { inner } => inner.type_definitions(), + ClickHouseTypeDefinition::Array { element_type } => element_type.type_definitions(), + ClickHouseTypeDefinition::Object { + name: namespace, + fields, + } => { + let mut object_type_fields = vec![]; + let mut object_type_definitions = vec![]; + let mut scalar_type_definitions = vec![]; + + for (field_name, field) in fields { + let (mut scalars, mut objects) = field.type_definitions(); + + scalar_type_definitions.append(&mut scalars); + object_type_definitions.append(&mut objects); + + object_type_fields.push(( + field_name.to_owned(), + models::ObjectField { + description: None, + r#type: field.type_identifier(), + }, + )); + } + + object_type_definitions.push(( + namespace.to_string(), + models::ObjectType { + description: None, + fields: object_type_fields.into_iter().collect(), + }, + )); + + (scalar_type_definitions, object_type_definitions) + } + ClickHouseTypeDefinition::Unknown { name } => { + let definition = models::ScalarType { + representation: None, + aggregate_functions: BTreeMap::new(), + comparison_operators: BTreeMap::new(), + }; + (vec![(name.to_owned(), definition)], vec![]) + } + } + } + pub fn cast_type(&self) -> ClickHouseDataType { + match self { + ClickHouseTypeDefinition::Scalar(scalar) => scalar.cast_type(), + ClickHouseTypeDefinition::Nullable { inner } => { + ClickHouseDataType::Nullable(Box::new(inner.cast_type())) + } + ClickHouseTypeDefinition::Array { element_type } => { + ClickHouseDataType::Array(Box::new(element_type.cast_type())) + } + ClickHouseTypeDefinition::Object { name: _, fields } => { + ClickHouseDataType::Nested( + fields + .iter() + .map(|(key, value)| { + // todo: prevent issues where the key contains unescaped double quotes + (Identifier::DoubleQuoted(key.to_owned()), value.cast_type()) + }) + .collect(), + ) + } + ClickHouseTypeDefinition::Unknown { .. } => ClickHouseDataType::Json, + } + } + pub fn aggregate_functions( + &self, + ) -> Vec<(ClickHouseSingleColumnAggregateFunction, ClickHouseScalar)> { + match self { + ClickHouseTypeDefinition::Scalar(scalar) => scalar.aggregate_functions(), + ClickHouseTypeDefinition::Nullable { inner } => inner.aggregate_functions(), + ClickHouseTypeDefinition::Array { .. } => vec![], + ClickHouseTypeDefinition::Object { .. } => vec![], + ClickHouseTypeDefinition::Unknown { .. 
} => vec![], + } + } +} diff --git a/crates/ndc-clickhouse/src/sql/query_builder.rs b/crates/ndc-clickhouse/src/sql/query_builder.rs index c0d8466..bc0b85d 100644 --- a/crates/ndc-clickhouse/src/sql/query_builder.rs +++ b/crates/ndc-clickhouse/src/sql/query_builder.rs @@ -1492,7 +1492,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let mut additional_predicates = vec![]; let mut last_join_alias = current_join_alias.clone(); - let mut last_collection_name = current_collection.clone(); + let mut last_collection_name = current_collection; for path_element in path { let join_alias = Ident::new_quoted(format!("_exists_{name_index}")); @@ -1680,6 +1680,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { ) })?; + // todo: revise whether we want to get the data type from the type definition instead Ok(column.data_type.to_owned()) } } diff --git a/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs b/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs index 2bb0a24..c221d1a 100644 --- a/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs +++ b/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs @@ -4,7 +4,9 @@ use config::{ColumnConfig, ServerConfig}; use indexmap::IndexMap; use ndc_sdk::models; -use crate::schema::{ClickHouseScalarType, ClickhouseDataType}; +use crate::schema::{ + ClickHouseDataType, ClickHouseSingleColumnAggregateFunction, ClickHouseTypeDefinition, +}; /// Tuple(rows , aggregates ) pub struct RowsetTypeString { @@ -69,20 +71,33 @@ impl AggregatesTypeString { function, } => { let column = get_column(column_alias, table_alias, config)?; - let scalar_type = - get_scalar_column_type(column, column_alias, table_alias)?; + let data_type = get_data_type(column, column_alias, table_alias)?; + let type_definition = ClickHouseTypeDefinition::from_table_column( + &data_type, + column_alias, + table_alias, + ); - let aggregate_functions = scalar_type.aggregate_functions(); + let aggregate_function = + ClickHouseSingleColumnAggregateFunction::from_str(&function).map_err( + |_err| TypeStringError::UnknownAggregateFunction { + table: table_alias.to_owned(), + column: column_alias.to_owned(), + data_type: column.data_type.to_owned(), + function: function.to_owned(), + }, + )?; + + let aggregate_functions = type_definition.aggregate_functions(); let result_type = aggregate_functions .iter() - .find(|(f, _)| &f.to_string() == function) - .map(|(_, r)| r) + .find(|(function, _)| function == &aggregate_function) + .map(|(_, result_type)| result_type) .ok_or_else(|| TypeStringError::UnknownAggregateFunction { table: table_alias.to_owned(), column: column_alias.to_owned(), data_type: column.data_type.to_owned(), - scalar_type: scalar_type.to_string(), function: function.to_owned(), })?; @@ -116,7 +131,14 @@ impl RowsTypeString { todo!("support nested field selection") } let column = get_column(column_alias, table_alias, config)?; - FieldTypeString::Column(column.data_type.to_owned()) + let data_type = + get_data_type(&column, &column_alias, &table_alias)?; + let type_definition = ClickHouseTypeDefinition::from_table_column( + &data_type, + column_alias, + table_alias, + ); + FieldTypeString::Column(type_definition.cast_type().to_string()) } models::Field::Relationship { query, @@ -238,79 +260,18 @@ fn get_column<'a>( Ok(column) } -fn get_scalar_column_type( +fn get_data_type( column: &ColumnConfig, column_alias: &str, table_alias: &str, -) -> Result { - let data_type = ClickhouseDataType::from_str(&column.data_type).map_err(|_err| { +) -> Result { + 
ClickHouseDataType::from_str(&column.data_type).map_err(|_err| { TypeStringError::CannotParseTypeString { table: table_alias.to_owned(), column: column_alias.to_owned(), data_type: column.data_type.to_owned(), } - })?; - - let scalar_type = - get_scalar_type(&data_type).ok_or_else(|| TypeStringError::ColumnNotScalar { - table: table_alias.to_owned(), - column: column_alias.to_owned(), - data_type: column.data_type.to_owned(), - })?; - - Ok(scalar_type) -} - -fn get_scalar_type(data_type: &ClickhouseDataType) -> Option { - match data_type { - ClickhouseDataType::Nullable(data_type) => get_scalar_type(data_type), - ClickhouseDataType::Bool => Some(ClickHouseScalarType::Bool), - ClickhouseDataType::String | ClickhouseDataType::FixedString(_) => { - Some(ClickHouseScalarType::String) - } - ClickhouseDataType::UInt8 => Some(ClickHouseScalarType::UInt8), - ClickhouseDataType::UInt16 => Some(ClickHouseScalarType::UInt16), - ClickhouseDataType::UInt32 => Some(ClickHouseScalarType::UInt32), - ClickhouseDataType::UInt64 => Some(ClickHouseScalarType::UInt64), - ClickhouseDataType::UInt128 => Some(ClickHouseScalarType::UInt128), - ClickhouseDataType::UInt256 => Some(ClickHouseScalarType::UInt256), - ClickhouseDataType::Int8 => Some(ClickHouseScalarType::Int8), - ClickhouseDataType::Int16 => Some(ClickHouseScalarType::Int16), - ClickhouseDataType::Int32 => Some(ClickHouseScalarType::Int32), - ClickhouseDataType::Int64 => Some(ClickHouseScalarType::Int64), - ClickhouseDataType::Int128 => Some(ClickHouseScalarType::Int128), - ClickhouseDataType::Int256 => Some(ClickHouseScalarType::Int256), - ClickhouseDataType::Float32 => Some(ClickHouseScalarType::Float32), - ClickhouseDataType::Float64 => Some(ClickHouseScalarType::Float64), - ClickhouseDataType::Decimal { .. } => Some(ClickHouseScalarType::Decimal), - ClickhouseDataType::Decimal32 { .. } => Some(ClickHouseScalarType::Decimal32), - ClickhouseDataType::Decimal64 { .. } => Some(ClickHouseScalarType::Decimal64), - ClickhouseDataType::Decimal128 { .. } => Some(ClickHouseScalarType::Decimal128), - ClickhouseDataType::Decimal256 { .. } => Some(ClickHouseScalarType::Decimal256), - ClickhouseDataType::Date => Some(ClickHouseScalarType::Date), - ClickhouseDataType::Date32 => Some(ClickHouseScalarType::Date32), - ClickhouseDataType::DateTime { .. } => Some(ClickHouseScalarType::DateTime), - ClickhouseDataType::DateTime64 { .. } => Some(ClickHouseScalarType::DateTime64), - ClickhouseDataType::Json => Some(ClickHouseScalarType::Json), - ClickhouseDataType::Uuid => Some(ClickHouseScalarType::Uuid), - ClickhouseDataType::IPv4 => Some(ClickHouseScalarType::IPv4), - ClickhouseDataType::IPv6 => Some(ClickHouseScalarType::IPv6), - ClickhouseDataType::LowCardinality(data_type) => get_scalar_type(data_type), - ClickhouseDataType::Nested(_) - | ClickhouseDataType::Array(_) - | ClickhouseDataType::Map { .. 
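Note: the typecasting rework above routes everything through ClickHouseTypeDefinition. The sketch below is illustrative only and not part of any patch; it assumes the signatures visible in this diff (ClickHouseDataType::from_str, ClickHouseTypeDefinition::from_table_column, and cast_type), and column_cast_string is a hypothetical helper name.

    use std::str::FromStr;

    use crate::schema::{ClickHouseDataType, ClickHouseTypeDefinition};

    // Derive the string used to typecast a selected column, the way
    // RowsTypeString now does in the hunk above.
    fn column_cast_string(
        configured_type: &str, // e.g. "Nullable(String)" from the column config
        column_alias: &str,
        table_alias: &str,
    ) -> Option<String> {
        // parse the configured type string into a structured data type
        let data_type = ClickHouseDataType::from_str(configured_type).ok()?;
        // namespace the definition by table and column
        let definition =
            ClickHouseTypeDefinition::from_table_column(&data_type, column_alias, table_alias);
        // cast_type() yields the ClickHouse type the column should be cast to
        Some(definition.cast_type().to_string())
    }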
From 810a515c701c96ecf3ac5ae7bbc9f7da78cab544 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Sun, 31 Mar 2024 18:02:32 -0400
Subject: [PATCH 05/28] depend on ndc-sdk-rs repo instead of ndc-hub

---
 crates/ndc-clickhouse/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/ndc-clickhouse/Cargo.toml b/crates/ndc-clickhouse/Cargo.toml
index 4e9391e..8e779a1 100644
--- a/crates/ndc-clickhouse/Cargo.toml
+++ b/crates/ndc-clickhouse/Cargo.toml
@@ -8,7 +8,7 @@ async-trait = "0.1.78"
 client = { path = "../client" }
 config = { path = "../config" }
 indexmap = "2.1.0"
-ndc-sdk = { git = "https://github.com/hasura/ndc-hub.git", rev = "4c31e8b", package = "ndc-sdk" }
+ndc-sdk = { git = "https://github.com/hasura/ndc-sdk-rs", rev = "7b56fac", package = "ndc-sdk" }
 peg = "0.8.2"
 prometheus = "0.13.3"
 reqwest = { version = "0.11.27", features = [

From c38704b05dcd2865050f584c5dbb3b74cd1783df Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Sun, 31 Mar 2024 18:03:44 -0400
Subject: [PATCH 06/28] add configuration.schema.json to generated config

---
 crates/config/src/lib.rs               | 12 ++++++++++++
 crates/ndc-clickhouse-cli/Cargo.toml   |  1 +
 crates/ndc-clickhouse-cli/src/main.rs  | 20 ++++++++++++++++----
 crates/ndc-clickhouse/src/connector.rs |  4 ++--
 4 files changed, 31 insertions(+), 6 deletions(-)

diff --git a/crates/config/src/lib.rs b/crates/config/src/lib.rs
index a21c77f..d5dd006 100644
--- a/crates/config/src/lib.rs
+++ b/crates/config/src/lib.rs
@@ -3,6 +3,9 @@ use serde::{Deserialize, Serialize};
 
 #[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
 pub struct ServerConfigFile {
+    #[serde(rename = "$schema")]
+    pub schema: String,
+
     /// A list of tables available in this database
     pub tables: Vec<TableConfig>,
 }
@@ -22,9 +25,14 @@ pub struct ConnectionConfig {
 
 #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
 pub struct TableConfig {
+    /// The table name
     pub name: String,
+    /// The table schema
     pub schema: String,
+    /// The table alias defaults to "_", except for tables in the "default" schema where the table name is used
+    /// This is the name exposed to the engine, and may be configured by users. This is preserved through config updates
     pub alias: String,
+    /// Comments are sourced from the database table comment
     pub comment: Option<String>,
     pub primary_key: Option<PrimaryKey>,
     pub columns: Vec<ColumnConfig>,
 }
@@ -38,9 +46,13 @@ pub struct PrimaryKey {
 
 #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
 pub struct ColumnConfig {
+    /// The column name
     pub name: String,
+    /// The column alias defaults to the column name, but may be changed by users. This is preserved through config updates
     pub alias: String,
+    /// The column data type
     pub data_type: String,
 }
 
 pub const CONFIG_FILE_NAME: &str = "configuration.json";
+pub const CONFIG_SCHEMA_FILE_NAME: &str = "configuration.schema.json";

diff --git a/crates/ndc-clickhouse-cli/Cargo.toml b/crates/ndc-clickhouse-cli/Cargo.toml
index 6c636e1..ad9b21c 100644
--- a/crates/ndc-clickhouse-cli/Cargo.toml
+++ b/crates/ndc-clickhouse-cli/Cargo.toml
@@ -7,6 +7,7 @@ edition.workspace = true
 clap = { version = "4.5.3", features = ["derive", "env"] }
 client = { path = "../client" }
 config = { path = "../config" }
+schemars = "0.8.16"
 serde = { version = "1.0.197", features = ["derive"] }
 serde_json = "1.0.114"
 tokio = { version = "1.36.0", features = ["macros", "rt-multi-thread", "fs"] }

diff --git a/crates/ndc-clickhouse-cli/src/main.rs b/crates/ndc-clickhouse-cli/src/main.rs
index 9b1801c..8e08178 100644
--- a/crates/ndc-clickhouse-cli/src/main.rs
+++ b/crates/ndc-clickhouse-cli/src/main.rs
@@ -7,8 +7,10 @@ use std::{
 use clap::{Parser, Subcommand, ValueEnum};
 use config::{
     ColumnConfig, ConnectionConfig, PrimaryKey, ServerConfigFile, TableConfig, CONFIG_FILE_NAME,
+    CONFIG_SCHEMA_FILE_NAME,
 };
 use database_introspection::{introspect_database, ColumnInfo, TableInfo};
+use schemars::schema_for;
 use tokio::fs;
 
 mod database_introspection;
@@ -132,10 +134,11 @@ pub async fn update_tables_config(
     let table_infos = introspect_database(connection_config).await?;
 
     let file_path = configuration_dir.as_ref().join(CONFIG_FILE_NAME);
+    let schema_file_path = configuration_dir.as_ref().join(CONFIG_SCHEMA_FILE_NAME);
 
     let old_config = match fs::read_to_string(&file_path).await {
         Ok(file) => serde_json::from_str(&file)
-            .map_err(|err| format!("Error parsing {CONFIG_FILE_NAME}: {err}")),
+            .map_err(|err| format!("Error parsing {CONFIG_FILE_NAME}: {err}\n\nDelete {CONFIG_FILE_NAME} to create a fresh file")),
         Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(ServerConfigFile::default()),
         Err(_) => Err(format!("Error reading {CONFIG_FILE_NAME}")),
     }?;
@@ -183,9 +186,18 @@ pub async fn update_tables_config(
         })
         .collect();
 
-    let config = ServerConfigFile { tables };
-
-    fs::write(&file_path, serde_json::to_string(&config)?).await?;
+    let config = ServerConfigFile {
+        schema: CONFIG_SCHEMA_FILE_NAME.to_owned(),
+        tables,
+    };
+    let config_schema = schema_for!(ServerConfigFile);
+
+    fs::write(&file_path, serde_json::to_string_pretty(&config)?).await?;
+    fs::write(
+        &schema_file_path,
+        serde_json::to_string_pretty(&config_schema)?,
+    )
+    .await?;
 
     Ok(())
 }

diff --git a/crates/ndc-clickhouse/src/connector.rs b/crates/ndc-clickhouse/src/connector.rs
index 8bb3ac7..772a4d5 100644
--- a/crates/ndc-clickhouse/src/connector.rs
+++ b/crates/ndc-clickhouse/src/connector.rs
@@ -140,8 +140,8 @@ pub async fn read_server_config(
             _ => ParseError::IoError(err),
         })?;
 
-    let ServerConfigFile { tables } = serde_json::from_str::<ServerConfigFile>(&config_file)
-        .map_err(|err| {
+    let ServerConfigFile { tables, schema: _ } =
+        serde_json::from_str::<ServerConfigFile>(&config_file).map_err(|err| {
             ParseError::ParseError(LocatedError {
                 file_path,
                 line: err.line(),
                 column: err.column(),
                 message: err.to_string(),
             })
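Note: the schema file generation added in this patch relies on schemars deriving a JSON schema from the Rust config types. A minimal, self-contained sketch of the same mechanism (assuming only the public schemars and serde_json APIs; ExampleConfigFile is a hypothetical stand-in for ServerConfigFile):

    use schemars::{schema_for, JsonSchema};
    use serde::{Deserialize, Serialize};

    #[derive(Debug, Default, Serialize, Deserialize, JsonSchema)]
    struct ExampleConfigFile {
        // mirrors the "$schema" field added to ServerConfigFile above
        #[serde(rename = "$schema")]
        schema: String,
    }

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        // generate a JSON schema document describing the config file format
        let schema = schema_for!(ExampleConfigFile);
        // pretty-printed, as the CLI writes configuration.schema.json
        println!("{}", serde_json::to_string_pretty(&schema)?);
        Ok(())
    }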
From c59109087f6c3d976be758695fa4dd09f36e8047 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Sun, 31 Mar 2024 18:05:08 -0400
Subject: [PATCH 07/28] add changelog

---
 CHANGELOG.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 65b7a9d..2be85c4 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 
 - Return error if empty list of query variables passed. Variables should be ommited or be a list with at least one member
+- Use table comment as description for corresponding collection and object type
+- Return json representation for applicable scalar types in schema response
 
 ## [0.2.1]

From 73abe1c06e45e130fba8f465258ed989efeb6f91 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Mon, 1 Apr 2024 02:04:56 -0400
Subject: [PATCH 08/28] remove unused comparison column methods & variants

---
 .../sql/query_builder/comparison_column.rs | 96 ------------------
 1 file changed, 96 deletions(-)

diff --git a/crates/ndc-clickhouse/src/sql/query_builder/comparison_column.rs b/crates/ndc-clickhouse/src/sql/query_builder/comparison_column.rs
index 50455dd..d75c730 100644
--- a/crates/ndc-clickhouse/src/sql/query_builder/comparison_column.rs
+++ b/crates/ndc-clickhouse/src/sql/query_builder/comparison_column.rs
@@ -50,56 +50,6 @@ pub enum ComparisonColumn {
     },
 }
 
-#[derive(Debug)]
-pub struct ComparisonColumnWithoutJoins(ComparisonColumn);
-
-impl ComparisonColumnWithoutJoins {
-    /// applies the expression without adding the joins
-    pub fn apply<F>(&self, use_column: F) -> (Expr, Vec<Join>)
-    where
-        F: FnOnce(Expr) -> (Expr, Vec<Join>),
-    {
-        match &self.0 {
-            ComparisonColumn::Simple {
-                column_ident,
-                data_type: _,
-            } => use_column(column_ident.clone()),
-            ComparisonColumn::Flat {
-                column_ident,
-                joins: _,
-                additional_predicate,
-                data_type: _,
-            } => {
-                let (expr, additional_joins) = use_column(column_ident.clone());
-                let expr = if let Some(additional_expr) = additional_predicate {
-                    and_reducer(expr, additional_expr.clone())
-                } else {
-                    expr
-                };
-
-                (expr, additional_joins)
-            }
-            ComparisonColumn::Grouped {
-                column_ident,
-                joins: _,
-                values_ident,
-                data_type: _,
-            } => {
-                let (expr, additional_joins) = use_column(column_ident.clone().into_expr());
-                let expr = Function::new_unquoted("arrayExists")
-                    .args(vec![
-                        Lambda::new(vec![column_ident.clone()], expr)
-                            .into_expr()
-                            .into_arg(),
-                        values_ident.clone().into_arg(),
-                    ])
-                    .into_expr();
-                (expr, additional_joins)
-            }
-        }
-    }
-}
-
 impl ComparisonColumn {
     pub fn new_simple(column_ident: Expr, data_type: String) -> Self {
         Self::Simple {
             column_ident,
@@ -140,52 +90,6 @@ impl ComparisonColumn {
             | ComparisonColumn::Grouped { data_type, .. } => data_type.to_owned(),
         }
     }
-    /// extract the joins for a column, if any. returns a new column with joins empty.
-    /// Should be used if the column will be cloned, to avoid duplicating joins.
-    /// todo: improve types so this this invariant cannot be broken.
-    /// the main problem to keep in mind is that we cannot allow the
-    pub fn extract_joins(self) -> (ComparisonColumnWithoutJoins, Vec<Join>) {
-        match self {
-            ComparisonColumn::Simple {
-                column_ident,
-                data_type,
-            } => (
-                ComparisonColumnWithoutJoins(ComparisonColumn::Simple {
-                    column_ident,
-                    data_type,
-                }),
-                vec![],
-            ),
-            ComparisonColumn::Flat {
-                column_ident,
-                joins,
-                additional_predicate,
-                data_type,
-            } => (
-                ComparisonColumnWithoutJoins(ComparisonColumn::Flat {
-                    column_ident,
-                    joins: vec![],
-                    additional_predicate,
-                    data_type,
-                }),
-                joins,
-            ),
-            ComparisonColumn::Grouped {
-                column_ident,
-                joins,
-                values_ident,
-                data_type,
-            } => (
-                ComparisonColumnWithoutJoins(ComparisonColumn::Grouped {
-                    column_ident,
-                    joins: vec![],
-                    values_ident,
-                    data_type,
-                }),
-                joins,
-            ),
-        }
-    }
     /// consumes self, and wraps an expression and set of joins appropriately.
     /// if self is a simple column, this does nothing and returns the inputs as-is
     /// if self contains any joins, those are added to the set of joins passed as an argument

From 928d7960fc7f9133dd4c4c632f4b025c8aca9af2 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Mon, 1 Apr 2024 02:05:46 -0400
Subject: [PATCH 09/28] remove unused imports

---
 crates/ndc-clickhouse/src/connector/handler/schema.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/crates/ndc-clickhouse/src/connector/handler/schema.rs b/crates/ndc-clickhouse/src/connector/handler/schema.rs
index fae1088..e1133ef 100644
--- a/crates/ndc-clickhouse/src/connector/handler/schema.rs
+++ b/crates/ndc-clickhouse/src/connector/handler/schema.rs
@@ -1,4 +1,4 @@
-use crate::schema::{ClickHouseDataType, ClickHouseTypeDefinition, Identifier, SingleQuotedString};
+use crate::schema::{ClickHouseDataType, ClickHouseTypeDefinition};
 use config::{PrimaryKey, ServerConfig};
 use ndc_sdk::{connector::SchemaError, models};
 use std::{collections::BTreeMap, str::FromStr};

From a39d2888b6d1cbd3d480654609043ab305c1a303 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Mon, 1 Apr 2024 02:44:10 -0400
Subject: [PATCH 10/28] move common dependencies into common crate (instead of
 individual crates)

---
 crates/{client => common}/Cargo.toml                    |  9 +++++----
 .../src/clickhouse_datatype.rs}                         |  0
 crates/{client/src/lib.rs => common/src/client.rs}      |  3 ++-
 crates/{config/src/lib.rs => common/src/config.rs}      |  0
 crates/common/src/lib.rs                                |  3 +++
 crates/config/Cargo.toml                                |  8 --------
 crates/ndc-clickhouse-cli/Cargo.toml                    |  3 +--
 crates/ndc-clickhouse-cli/src/database_introspection.rs |  2 +-
 crates/ndc-clickhouse-cli/src/main.rs                   |  2 +-
 crates/ndc-clickhouse/Cargo.toml                        |  4 +---
 crates/ndc-clickhouse/src/connector.rs                  |  4 ++--
 crates/ndc-clickhouse/src/connector/handler/explain.rs  |  3 +--
 crates/ndc-clickhouse/src/connector/handler/query.rs    |  3 +--
 crates/ndc-clickhouse/src/connector/handler/schema.rs   |  7 +++++--
 crates/ndc-clickhouse/src/connector/state.rs            |  3 +--
 crates/ndc-clickhouse/src/schema.rs                     |  2 --
 crates/ndc-clickhouse/src/schema/type_definition.rs     |  8 +++-----
 crates/ndc-clickhouse/src/sql/query_builder.rs          |  2 +-
 .../ndc-clickhouse/src/sql/query_builder/typecasting.rs |  9 +++++----
 19 files changed, 33 insertions(+), 42 deletions(-)
 rename crates/{client => common}/Cargo.toml (74%)
 rename crates/{ndc-clickhouse/src/schema/clickhouse_data_type.rs => common/src/clickhouse_datatype.rs} (100%)
 rename crates/{client/src/lib.rs => common/src/client.rs} (98%)
 rename crates/{config/src/lib.rs => common/src/config.rs} (100%)
 create mode 100644 crates/common/src/lib.rs
 delete mode 100644 crates/config/Cargo.toml

diff --git a/crates/client/Cargo.toml b/crates/common/Cargo.toml
similarity index 74%
rename from crates/client/Cargo.toml
rename to crates/common/Cargo.toml
index 903e4b4..6ba87a2 100644
--- a/crates/client/Cargo.toml
+++ b/crates/common/Cargo.toml
@@ -1,13 +1,14 @@
 [package]
-name = "client"
+name = "common"
 version.workspace = true
 edition.workspace = true
 
 [dependencies]
-config = { path = "../config" }
 reqwest = { version = "0.11.27", features = [
-  "json",
-  "rustls-tls",
+    "json",
+    "rustls-tls",
 ], default-features = false }
 serde = { version = "1.0.197", features = ["derive"] }
 serde_json = "1.0.114"
+peg = "0.8.2"
+schemars = "0.8.16"

diff --git a/crates/ndc-clickhouse/src/schema/clickhouse_data_type.rs b/crates/common/src/clickhouse_datatype.rs
similarity index 100%
rename from crates/ndc-clickhouse/src/schema/clickhouse_data_type.rs
rename to crates/common/src/clickhouse_datatype.rs

diff --git a/crates/client/src/lib.rs b/crates/common/src/client.rs
similarity index 98%
rename from crates/client/src/lib.rs
rename to crates/common/src/client.rs
index 5404486..29f99ef 100644
--- a/crates/client/src/lib.rs
+++ b/crates/common/src/client.rs
@@ -1,8 +1,9 @@
 use std::error::Error;
 
-use config::ConnectionConfig;
 use serde::{de::DeserializeOwned, Deserialize};
 
+use crate::config::ConnectionConfig;
+
 pub fn get_http_client(
     _connection_config: &ConnectionConfig,
 ) -> Result<reqwest::Client, Box<dyn Error>> {

diff --git a/crates/config/src/lib.rs b/crates/common/src/config.rs
similarity index 100%
rename from crates/config/src/lib.rs
rename to crates/common/src/config.rs

diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs
new file mode 100644
index 0000000..46a185a
--- /dev/null
+++ b/crates/common/src/lib.rs
@@ -0,0 +1,3 @@
+pub mod clickhouse_datatype;
+pub mod client;
+pub mod config;

diff --git a/crates/config/Cargo.toml b/crates/config/Cargo.toml
deleted file mode 100644
index 12f0f56..0000000
--- a/crates/config/Cargo.toml
+++ /dev/null
@@ -1,8 +0,0 @@
-[package]
-name = "config"
-version.workspace = true
-edition.workspace = true
-
-[dependencies]
-schemars = "0.8.16"
-serde = { version = "1.0.197", features = ["derive"] }

diff --git a/crates/ndc-clickhouse-cli/Cargo.toml b/crates/ndc-clickhouse-cli/Cargo.toml
index ad9b21c..05d8572 100644
--- a/crates/ndc-clickhouse-cli/Cargo.toml
+++ b/crates/ndc-clickhouse-cli/Cargo.toml
@@ -5,8 +5,7 @@ edition.workspace = true
 
 [dependencies]
 clap = { version = "4.5.3", features = ["derive", "env"] }
-client = { path = "../client" }
-config = { path = "../config" }
+common = { path = "../common" }
 schemars = "0.8.16"
 serde = { version = "1.0.197", features = ["derive"] }
 serde_json = "1.0.114"

diff --git a/crates/ndc-clickhouse-cli/src/database_introspection.rs b/crates/ndc-clickhouse-cli/src/database_introspection.rs
index 2c518b3..8f71aa2 100644
--- a/crates/ndc-clickhouse-cli/src/database_introspection.rs
+++ b/crates/ndc-clickhouse-cli/src/database_introspection.rs
@@ -2,7 +2,7 @@ use std::error::Error;
 
 use serde::{Deserialize, Serialize};
 
-use client::{execute_query, get_http_client};
+use common::client::{execute_query, get_http_client};
 
 use super::ConnectionConfig;
 
diff --git a/crates/ndc-clickhouse-cli/src/main.rs b/crates/ndc-clickhouse-cli/src/main.rs
index 8e08178..ce752cc 100644
--- a/crates/ndc-clickhouse-cli/src/main.rs
+++ b/crates/ndc-clickhouse-cli/src/main.rs
@@ -5,7 +5,7 @@ use std::{
 };
 
 use clap::{Parser, Subcommand, ValueEnum};
-use config::{
+use common::config::{
     ColumnConfig, ConnectionConfig, PrimaryKey, ServerConfigFile, TableConfig, CONFIG_FILE_NAME,
     CONFIG_SCHEMA_FILE_NAME,
 };

diff --git a/crates/ndc-clickhouse/Cargo.toml b/crates/ndc-clickhouse/Cargo.toml
index 8e779a1..82dfd34 100644
--- a/crates/ndc-clickhouse/Cargo.toml
+++ b/crates/ndc-clickhouse/Cargo.toml
@@ -5,11 +5,9 @@ edition.workspace = true
 
 [dependencies]
 async-trait = "0.1.78"
-client = { path = "../client" }
-config = { path = "../config" }
+common = { path = "../common" }
 indexmap = "2.1.0"
 ndc-sdk = { git = "https://github.com/hasura/ndc-sdk-rs", rev = "7b56fac", package = "ndc-sdk" }
-peg = "0.8.2"
 prometheus = "0.13.3"
 reqwest = { version = "0.11.27", features = [
     "json",

diff --git a/crates/ndc-clickhouse/src/connector.rs b/crates/ndc-clickhouse/src/connector.rs
index 772a4d5..a50cbfe 100644
--- a/crates/ndc-clickhouse/src/connector.rs
+++ b/crates/ndc-clickhouse/src/connector.rs
@@ -15,7 +15,7 @@ use ndc_sdk::{
 };
 
 use self::state::ServerState;
-use config::{ConnectionConfig, ServerConfig, ServerConfigFile, CONFIG_FILE_NAME};
+use common::config::{ConnectionConfig, ServerConfig, ServerConfigFile, CONFIG_FILE_NAME};
 
 #[derive(Debug, Clone, Default)]
 pub struct ClickhouseConnector;
@@ -61,7 +61,7 @@ impl Connector for ClickhouseConnector {
             .await
             .map_err(|err| HealthError::Other(err.to_string().into()))?;
 
-        client::ping(&client, &configuration.connection)
+        common::client::ping(&client, &configuration.connection)
             .await
             .map_err(|err| HealthError::Other(err.to_string().into()))?;
 
diff --git a/crates/ndc-clickhouse/src/connector/handler/explain.rs b/crates/ndc-clickhouse/src/connector/handler/explain.rs
index 2dc9d4a..d063141 100644
--- a/crates/ndc-clickhouse/src/connector/handler/explain.rs
+++ b/crates/ndc-clickhouse/src/connector/handler/explain.rs
@@ -1,7 +1,6 @@
 use std::collections::BTreeMap;
 
-use client::execute_query;
-use config::ServerConfig;
+use common::{client::execute_query, config::ServerConfig};
 use ndc_sdk::{connector::ExplainError, models};
 use serde::{Deserialize, Serialize};
 
diff --git a/crates/ndc-clickhouse/src/connector/handler/query.rs b/crates/ndc-clickhouse/src/connector/handler/query.rs
index 33e3322..f954a7f 100644
--- a/crates/ndc-clickhouse/src/connector/handler/query.rs
+++ b/crates/ndc-clickhouse/src/connector/handler/query.rs
@@ -1,5 +1,4 @@
-use client::execute_query;
-use config::ServerConfig;
+use common::{client::execute_query, config::ServerConfig};
 use ndc_sdk::{connector::QueryError, models};
 
 use crate::{connector::state::ServerState, sql::QueryBuilder};

diff --git a/crates/ndc-clickhouse/src/connector/handler/schema.rs b/crates/ndc-clickhouse/src/connector/handler/schema.rs
index e1133ef..9ac0bab 100644
--- a/crates/ndc-clickhouse/src/connector/handler/schema.rs
+++ b/crates/ndc-clickhouse/src/connector/handler/schema.rs
@@ -1,5 +1,8 @@
-use crate::schema::{ClickHouseDataType, ClickHouseTypeDefinition};
-use config::{PrimaryKey, ServerConfig};
+use crate::schema::ClickHouseTypeDefinition;
+use common::{
+    clickhouse_datatype::ClickHouseDataType,
+    config::{PrimaryKey, ServerConfig},
+};
 use ndc_sdk::{connector::SchemaError, models};
 use std::{collections::BTreeMap, str::FromStr};
 
diff --git a/crates/ndc-clickhouse/src/connector/state.rs b/crates/ndc-clickhouse/src/connector/state.rs
index 18adfa9..9d114b4 100644
--- a/crates/ndc-clickhouse/src/connector/state.rs
+++ b/crates/ndc-clickhouse/src/connector/state.rs
@@ -1,7 +1,6 @@
 use std::{error::Error, sync::Arc};
 
-use client::get_http_client;
-use config::ServerConfig;
+use common::{client::get_http_client, config::ServerConfig};
 use tokio::sync::RwLock;
 
 #[derive(Debug, Clone)]

diff --git a/crates/ndc-clickhouse/src/schema.rs b/crates/ndc-clickhouse/src/schema.rs
index 58949e1..817a905 100644
--- a/crates/ndc-clickhouse/src/schema.rs
+++ b/crates/ndc-clickhouse/src/schema.rs
@@ -1,9 +1,7 @@
 mod binary_comparison_operator;
-mod clickhouse_data_type;
 mod single_column_aggregate_function;
 mod type_definition;
 
 pub use binary_comparison_operator::*;
-pub use clickhouse_data_type::*;
 pub use single_column_aggregate_function::*;
 pub use type_definition::*;

diff --git a/crates/ndc-clickhouse/src/schema/type_definition.rs b/crates/ndc-clickhouse/src/schema/type_definition.rs
index 61831ad..31efb0e 100644
--- a/crates/ndc-clickhouse/src/schema/type_definition.rs
+++ b/crates/ndc-clickhouse/src/schema/type_definition.rs
@@ -1,11 +1,9 @@
 use std::collections::BTreeMap;
 
+use common::clickhouse_datatype::{ClickHouseDataType, Identifier, SingleQuotedString};
 use ndc_sdk::models;
 
-use super::{
-    ClickHouseBinaryComparisonOperator, ClickHouseDataType,
-    ClickHouseSingleColumnAggregateFunction, Identifier,
-};
+use super::{ClickHouseBinaryComparisonOperator, ClickHouseSingleColumnAggregateFunction};
 
 #[derive(Debug, Clone, strum::Display)]
 pub enum ClickHouseScalar {
@@ -570,7 +568,7 @@ impl ClickHouseTypeDefinition {
             let name = namespace.to_owned();
             let variants = variants
                 .iter()
-                .map(|(super::SingleQuotedString(variant), _)| variant.to_owned())
+                .map(|(SingleQuotedString(variant), _)| variant.to_owned())
                 .collect();
 
             Self::Scalar(ClickHouseScalar::Enum { name, variants })

diff --git a/crates/ndc-clickhouse/src/sql/query_builder.rs b/crates/ndc-clickhouse/src/sql/query_builder.rs
index bc0b85d..3926bee 100644
--- a/crates/ndc-clickhouse/src/sql/query_builder.rs
+++ b/crates/ndc-clickhouse/src/sql/query_builder.rs
@@ -1,6 +1,6 @@
 use std::str::FromStr;
 
-use config::ServerConfig;
+use common::config::ServerConfig;
 use indexmap::IndexMap;
 use ndc_sdk::models;
 
diff --git a/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs b/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs
index c221d1a..c83eeb6 100644
--- a/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs
+++ b/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs
@@ -1,12 +1,13 @@
 use std::{collections::BTreeMap, fmt::Display, str::FromStr};
 
-use config::{ColumnConfig, ServerConfig};
+use common::{
+    clickhouse_datatype::ClickHouseDataType,
+    config::{ColumnConfig, ServerConfig},
+};
 use indexmap::IndexMap;
 use ndc_sdk::models;
 
-use crate::schema::{
-    ClickHouseDataType, ClickHouseSingleColumnAggregateFunction, ClickHouseTypeDefinition,
-};
+use crate::schema::{ClickHouseSingleColumnAggregateFunction, ClickHouseTypeDefinition};
 
 /// Tuple(rows , aggregates )
 pub struct RowsetTypeString {

From 6c54fea9b8002163bb18e146b5d800d5b726c483 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Mon, 1 Apr 2024 04:08:15 -0400
Subject: [PATCH 11/28] use maps for config, move parsing data types into init
 and configuration time rather than runtime

---
 crates/common/src/clickhouse_datatype.rs        |  3 +-
 crates/common/src/config.rs                     | 31 ++++---
 crates/ndc-clickhouse-cli/src/main.rs           | 92 +++++++++++++------
 crates/ndc-clickhouse/src/connector.rs          | 68 ++++++++++++--
 .../src/connector/handler/schema.rs             | 35 +++----
 crates/ndc-clickhouse/src/sql/ast.rs            | 13 ++-
 .../ndc-clickhouse/src/sql/query_builder.rs     | 43 +++------
 .../sql/query_builder/comparison_column.rs      | 16 ++--
 .../src/sql/query_builder/typecasting.rs        | 44 ++-------
 9 files changed, 201 insertions(+), 144 deletions(-)

diff --git a/crates/common/src/clickhouse_datatype.rs b/crates/common/src/clickhouse_datatype.rs
index 1a50d9c..6aed85b 100644
--- a/crates/common/src/clickhouse_datatype.rs
+++ b/crates/common/src/clickhouse_datatype.rs
@@ -76,8 +76,7 @@ impl Display for AggregateFunctionParameter {
 /// A parsed representation of a clickhouse datatype string
 /// This should support the full scope of clickhouse types
 /// To create one from a string slice, use try_from()
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
-#[serde(rename_all = "snake_case", tag = "type")]
+#[derive(Debug, Clone, PartialEq)]
 pub enum ClickHouseDataType {
     Nullable(Box<ClickHouseDataType>),
     Bool,

diff --git a/crates/common/src/config.rs b/crates/common/src/config.rs
index d5dd006..4f5437d 100644
--- a/crates/common/src/config.rs
+++ b/crates/common/src/config.rs
@@ -1,19 +1,27 @@
+use std::collections::BTreeMap;
+
 use schemars::JsonSchema;
 use serde::{Deserialize, Serialize};
 
+use crate::clickhouse_datatype::ClickHouseDataType;
+
 #[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
 pub struct ServerConfigFile {
     #[serde(rename = "$schema")]
     pub schema: String,
 
     /// A list of tables available in this database
-    pub tables: Vec<TableConfig>,
+    pub tables: BTreeMap<String, TableConfig<String>>,
 }
 
-#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
+#[derive(Debug, Default, Clone)]
 pub struct ServerConfig {
     /// the connection part of the config is not part of the config file
     pub connection: ConnectionConfig,
-    pub tables: Vec<TableConfig>,
+    /// The map key is a unique table alias that defaults to "_",
+    /// except for tables in the "default" schema where the table name is used
+    /// This is the name exposed to the engine, and may be configured by users.
+    /// When the configuration is updated, the table is identified by name and schema, and changes to the alias are preserved.
+    pub tables: BTreeMap<String, TableConfig<ClickHouseDataType>>,
 }
 
 #[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
@@ -24,34 +32,33 @@ pub struct ConnectionConfig {
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
-pub struct TableConfig {
+pub struct TableConfig<ColumnDataType> {
     /// The table name
     pub name: String,
     /// The table schema
     pub schema: String,
-    /// The table alias defaults to "_", except for tables in the "default" schema where the table name is used
-    /// This is the name exposed to the engine, and may be configured by users. This is preserved through config updates
-    pub alias: String,
     /// Comments are sourced from the database table comment
     pub comment: Option<String>,
     pub primary_key: Option<PrimaryKey>,
-    pub columns: Vec<ColumnConfig>,
+    /// The map key is a column alias identifying the column and may be customized.
+    /// It defaults to the column name.
+    /// When the configuration is updated, the column is identified by name, and changes to the alias are preserved.
+    pub columns: BTreeMap<String, ColumnConfig<ColumnDataType>>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
 pub struct PrimaryKey {
     pub name: String,
+    /// The names of columns in this primary key
     pub columns: Vec<String>,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
-pub struct ColumnConfig {
+pub struct ColumnConfig<ColumnDataType> {
     /// The column name
     pub name: String,
-    /// The column alias defaults to the column name, but may be changed by users. This is preserved through config updates
-    pub alias: String,
     /// The column data type
-    pub data_type: String,
+    pub data_type: ColumnDataType,
 }
 
 pub const CONFIG_FILE_NAME: &str = "configuration.json";

diff --git a/crates/ndc-clickhouse-cli/src/main.rs b/crates/ndc-clickhouse-cli/src/main.rs
index ce752cc..0f6d092 100644
--- a/crates/ndc-clickhouse-cli/src/main.rs
+++ b/crates/ndc-clickhouse-cli/src/main.rs
@@ -1,13 +1,18 @@
 use std::{
+    collections::BTreeMap,
     env,
     error::Error,
     path::{Path, PathBuf},
+    str::FromStr,
 };
 
 use clap::{Parser, Subcommand, ValueEnum};
-use common::config::{
-    ColumnConfig, ConnectionConfig, PrimaryKey, ServerConfigFile, TableConfig, CONFIG_FILE_NAME,
-    CONFIG_SCHEMA_FILE_NAME,
+use common::{
+    clickhouse_datatype::ClickHouseDataType,
+    config::{
+        ColumnConfig, ConnectionConfig, PrimaryKey, ServerConfigFile, TableConfig,
+        CONFIG_FILE_NAME, CONFIG_SCHEMA_FILE_NAME,
+    },
 };
 use database_introspection::{introspect_database, ColumnInfo, TableInfo};
 use schemars::schema_for;
 use tokio::fs;
@@ -147,11 +152,11 @@ pub async fn update_tables_config(
         .iter()
         .map(|table| {
             let old_table_config = get_old_table_config(table, &old_config.tables);
+            let table_alias = get_table_alias(table, &old_table_config);
 
-            TableConfig {
+            let table_config = TableConfig {
                 name: table.table_name.to_owned(),
                 schema: table.table_schema.to_owned(),
-                alias: get_table_alias(table, &old_table_config),
                 comment: table.table_comment.to_owned(),
                 primary_key: table.primary_key.as_ref().map(|primary_key| PrimaryKey {
                     name: primary_key.to_owned(),
@@ -173,18 +178,36 @@ pub async fn update_tables_config(
                 columns: table
                     .columns
                     .iter()
-                    .map(|column| ColumnConfig {
-                        name: column.column_name.to_owned(),
-                        alias: get_column_alias(
+                    .map(|column| {
+                        let column_alias = get_column_alias(
                             column,
                             &get_old_column_config(column, &old_table_config),
-                        ),
-                        data_type: column.data_type.to_owned(),
+                        );
+                        // check if data type can be parsed, to give early warning to the user
+                        // this is preferable to failing later while handling requests
+                        let _data_type =
+                            ClickHouseDataType::from_str(&column.data_type).map_err(|err| {
+                                format!(
+                                    "Unable to parse data type \"{}\" for column {} in table {}.{}: {}",
+                                    column.data_type,
+                                    column.column_name,
+                                    table.table_schema,
+                                    table.table_name,
+                                    err
+                                )
+                            })?;
+                        let column_config = ColumnConfig {
+                            name: column.column_name.to_owned(),
+                            data_type: column.data_type.to_owned(),
+                        };
+                        Ok((column_alias, column_config))
                     })
-                    .collect(),
-            }
+                    .collect::<Result<_, _>>()?,
+            };
+
+            Ok((table_alias, table_config))
         })
-        .collect();
+        .collect::<Result<_, _>>()?;
 
     let config = ServerConfigFile {
         schema: CONFIG_SCHEMA_FILE_NAME.to_owned(),
         tables,
     };
     let config_schema = schema_for!(ServerConfigFile);
@@ -202,33 +225,45 @@ pub async fn update_tables_config(
     Ok(())
 }
 
+/// Get old table config, if any
+/// Note this uses the table name and schema to search, not the alias
+/// This allows custom aliases to be preserved
 fn get_old_table_config<'a>(
     table: &TableInfo,
-    old_tables: &'a [TableConfig],
-) -> Option<&'a TableConfig> {
-    old_tables.iter().find(|old_table| {
+    old_tables: &'a BTreeMap<String, TableConfig<String>>,
+) -> Option<(&'a String, &'a TableConfig<String>)> {
+    old_tables.iter().find(|(_, old_table)| {
         old_table.name == table.table_name && old_table.schema == table.table_schema
     })
 }
 
+/// Get old column config, if any
+/// Note this uses the column name to search, not the alias
+/// This allows custom aliases to be preserved
 fn get_old_column_config<'a>(
     column: &ColumnInfo,
-    old_table: &Option<&'a TableConfig>,
-) -> Option<&'a ColumnConfig> {
+    old_table: &Option<(&'a String, &'a TableConfig<String>)>,
+) -> Option<(&'a String, &'a ColumnConfig<String>)> {
     old_table
-        .map(|old_table| {
+        .map(|(_, old_table)| {
             old_table
                 .columns
                 .iter()
-                .find(|old_column| old_column.name == column.column_name)
+                .find(|(_, old_column)| old_column.name == column.column_name)
         })
         .flatten()
 }
 
-fn get_table_alias(table: &TableInfo, old_table: &Option<&TableConfig>) -> String {
+/// Table aliases default to "_",
+/// except for tables in the default schema where the table name is used.
+/// Prefer existing, old aliases over creating a new one
+fn get_table_alias(
+    table: &TableInfo,
+    old_table: &Option<(&String, &TableConfig<String>)>,
+) -> String {
     // to preserve any customization, aliases are kept throught updates
-    if let Some(old_table) = old_table {
-        old_table.alias.to_owned()
+    if let Some((old_table_alias, _)) = old_table {
+        old_table_alias.to_string()
     } else if table.table_schema == "default" {
         table.table_name.to_owned()
     } else {
@@ -236,10 +271,15 @@ fn get_table_alias(table: &TableInfo, old_table: &Option<&TableConfig>) -> Strin
     }
 }
 
-fn get_column_alias(column: &ColumnInfo, old_column: &Option<&ColumnConfig>) -> String {
+/// Column aliases default to the column name.
+/// Prefer existing, old aliases over creating a new one
+fn get_column_alias(
+    column: &ColumnInfo,
+    old_column: &Option<(&String, &ColumnConfig<String>)>,
+) -> String {
     // to preserve any customization, aliases are kept throught updates
-    if let Some(old_column) = old_column {
-        old_column.alias.to_owned()
+    if let Some((old_column_alias, _)) = old_column {
+        old_column_alias.to_string()
     } else {
         column.column_name.to_owned()
     }

diff --git a/crates/ndc-clickhouse/src/connector.rs b/crates/ndc-clickhouse/src/connector.rs
index a50cbfe..e225898 100644
--- a/crates/ndc-clickhouse/src/connector.rs
+++ b/crates/ndc-clickhouse/src/connector.rs
@@ -1,21 +1,28 @@
 pub mod handler;
 pub mod state;
 
-use std::{env, path::Path};
+use std::{env, path::Path, str::FromStr};
 use tokio::fs;
 
 use async_trait::async_trait;
 use ndc_sdk::{
     connector::{
         Connector, ConnectorSetup, ExplainError, FetchMetricsError, HealthError,
-        InitializationError, LocatedError, MutationError, ParseError, QueryError, SchemaError,
+        InitializationError, InvalidNode, InvalidNodes, KeyOrIndex, LocatedError, MutationError,
+        ParseError, QueryError, SchemaError,
     },
     json_response::JsonResponse,
     models,
 };
 
 use self::state::ServerState;
-use common::config::{ConnectionConfig, ServerConfig, ServerConfigFile, CONFIG_FILE_NAME};
+use common::{
+    clickhouse_datatype::ClickHouseDataType,
+    config::{
+        ColumnConfig, ConnectionConfig, ServerConfig, ServerConfigFile, TableConfig,
+        CONFIG_FILE_NAME,
+    },
+};
 
 #[derive(Debug, Clone, Default)]
 pub struct ClickhouseConnector;
@@ -140,17 +147,66 @@ pub async fn read_server_config(
             _ => ParseError::IoError(err),
         })?;
 
-    let ServerConfigFile { tables, schema: _ } =
+    let ServerConfigFile { schema: _, tables } =
         serde_json::from_str::<ServerConfigFile>(&config_file).map_err(|err| {
             ParseError::ParseError(LocatedError {
-                file_path,
+                file_path: file_path.to_owned(),
                 line: err.line(),
                 column: err.column(),
                 message: err.to_string(),
             })
         })?;
 
-    Ok(ServerConfig { connection, tables })
+    let config = ServerConfig {
+        connection,
+        tables: tables
+            .into_iter()
+            .map(|(table_alias, table_config)| {
+                Ok((
+                    table_alias.clone(),
+                    TableConfig {
+                        name: table_config.name,
+                        schema: table_config.schema,
+                        comment: table_config.comment,
+                        primary_key: table_config.primary_key,
+                        columns: table_config
+                            .columns
+                            .into_iter()
+                            .map(|(column_alias, column_config)| {
+                                Ok((
+                                    column_alias.clone(),
+                                    ColumnConfig {
+                                        name: column_config.name,
+                                        data_type: ClickHouseDataType::from_str(
+                                            &column_config.data_type,
+                                        )
+                                        .map_err(|_err| {
+                                            ParseError::ValidateError(InvalidNodes(vec![
+                                                InvalidNode {
+                                                    file_path: file_path.to_owned(),
+                                                    node_path: vec![
+                                                        KeyOrIndex::Key("tables".to_string()),
+                                                        KeyOrIndex::Key(table_alias.to_owned()),
+                                                        KeyOrIndex::Key("columns".to_string()),
+                                                        KeyOrIndex::Key(column_alias.to_owned()),
+                                                        KeyOrIndex::Key("data_type".to_string()),
+                                                    ],
+                                                    message: "Unable to parse data type"
+                                                        .to_string(),
+                                                },
+                                            ]))
+                                        })?,
+                                    },
+                                ))
+                            })
+                            .collect::<Result<_, _>>()?,
+                    },
+                ))
+            })
+            .collect::<Result<_, _>>()?,
+    };
+
+    Ok(config)
 }
 
 fn get_connection_config() -> Result<ConnectionConfig, ParseError> {

diff --git a/crates/ndc-clickhouse/src/connector/handler/schema.rs b/crates/ndc-clickhouse/src/connector/handler/schema.rs
index 9ac0bab..ba427d8 100644
--- a/crates/ndc-clickhouse/src/connector/handler/schema.rs
+++ b/crates/ndc-clickhouse/src/connector/handler/schema.rs
@@ -1,25 +1,20 @@
 use crate::schema::ClickHouseTypeDefinition;
-use common::{
-    clickhouse_datatype::ClickHouseDataType,
-    config::{PrimaryKey, ServerConfig},
-};
+use common::config::{PrimaryKey, ServerConfig};
 use ndc_sdk::{connector::SchemaError, models};
-use std::{collections::BTreeMap, str::FromStr};
+use std::collections::BTreeMap;
 
 pub async fn schema(configuration: &ServerConfig) -> Result<models::SchemaResponse, SchemaError> {
     let mut scalar_type_definitions = BTreeMap::new();
     let mut object_type_definitions = vec![];
 
-    for table in &configuration.tables {
+    for (table_alias, table_config) in &configuration.tables {
         let mut fields = vec![];
 
-        for column in &table.columns {
-            let data_type = ClickHouseDataType::from_str(column.data_type.as_str())
-                .map_err(|err| SchemaError::Other(Box::new(err)))?;
+        for (column_alias, column_config) in &table_config.columns {
             let type_definition = ClickHouseTypeDefinition::from_table_column(
-                &data_type,
-                &column.alias,
-                &table.alias,
+                &column_config.data_type,
+                &column_alias,
+                &table_alias,
             );
 
             let (scalars, objects) = type_definition.type_definitions();

diff --git a/crates/ndc-clickhouse/src/sql/ast.rs b/crates/ndc-clickhouse/src/sql/ast.rs
--- a/crates/ndc-clickhouse/src/sql/ast.rs
+++ b/crates/ndc-clickhouse/src/sql/ast.rs
-    pub fn new(value: Value, data_type: String) -> Self {
+    pub fn new(value: Value, data_type: ClickHouseDataType) -> Self {
         Self::Value { data_type, value }
     }
     pub fn into_expr(self) -> Expr {

diff --git a/crates/ndc-clickhouse/src/sql/query_builder.rs b/crates/ndc-clickhouse/src/sql/query_builder.rs
index 3926bee..e63866c 100644
--- a/crates/ndc-clickhouse/src/sql/query_builder.rs
+++ b/crates/ndc-clickhouse/src/sql/query_builder.rs
@@ -1,6 +1,6 @@
 use std::str::FromStr;
 
-use common::config::ServerConfig;
+use common::{clickhouse_datatype::ClickHouseDataType, config::ServerConfig};
 use indexmap::IndexMap;
 use ndc_sdk::models;
 
@@ -129,7 +129,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> {
                 Value::SingleQuotedString(serde_json::to_string(&variable_values).map_err(
                     |err| QueryBuilderError::CannotSerializeVariables(err.to_string()),
                 )?),
-                "String".to_owned(),
+                ClickHouseDataType::String,
             )
             .into_expr();
 
@@ -966,7 +966,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> {
         let right_col_type = match operator {
             ClickHouseBinaryComparisonOperator::In
             | ClickHouseBinaryComparisonOperator::NotIn => {
-                format!("Array({})", left_col.data_type())
+                ClickHouseDataType::Array(Box::new(left_col.data_type()))
             }
             _ => left_col.data_type(),
         };
@@ -1621,8 +1621,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> {
         let table = self
             .configuration
             .tables
-            .iter()
-            .find(|t| t.alias == collection_alias)
+            .get(collection_alias)
             .ok_or_else(|| QueryBuilderError::UnknownTable(collection_alias.to_owned()))?;
 
         Ok(ObjectName(vec![
@@ -1639,20 +1638,12 @@ impl<'r, 'c> QueryBuilder<'r, 'c> {
         let table = self
             .configuration
             .tables
-            .iter()
-            .find(|t| t.alias == collection_alias)
+            .get(collection_alias)
             .ok_or_else(|| QueryBuilderError::UnknownTable(collection_alias.to_owned()))?;
 
-        let column = table
-            .columns
-            .iter()
-            .find(|c| c.alias == column_alias)
-            .ok_or_else(|| {
-                QueryBuilderError::UnknownColumn(
-                    column_alias.to_owned(),
-                    collection_alias.to_owned(),
-                )
-            })?;
+        let column = table.columns.get(column_alias).ok_or_else(|| {
+            QueryBuilderError::UnknownColumn(column_alias.to_owned(), collection_alias.to_owned())
+        })?;
 
         Ok(Ident::new_quoted(&column.name))
     }
@@ -1660,25 +1651,17 @@ impl<'r, 'c> QueryBuilder<'r, 'c> {
         &self,
         column_alias: &str,
         collection_alias: &str,
-    ) -> Result<String, QueryBuilderError> {
+    ) -> Result<ClickHouseDataType, QueryBuilderError> {
         // todo: get column name based on column alias and collection alias
         let table = self
             .configuration
             .tables
-            .iter()
-            .find(|t| t.alias == collection_alias)
+            .get(collection_alias)
             .ok_or_else(|| QueryBuilderError::UnknownTable(collection_alias.to_owned()))?;
 
-        let column = table
-            .columns
-            .iter()
-            .find(|c| c.alias == column_alias)
-            .ok_or_else(|| {
-                QueryBuilderError::UnknownColumn(
-                    column_alias.to_owned(),
-                    collection_alias.to_owned(),
-                )
-            })?;
+        let column = table.columns.get(column_alias).ok_or_else(|| {
+            QueryBuilderError::UnknownColumn(column_alias.to_owned(), collection_alias.to_owned())
+        })?;
 
         // todo: revise whether we want to get the data type from the type definition instead
         Ok(column.data_type.to_owned())

diff --git a/crates/ndc-clickhouse/src/sql/query_builder/comparison_column.rs b/crates/ndc-clickhouse/src/sql/query_builder/comparison_column.rs
index d75c730..b496fa7 100644
--- a/crates/ndc-clickhouse/src/sql/query_builder/comparison_column.rs
+++ b/crates/ndc-clickhouse/src/sql/query_builder/comparison_column.rs
@@ -1,3 +1,5 @@
+use common::clickhouse_datatype::ClickHouseDataType;
+
 use crate::sql::ast::{Expr, Function, Ident, Join, Lambda};
 
 use super::and_reducer;
@@ -28,7 +30,7 @@ pub enum ComparisonColumn {
     /// For convenience, can be used with any expression as it won't wrap them in the arrayExists function
     Simple {
         column_ident: Expr,
-        data_type: String,
+        data_type: ClickHouseDataType,
     },
     /// The Flat variant does not group rows with a subquery. This can be safely used inside exists subqueries,
     /// as duplicating rows there does not matter. Like the Simple variant, when applied we do not wrap the expression in the arrayExists function
     Flat {
         column_ident: Expr,
         joins: Vec<Join>,
         additional_predicate: Option<Expr>,
-        data_type: String,
+        data_type: ClickHouseDataType,
     },
     /// The Grouped variant contains a single join which groups all values from the related column into an array.
     /// When applied, the expression returned from the closure is wrapped in the `arrayExists` function, and this expression will be evaluated against all values in the array.
@@ -46,12 +48,12 @@ pub enum ComparisonColumn {
         column_ident: Ident,
         joins: Vec<Join>,
         values_ident: Expr,
-        data_type: String,
+        data_type: ClickHouseDataType,
     },
 }
 
 impl ComparisonColumn {
-    pub fn new_simple(column_ident: Expr, data_type: String) -> Self {
+    pub fn new_simple(column_ident: Expr, data_type: ClickHouseDataType) -> Self {
         Self::Simple {
             column_ident,
             data_type,
@@ -61,7 +63,7 @@ impl ComparisonColumn {
         column_ident: Expr,
         joins: Vec<Join>,
         additional_predicate: Option<Expr>,
-        data_type: String,
+        data_type: ClickHouseDataType,
     ) -> Self {
         Self::Flat {
             column_ident,
@@ -74,7 +76,7 @@ impl ComparisonColumn {
         column_ident: Ident,
         join: Join,
         values_ident: Expr,
-        data_type: String,
+        data_type: ClickHouseDataType,
     ) -> Self {
         Self::Grouped {
             column_ident,
@@ -83,7 +85,7 @@ impl ComparisonColumn {
             data_type,
         }
     }
-    pub fn data_type(&self) -> String {
+    pub fn data_type(&self) -> ClickHouseDataType {
         match self {
             ComparisonColumn::Simple { data_type, .. }
             | ComparisonColumn::Flat { data_type, .. }

diff --git a/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs b/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs
index c83eeb6..30a89a9 100644
--- a/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs
+++ b/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs
@@ -72,9 +72,8 @@ impl AggregatesTypeString {
                         function,
                     } => {
                         let column = get_column(column_alias, table_alias, config)?;
-                        let data_type = get_data_type(column, column_alias, table_alias)?;
                         let type_definition = ClickHouseTypeDefinition::from_table_column(
-                            &data_type,
+                            &column.data_type,
                             column_alias,
                             table_alias,
                         );
@@ -132,10 +131,8 @@ impl RowsTypeString {
                             todo!("support nested field selection")
                         }
                         let column = get_column(column_alias, table_alias, config)?;
-                        let data_type =
-                            get_data_type(&column, &column_alias, &table_alias)?;
                         let type_definition = ClickHouseTypeDefinition::from_table_column(
-                            &data_type,
+                            &column.data_type,
                             column_alias,
                             table_alias,
                         );
@@ -240,19 +237,17 @@ fn get_column<'a>(
     column_alias: &str,
     table_alias: &str,
     config: &'a ServerConfig,
-) -> Result<&'a ColumnConfig, TypeStringError> {
+) -> Result<&'a ColumnConfig<ClickHouseDataType>, TypeStringError> {
     let table = config
         .tables
-        .iter()
-        .find(|t| t.alias == table_alias)
+        .get(table_alias)
         .ok_or_else(|| TypeStringError::UnknownTable {
             table: table_alias.to_owned(),
         })?;
 
     let column = table
         .columns
-        .iter()
-        .find(|c| c.alias == column_alias)
+        .get(column_alias)
         .ok_or_else(|| TypeStringError::UnknownColumn {
             table: table_alias.to_owned(),
             column: column_alias.to_owned(),
@@ -261,20 +256,6 @@ fn get_column<'a>(
     Ok(column)
 }
 
-fn get_data_type(
-    column: &ColumnConfig,
-    column_alias: &str,
-    table_alias: &str,
-) -> Result<ClickHouseDataType, TypeStringError> {
-    ClickHouseDataType::from_str(&column.data_type).map_err(|_err| {
-        TypeStringError::CannotParseTypeString {
-            table: table_alias.to_owned(),
-            column: column_alias.to_owned(),
-            data_type: column.data_type.to_owned(),
-        }
-    })
-}
-
 #[derive(Debug)]
 pub enum TypeStringError {
     UnknownTable {
@@ -284,15 +265,10 @@ pub enum TypeStringError {
         table: String,
         column: String,
     },
-    CannotParseTypeString {
-        table: String,
-        column: String,
-        data_type: String,
-    },
     UnknownAggregateFunction {
         table: String,
         column: String,
-        data_type: String,
+        data_type: ClickHouseDataType,
         function: String,
     },
     MissingRelationship(String),
@@ -305,14 +281,6 @@ impl Display for TypeStringError {
             TypeStringError::UnknownColumn { table, column } => {
                 write!(f, "Unknown column: {column} in table: {table}")
             }
-            TypeStringError::CannotParseTypeString {
-                table,
-                column,
-                data_type,
-            } => write!(
-                f,
-                "Unable to parse data type: {data_type} for column: {column} in table: {table}"
-            ),
             TypeStringError::UnknownAggregateFunction {
                 table,
                 column,
                 data_type,
                 function,
             } => write!(f, "Unknown aggregate function: {function} for column {column} of type: {data_type} in table {table}"),
             TypeStringError::MissingRelationship(rel) => write!(f, "Missing relationship: {rel}"),
         }
     }
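Note: the alias-preservation rules in the patch above hinge on the BTreeMap keys being user-editable while introspection matches entries by name and schema. A reduced illustration of that lookup (illustrative only; Table is a hypothetical stand-in for the real TableConfig):

    use std::collections::BTreeMap;

    struct Table {
        name: String,
        schema: String,
    }

    // Find the existing alias for an introspected table, if any, by matching
    // on name and schema rather than on the (possibly customized) map key,
    // so user-chosen aliases survive configuration updates.
    fn find_alias<'a>(
        old: &'a BTreeMap<String, Table>,
        name: &str,
        schema: &str,
    ) -> Option<&'a str> {
        old.iter()
            .find(|(_, t)| t.name == name && t.schema == schema)
            .map(|(alias, _)| alias.as_str())
    }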
From f7a89dc81a9b013a8e2a0421e31dc1e559285200 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Mon, 1 Apr 2024 06:07:44 -0400
Subject: [PATCH 12/28] support identifier data type

---
 crates/common/src/clickhouse_datatype.rs | 30 ++++++++++++++++++++++++++++--
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/crates/common/src/clickhouse_datatype.rs b/crates/common/src/clickhouse_datatype.rs
index 6aed85b..3ca89dc 100644
--- a/crates/common/src/clickhouse_datatype.rs
+++ b/crates/common/src/clickhouse_datatype.rs
@@ -143,6 +143,8 @@ pub enum ClickHouseDataType {
         arguments: Vec<ClickHouseDataType>,
     },
     Nothing,
+    CompoundIdentifier(Vec<Identifier>),
+    SingleIdentifier(Identifier),
 }
 
 impl Display for ClickHouseDataType {
@@ -266,6 +268,20 @@ impl Display for ClickHouseDataType {
                 write!(f, ")")
             }
             DT::Nothing => write!(f, "Nothing"),
+            DT::CompoundIdentifier(identifiers) => {
+                let mut first = true;
+                for identifier in identifiers {
+                    if first {
+                        first = false;
+                    } else {
+                        write!(f, ".")?;
+                    }
+
+                    write!(f, "{identifier}")?;
+                }
+                Ok(())
+            }
+            DT::SingleIdentifier(identifier) => write!(f, "{identifier}"),
         }
     }
 }
@@ -321,6 +337,8 @@ peg::parser! {
         / tuple()
         / r#enum()
         / nothing()
+        / compound_identifier()
+        / single_identifier()
     rule nullable() -> ClickHouseDataType = "Nullable(" t:data_type() ")" { CDT::Nullable(Box::new(t)) }
     rule uint8() -> ClickHouseDataType = "UInt8" { CDT::UInt8 }
     rule uint16() -> ClickHouseDataType = "UInt16" { CDT::UInt16 }
@@ -361,8 +379,8 @@ peg::parser! {
     rule aggregate_function() -> ClickHouseDataType = "AggregateFunction(" f:aggregate_function_definition() ", " a:(data_type() ** ", ") ")" { CDT::AggregateFunction { function: f, arguments: a }}
     rule simple_aggregate_function() -> ClickHouseDataType = "SimpleAggregateFunction(" f:aggregate_function_definition() ", " a:(data_type() ** ", ") ")" { CDT::SimpleAggregateFunction { function: f, arguments: a }}
     rule nothing() -> ClickHouseDataType = "Nothing" { CDT::Nothing }
-
-
+    rule compound_identifier() -> ClickHouseDataType = i:(i:identifier() ** ".") { CDT::CompoundIdentifier(i) }
+    rule single_identifier() -> ClickHouseDataType = i:identifier() { CDT::SingleIdentifier(i) }
     rule aggregate_function_definition() -> AggregateFunctionDefinition = n:identifier() p:("(" p:(aggregate_function_parameter() ** ", ") ")" { p })? { AggregateFunctionDefinition { name: n, parameters: p }}
     rule aggregate_function_parameter() -> AggregateFunctionParameter
         = s:single_quoted_string_value() { AggregateFunctionParameter::SingleQuotedString(s)}
@@ -444,6 +462,14 @@ fn can_parse_clickhouse_data_type() {
                 ),
             ]),
         ),
+        (
+            "\"t1\".t2.`t3`",
+            CDT::CompoundIdentifier(vec![
+                Identifier::DoubleQuoted("t1".to_string()),
+                Identifier::Unquoted("t2".to_string()),
+                Identifier::BacktickQuoted("t3".to_string()),
+            ]),
+        ),
     ];
 
     for (s, t) in data_types {
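Note: the new identifier rules can be exercised directly through FromStr, mirroring the test case added above (usage sketch only; the module path assumes the crate layout at this point in the series):

    use common::clickhouse_datatype::{ClickHouseDataType, Identifier};
    use std::str::FromStr;

    fn main() {
        // a compound identifier mixing all three quoting styles
        let dt = ClickHouseDataType::from_str("\"t1\".t2.`t3`").unwrap();
        assert_eq!(
            dt,
            ClickHouseDataType::CompoundIdentifier(vec![
                Identifier::DoubleQuoted("t1".to_string()),
                Identifier::Unquoted("t2".to_string()),
                Identifier::BacktickQuoted("t3".to_string()),
            ])
        );
    }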
From 61235cb94c539b378b879061a316c56d13706d7d Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Mon, 1 Apr 2024 10:37:13 -0400
Subject: [PATCH 13/28] separate parser and target type, add parameterized
 query parser and type

---
 crates/common/src/clickhouse_datatype.rs      | 479 ------------------
 crates/common/src/clickhouse_parser.rs        | 226 +++++++++
 .../common/src/clickhouse_parser/datatype.rs  | 299 +++++++++++
 .../clickhouse_parser/parameterized_query.rs  |  57 +++
 crates/common/src/config.rs                   |  70 ++-
 crates/common/src/lib.rs                      |   2 +-
 6 files changed, 646 insertions(+), 487 deletions(-)
 delete mode 100644 crates/common/src/clickhouse_datatype.rs
 create mode 100644 crates/common/src/clickhouse_parser.rs
 create mode 100644 crates/common/src/clickhouse_parser/datatype.rs
 create mode 100644 crates/common/src/clickhouse_parser/parameterized_query.rs

diff --git a/crates/common/src/clickhouse_datatype.rs b/crates/common/src/clickhouse_datatype.rs
deleted file mode 100644
index 3ca89dc..0000000
--- a/crates/common/src/clickhouse_datatype.rs
+++ /dev/null
@@ -1,479 +0,0 @@
-use std::{fmt::Display, str::FromStr};
-
-use serde::{Deserialize, Serialize};
-
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub struct SingleQuotedString(pub String);
-
-impl Display for SingleQuotedString {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "'{}'", self.0)
-    }
-}
-
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub enum Identifier {
-    DoubleQuoted(String),
-    BacktickQuoted(String),
-    Unquoted(String),
-}
-
-impl Display for Identifier {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Identifier::DoubleQuoted(s) => write!(f, "\"{s}\""),
-            Identifier::BacktickQuoted(s) => write!(f, "`{s}`"),
-            Identifier::Unquoted(s) => write!(f, "{s}"),
-        }
-    }
-}
-
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub struct AggregateFunctionDefinition {
-    pub name: Identifier,
-    pub parameters: Option<Vec<AggregateFunctionParameter>>,
-}
-
-impl Display for AggregateFunctionDefinition {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.name)?;
-
-        if let Some(parameters) = &self.parameters {
-            write!(f, "(")?;
-            let mut first = true;
-            for parameter in parameters {
-                if first {
-                    first = false;
-                } else {
-                    write!(f, ", ")?;
-                }
-                write!(f, "{parameter}")?;
-            }
-            write!(f, ")")?;
-        }
-
-        Ok(())
-    }
-}
-
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
-pub enum AggregateFunctionParameter {
-    SingleQuotedString(SingleQuotedString),
-    FloatingPoint(f64),
-    Integer(u32),
-}
-
-impl Display for AggregateFunctionParameter {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            AggregateFunctionParameter::SingleQuotedString(s) => write!(f, "{s}"),
-            AggregateFunctionParameter::FloatingPoint(n) => write!(f, "{n}"),
-            AggregateFunctionParameter::Integer(n) => write!(f, "{n}"),
-        }
-    }
-}
-
-/// A parsed representation of a clickhouse datatype string
-/// This should support the full scope of clickhouse types
-/// To create one from a string slice, use try_from()
-#[derive(Debug, Clone, PartialEq)]
-pub enum ClickHouseDataType {
-    Nullable(Box<ClickHouseDataType>),
-    Bool,
-    String,
-    FixedString(u32),
-    UInt8,
-    UInt16,
-    UInt32,
-    UInt64,
-    UInt128,
-    UInt256,
-    Int8,
-    Int16,
-    Int32,
-    Int64,
-    Int128,
-    Int256,
-    Float32,
-    Float64,
-    Decimal {
-        precision: u32,
-        scale: u32,
-    },
-    Decimal32 {
-        scale: u32,
-    },
-    Decimal64 {
-        scale: u32,
-    },
-    Decimal128 {
-        scale: u32,
-    },
-    Decimal256 {
-        scale: u32,
-    },
-    Date,
-    Date32,
-    DateTime {
-        timezone: Option<SingleQuotedString>,
-    },
-    DateTime64 {
-        precision: u32,
-        timezone: Option<SingleQuotedString>,
-    },
-    Json,
-    Uuid,
-    IPv4,
-    IPv6,
-    LowCardinality(Box<ClickHouseDataType>),
-    Nested(Vec<(Identifier, ClickHouseDataType)>),
-    Array(Box<ClickHouseDataType>),
-    Map {
-        key: Box<ClickHouseDataType>,
-        value: Box<ClickHouseDataType>,
-    },
-    Tuple(Vec<(Option<Identifier>, ClickHouseDataType)>),
-    Enum(Vec<(SingleQuotedString, Option<u32>)>),
-    SimpleAggregateFunction {
-        function: AggregateFunctionDefinition,
-        arguments: Vec<ClickHouseDataType>,
-    },
-    AggregateFunction {
-        function: AggregateFunctionDefinition,
-        arguments: Vec<ClickHouseDataType>,
-    },
-    Nothing,
-    CompoundIdentifier(Vec<Identifier>),
-    SingleIdentifier(Identifier),
-}
-
-impl Display for ClickHouseDataType {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        use ClickHouseDataType as DT;
-        match self {
-            DT::Nullable(inner) => write!(f, "Nullable({inner})"),
-            DT::Bool => write!(f, "Bool"),
-            DT::String => write!(f, "String"),
-            DT::FixedString(n) => write!(f, "FixedString({n})"),
-            DT::UInt8 => write!(f, "UInt8"),
-            DT::UInt16 => write!(f, "UInt16"),
-            DT::UInt32 => write!(f, "UInt32"),
-            DT::UInt64 => write!(f, "UInt64"),
-            DT::UInt128 => write!(f, "UInt128"),
-            DT::UInt256 => write!(f, "UInt256"),
-            DT::Int8 => write!(f, "Int8"),
-            DT::Int16 => write!(f, "Int16"),
-            DT::Int32 => write!(f, "Int32"),
-            DT::Int64 => write!(f, "Int64"),
-            DT::Int128 => write!(f, "Int128"),
-            DT::Int256 => write!(f, "Int256"),
-            DT::Float32 => write!(f, "Float32"),
-            DT::Float64 => write!(f, "Float64"),
-            DT::Decimal { precision, scale } => write!(f, "Decimal({precision}, {scale})"),
-            DT::Decimal32 { scale } => write!(f, "Decimal32({scale})"),
-            DT::Decimal64 { scale } => write!(f, "Decimal64({scale})"),
-            DT::Decimal128 { scale } => write!(f, "Decimal128({scale})"),
-            DT::Decimal256 { scale } => write!(f, "Decimal256({scale})"),
-            DT::Date => write!(f, "Date"),
-            DT::Date32 => write!(f, "Date32"),
-            DT::DateTime { timezone } => {
-                write!(f, "DateTime")?;
-                if let Some(tz) = timezone {
-                    write!(f, "({tz})")?;
-                }
-                Ok(())
-            }
-            DT::DateTime64 {
-                precision,
-                timezone,
-            } => {
-                write!(f, "DateTime64({precision}")?;
-                if let Some(tz) = timezone {
-                    write!(f, ", {tz}")?;
-                }
-                write!(f, ")")
-            }
-            DT::Json => write!(f, "JSON"),
-            DT::Uuid => write!(f, "UUID"),
-            DT::IPv4 => write!(f, "IPv4"),
-            DT::IPv6 => write!(f, "IPv6"),
-            DT::LowCardinality(inner) => write!(f, "LowCardinality({inner})"),
-            DT::Nested(elements) => {
-                write!(f, "Nested(")?;
-                let mut first = true;
-                for (name, value) in elements {
-                    if first {
-                        first = false;
-                    } else {
-                        write!(f, ", ")?;
-                    }
-                    write!(f, "{name} {value}")?;
-                }
-                write!(f, ")")
-            }
-            DT::Array(inner) => write!(f, "Array({inner})"),
-            DT::Map { key, value } => write!(f, "Map({key}, {value})"),
-            DT::Tuple(elements) => {
-                write!(f, "Tuple(")?;
-                let mut first = true;
-                for (name, t) in elements {
-                    if first {
-                        first = false;
-                    } else {
-                        write!(f, ", ")?;
-                    }
-                    if let Some(name) = name {
-                        write!(f, "{name} ")?;
-                    }
-                    write!(f, "{t}")?;
-                }
-                write!(f, ")")
-            }
-            DT::Enum(variants)
=> { - write!(f, "Enum(")?; - let mut first = true; - for (variant, id) in variants { - if first { - first = false; - } else { - write!(f, ", ")?; - } - - write!(f, "{variant}")?; - - if let Some(id) = id { - write!(f, " = {id}")?; - } - } - write!(f, ")") - } - DT::SimpleAggregateFunction { - function, - arguments, - } => { - write!(f, "SimpleAggregateFunction({function}")?; - for argument in arguments { - write!(f, ", {argument}")?; - } - write!(f, ")") - } - DT::AggregateFunction { - function, - arguments, - } => { - write!(f, "AggregateFunction({function}")?; - for argument in arguments { - write!(f, ", {argument}")?; - } - write!(f, ")") - } - DT::Nothing => write!(f, "Nothing"), - DT::CompoundIdentifier(identifiers) => { - let mut first = true; - for identifier in identifiers { - if first { - first = false; - } else { - write!(f, ".")?; - } - - write!(f, "{identifier}")?; - } - Ok(()) - } - DT::SingleIdentifier(identifier) => write!(f, "{identifier}"), - } - } -} - -impl FromStr for ClickHouseDataType { - type Err = peg::error::ParseError; - - /// Attempt to create a ClickHouseDataType from a string representation of the type. - /// May return a parse error if the type string is malformed, or if our implementation is out of date or incorrect - fn from_str(s: &str) -> Result { - clickhouse_parser::data_type(s) - } -} - -peg::parser! { - grammar clickhouse_parser() for str { - use ClickHouseDataType as CDT; - pub rule data_type() -> ClickHouseDataType = nullable() - / uint256() - / uint128() - / uint64() - / uint32() - / uint16() - / uint8() - / int256() - / int128() - / int64() - / int32() - / int16() - / int8() - / float32() - / float64() - / decimal256() - / decimal128() - / decimal64() - / decimal32() - / decimal() - / bool() - / string() - / fixed_string() - / date_time64() - / date_time() - / date32() - / date() - / json() - / uuid() - / ipv4() - / ipv6() - / low_cardinality() - / nested() - / array() - / map() - / tuple() - / r#enum() - / nothing() - / compound_identifier() - / single_identifier() - rule nullable() -> ClickHouseDataType = "Nullable(" t:data_type() ")" { CDT::Nullable(Box::new(t)) } - rule uint8() -> ClickHouseDataType = "UInt8" { CDT::UInt8 } - rule uint16() -> ClickHouseDataType = "UInt16" { CDT::UInt16 } - rule uint32() -> ClickHouseDataType = "UInt32" { CDT::UInt32 } - rule uint64() -> ClickHouseDataType = "UInt64" { CDT::UInt64 } - rule uint128() -> ClickHouseDataType = "UInt128" { CDT::UInt128 } - rule uint256() -> ClickHouseDataType = "UInt256" { CDT::UInt256 } - rule int8() -> ClickHouseDataType = "Int8" { CDT::Int8 } - rule int16() -> ClickHouseDataType = "Int16" { CDT::Int16 } - rule int32() -> ClickHouseDataType = "Int32" { CDT::Int32 } - rule int64() -> ClickHouseDataType = "Int64" { CDT::Int64 } - rule int128() -> ClickHouseDataType = "Int128" { CDT::Int128 } - rule int256() -> ClickHouseDataType = "Int256" { CDT::Int256 } - rule float32() -> ClickHouseDataType = "Float32" { CDT::Float32 } - rule float64() -> ClickHouseDataType = "Float64" { CDT::Float64 } - rule decimal() -> ClickHouseDataType = "Decimal(" precision:integer_value() ", " scale:integer_value() ")" { CDT::Decimal { precision, scale } } - rule decimal32() -> ClickHouseDataType = "Decimal32(" scale:integer_value() ")" { CDT::Decimal32 { scale } } - rule decimal64() -> ClickHouseDataType = "Decimal64(" scale:integer_value() ")" { CDT::Decimal64 { scale } } - rule decimal128() -> ClickHouseDataType = "Decimal128(" scale:integer_value() ")" { CDT::Decimal128 { scale } } - rule decimal256() 
-> ClickHouseDataType = "Decimal256(" scale:integer_value() ")" { CDT::Decimal256 { scale } } - rule bool() -> ClickHouseDataType = "Bool" { CDT::Bool } - rule string() -> ClickHouseDataType = "String" { CDT::String } - rule fixed_string() -> ClickHouseDataType = "FixedString(" n:integer_value() ")" { CDT::FixedString(n) } - rule date() -> ClickHouseDataType = "Date" { CDT::Date } - rule date32() -> ClickHouseDataType = "Date32" { CDT::Date32 } - rule date_time() -> ClickHouseDataType = "DateTime" tz:("(" tz:single_quoted_string_value()? ")" { tz })? { CDT::DateTime { timezone: tz.flatten().map(|s| s.to_owned()) } } - rule date_time64() -> ClickHouseDataType = "DateTime64(" precision:integer_value() tz:(", " tz:single_quoted_string_value()? { tz })? ")" { CDT::DateTime64{ precision, timezone: tz.flatten().map(|s| s.to_owned())} } - rule json() -> ClickHouseDataType = "JSON" { CDT::Json } - rule uuid() -> ClickHouseDataType = "UUID" { CDT::Uuid } - rule ipv4() -> ClickHouseDataType = "IPv4" { CDT::IPv4 } - rule ipv6() -> ClickHouseDataType = "IPv6" { CDT::IPv6 } - rule low_cardinality() -> ClickHouseDataType = "LowCardinality(" t:data_type() ")" { CDT::LowCardinality(Box::new(t)) } - rule nested() -> ClickHouseDataType = "Nested(" e:(("\""? n:identifier() "\""? " " t:data_type() { (n, t)}) ** ", ") ")" { CDT::Nested(e) } - rule array() -> ClickHouseDataType = "Array(" t:data_type() ")" { CDT::Array(Box::new(t)) } - rule map() -> ClickHouseDataType = "Map(" k:data_type() ", " v:data_type() ")" { CDT::Map { key: Box::new(k), value: Box::new(v) } } - rule tuple() -> ClickHouseDataType = "Tuple(" e:((n:(n:identifier() " " { n })? t:data_type() { (n, t) }) ** ", ") ")" { CDT::Tuple(e) } - rule r#enum() -> ClickHouseDataType = "Enum" ("8" / "16")? "(" e:((n:single_quoted_string_value() i:(" = " i:integer_value() { i })? { (n, i) }) ** ", ") ")" { CDT::Enum(e)} - rule aggregate_function() -> ClickHouseDataType = "AggregateFunction(" f:aggregate_function_definition() ", " a:(data_type() ** ", ") ")" { CDT::AggregateFunction { function: f, arguments: a }} - rule simple_aggregate_function() -> ClickHouseDataType = "SimpleAggregateFunction(" f:aggregate_function_definition() ", " a:(data_type() ** ", ") ")" { CDT::SimpleAggregateFunction { function: f, arguments: a }} - rule nothing() -> ClickHouseDataType = "Nothing" { CDT::Nothing } - rule compound_identifier() -> ClickHouseDataType = i:(i:identifier() ** ".") { CDT::CompoundIdentifier(i) } - rule single_identifier() -> ClickHouseDataType = i:identifier() { CDT::SingleIdentifier(i) } - - rule aggregate_function_definition() -> AggregateFunctionDefinition = n:identifier() p:("(" p:(aggregate_function_parameter() ** ", ") ")" { p })? { AggregateFunctionDefinition { name: n, parameters: p }} - rule aggregate_function_parameter() -> AggregateFunctionParameter = s:single_quoted_string_value() { AggregateFunctionParameter::SingleQuotedString(s)} - / f:floating_point_value() { AggregateFunctionParameter::FloatingPoint(f)} - / i:integer_value() { AggregateFunctionParameter::Integer(i) } - rule floating_point_value() -> f64 = f:$(['0'..='9']+("." ['0'..='9']+)?) {? f.parse().or(Err("f64")) } - rule integer_value() -> u32 = n:$(['0'..='9']+) {? 
n.parse().or(Err("u32")) } - // parsing quoted strings - // characters in quotes can be any char except quote char or backslash - // unless the backslash is followed by any another character (and is thus not escaping our end quote) - // for single quoted strings, single quotes in identifiers may also be escaped by another single quote, so include pairs of single quotes - rule single_quoted_string_value() -> SingleQuotedString = "'" s:$(([^'\'' | '\\'] / "\\" [_] / "''")*) "'" { SingleQuotedString(s.to_string()) } - rule double_quoted_string_value() -> Identifier = "\"" s:$(([^'\"' | '\\'] / "\\" [_])*) "\"" { Identifier::DoubleQuoted(s.to_string()) } - rule backtick_quoted_string_value() -> Identifier = "`" s:$(([^'`' | '\\'] / "\\" [_])*) "`" { Identifier::BacktickQuoted(s.to_string()) } - rule unquoted_identifier() -> Identifier = s:$(['a'..='z' | 'A'..='Z' | '_']['0'..='9' | 'a'..='z' | 'A'..='Z' | '_']*) { Identifier::Unquoted(s.to_string()) } - rule identifier() -> Identifier = unquoted_identifier() / double_quoted_string_value() / backtick_quoted_string_value() - } -} - -#[test] -fn can_parse_clickhouse_data_type() { - use ClickHouseDataType as CDT; - let data_types = vec![ - ("Int32", CDT::Int32), - ("Nullable(Int32)", CDT::Nullable(Box::new(CDT::Int32))), - ("Nullable(Date32)", CDT::Nullable(Box::new(CDT::Date32))), - ( - "DateTime64(9)", - CDT::DateTime64 { - precision: 9, - timezone: None, - }, - ), - ("Float64", CDT::Float64), - ("Date", CDT::Date), - ( - "DateTime('Asia/Istanbul\\\\')", - CDT::DateTime { - timezone: Some(SingleQuotedString("Asia/Istanbul\\\\".to_string())), - }, - ), - ( - "LowCardinality(String)", - CDT::LowCardinality(Box::new(CDT::String)), - ), - ( - "Map(LowCardinality(String), String)", - CDT::Map { - key: Box::new(CDT::LowCardinality(Box::new(CDT::String))), - value: Box::new(CDT::String), - }, - ), - ( - "Array(DateTime64(9))", - CDT::Array(Box::new(CDT::DateTime64 { - precision: 9, - timezone: None, - })), - ), - ( - "Array(Map(LowCardinality(String), String))", - CDT::Array(Box::new(CDT::Map { - key: Box::new(CDT::LowCardinality(Box::new(CDT::String))), - value: Box::new(CDT::String), - })), - ), - ( - "Tuple(String, Int32)", - CDT::Tuple(vec![(None, CDT::String), (None, CDT::Int32)]), - ), - ( - "Tuple(n String, \"i\" Int32, `u` UInt8)", - CDT::Tuple(vec![ - (Some(Identifier::Unquoted("n".to_string())), CDT::String), - (Some(Identifier::DoubleQuoted("i".to_string())), CDT::Int32), - ( - Some(Identifier::BacktickQuoted("u".to_string())), - CDT::UInt8, - ), - ]), - ), - ( - "\"t1\".t2.`t3`", - CDT::CompoundIdentifier(vec![ - Identifier::DoubleQuoted("t1".to_string()), - Identifier::Unquoted("t2".to_string()), - Identifier::BacktickQuoted("t3".to_string()), - ]), - ), - ]; - - for (s, t) in data_types { - let parsed = clickhouse_parser::data_type(s); - assert_eq!(parsed, Ok(t), "Able to parse correctly"); - } -} diff --git a/crates/common/src/clickhouse_parser.rs b/crates/common/src/clickhouse_parser.rs new file mode 100644 index 0000000..96761d1 --- /dev/null +++ b/crates/common/src/clickhouse_parser.rs @@ -0,0 +1,226 @@ +pub mod datatype; +pub mod parameterized_query; +use self::datatype::{ + AggregateFunctionDefinition, AggregateFunctionParameter, ClickHouseDataType as DT, Identifier, + SingleQuotedString, +}; + +use self::parameterized_query::{Parameter, ParameterizedQuery, ParameterizedQueryElement}; + +peg::parser! 
{ + grammar clickhouse_parser() for str { + pub rule parameterized_query() -> ParameterizedQuery = elements:parameterized_query_element()* statement_end()? { ParameterizedQuery { elements } } + rule parameterized_query_element() -> ParameterizedQueryElement = p:parameter() { ParameterizedQueryElement::Parameter(p)} / s:$((!parameter() !statement_end() [_])+) { ParameterizedQueryElement::String(s.to_string()) } + rule parameter() -> Parameter = p:("{" _ name:identifier() _ ":" _ data_type:data_type() _ "}" { Parameter { name, data_type }}) + rule statement_end() = _ ";" _ + + pub rule data_type() -> DT = nullable() + / uint256() + / uint128() + / uint64() + / uint32() + / uint16() + / uint8() + / int256() + / int128() + / int64() + / int32() + / int16() + / int8() + / float32() + / float64() + / decimal256() + / decimal128() + / decimal64() + / decimal32() + / decimal() + / bool() + / string() + / fixed_string() + / date_time64() + / date_time() + / date32() + / date() + / json() + / uuid() + / ipv4() + / ipv6() + / low_cardinality() + / nested() + / array() + / map() + / tuple() + / r#enum() + / nothing() + / compound_identifier() + / single_identifier() + rule nullable() -> DT = "Nullable(" t:data_type() ")" { DT::Nullable(Box::new(t)) } + rule uint8() -> DT = "UInt8" { DT::UInt8 } + rule uint16() -> DT = "UInt16" { DT::UInt16 } + rule uint32() -> DT = "UInt32" { DT::UInt32 } + rule uint64() -> DT = "UInt64" { DT::UInt64 } + rule uint128() -> DT = "UInt128" { DT::UInt128 } + rule uint256() -> DT = "UInt256" { DT::UInt256 } + rule int8() -> DT = "Int8" { DT::Int8 } + rule int16() -> DT = "Int16" { DT::Int16 } + rule int32() -> DT = "Int32" { DT::Int32 } + rule int64() -> DT = "Int64" { DT::Int64 } + rule int128() -> DT = "Int128" { DT::Int128 } + rule int256() -> DT = "Int256" { DT::Int256 } + rule float32() -> DT = "Float32" { DT::Float32 } + rule float64() -> DT = "Float64" { DT::Float64 } + rule decimal() -> DT = "Decimal(" precision:integer_value() comma_separator() scale:integer_value() ")" { DT::Decimal { precision, scale } } + rule decimal32() -> DT = "Decimal32(" scale:integer_value() ")" { DT::Decimal32 { scale } } + rule decimal64() -> DT = "Decimal64(" scale:integer_value() ")" { DT::Decimal64 { scale } } + rule decimal128() -> DT = "Decimal128(" scale:integer_value() ")" { DT::Decimal128 { scale } } + rule decimal256() -> DT = "Decimal256(" scale:integer_value() ")" { DT::Decimal256 { scale } } + rule bool() -> DT = "Bool" { DT::Bool } + rule string() -> DT = "String" { DT::String } + rule fixed_string() -> DT = "FixedString(" n:integer_value() ")" { DT::FixedString(n) } + rule date() -> DT = "Date" { DT::Date } + rule date32() -> DT = "Date32" { DT::Date32 } + rule date_time() -> DT = "DateTime" tz:("(" tz:single_quoted_string_value()? ")" { tz })? { DT::DateTime { timezone: tz.flatten().map(|s| s.to_owned()) } } + rule date_time64() -> DT = "DateTime64(" precision:integer_value() tz:(comma_separator() tz:single_quoted_string_value()? { tz })? 
")" { DT::DateTime64{ precision, timezone: tz.flatten().map(|s| s.to_owned())} } + rule json() -> DT = "JSON" { DT::Json } + rule uuid() -> DT = "UUID" { DT::Uuid } + rule ipv4() -> DT = "IPv4" { DT::IPv4 } + rule ipv6() -> DT = "IPv6" { DT::IPv6 } + rule low_cardinality() -> DT = "LowCardinality(" t:data_type() ")" { DT::LowCardinality(Box::new(t)) } + rule nested() -> DT = "Nested(" e:((n:identifier() __ t:data_type() { (n, t)}) ** comma_separator()) ")" { DT::Nested(e) } + rule array() -> DT = "Array(" t:data_type() ")" { DT::Array(Box::new(t)) } + rule map() -> DT = "Map(" k:data_type() comma_separator() v:data_type() ")" { DT::Map { key: Box::new(k), value: Box::new(v) } } + rule tuple() -> DT = "Tuple(" e:((n:(n:identifier() __ { n })? t:data_type() { (n, t) }) ** comma_separator()) ")" { DT::Tuple(e) } + rule r#enum() -> DT = "Enum" ("8" / "16")? "(" e:((n:single_quoted_string_value() i:(_ "=" _ i:integer_value() { i })? { (n, i) }) ** comma_separator()) ")" { DT::Enum(e)} + rule aggregate_function() -> DT = "AggregateFunction(" f:aggregate_function_definition() comma_separator() a:(data_type() ** comma_separator()) ")" { DT::AggregateFunction { function: f, arguments: a }} + rule simple_aggregate_function() -> DT = "SimpleAggregateFunction(" f:aggregate_function_definition() comma_separator() a:(data_type() ** comma_separator()) ")" { DT::SimpleAggregateFunction { function: f, arguments: a }} + rule nothing() -> DT = "Nothing" { DT::Nothing } + rule compound_identifier() -> DT = i:(i:identifier() ** ".") { DT::CompoundIdentifier(i) } + rule single_identifier() -> DT = i:identifier() { DT::SingleIdentifier(i) } + + rule aggregate_function_definition() -> AggregateFunctionDefinition = n:identifier() p:("(" p:(aggregate_function_parameter() ** comma_separator()) ")" { p })? { AggregateFunctionDefinition { name: n, parameters: p }} + rule aggregate_function_parameter() -> AggregateFunctionParameter = s:single_quoted_string_value() { AggregateFunctionParameter::SingleQuotedString(s)} + / f:floating_point_value() { AggregateFunctionParameter::FloatingPoint(f)} + / i:integer_value() { AggregateFunctionParameter::Integer(i) } + rule floating_point_value() -> f64 = f:$(['0'..='9']+("." ['0'..='9']+)?) {? f.parse().or(Err("f64")) } + rule integer_value() -> u32 = n:$(['0'..='9']+) {? 
n.parse().or(Err("u32")) } + // parsing quoted strings + // characters in quotes can be any char except quote char or backslash + // unless the backslash is followed by any another character (and is thus not escaping our end quote) + // for single quoted strings, single quotes in identifiers may also be escaped by another single quote, so include pairs of single quotes + rule single_quoted_string_value() -> SingleQuotedString = "'" s:$(([^'\'' | '\\'] / "\\" [_] / "''")*) "'" { SingleQuotedString(s.to_string()) } + rule double_quoted_string_value() -> Identifier = "\"" s:$(([^'\"' | '\\'] / "\\" [_])*) "\"" { Identifier::DoubleQuoted(s.to_string()) } + rule backtick_quoted_string_value() -> Identifier = "`" s:$(([^'`' | '\\'] / "\\" [_])*) "`" { Identifier::BacktickQuoted(s.to_string()) } + rule unquoted_identifier() -> Identifier = s:$(['a'..='z' | 'A'..='Z' | '_']['0'..='9' | 'a'..='z' | 'A'..='Z' | '_']*) { Identifier::Unquoted(s.to_string()) } + rule identifier() -> Identifier = unquoted_identifier() / double_quoted_string_value() / backtick_quoted_string_value() + + /// One or more whitespace + rule __ = [' ' | '\t' | '\r' | '\n']+ + /// Optional whitespace + rule _ = [' ' | '\t' | '\r' | '\n']* + /// A comma surrounded by optional whitespace + rule comma_separator() = _ "," _ + } +} + +#[test] +fn can_parse_clickhouse_data_type() { + let data_types = vec![ + ("Int32", DT::Int32), + ("Nullable(Int32)", DT::Nullable(Box::new(DT::Int32))), + ("Nullable(Date32)", DT::Nullable(Box::new(DT::Date32))), + ( + "DateTime64(9)", + DT::DateTime64 { + precision: 9, + timezone: None, + }, + ), + ("Float64", DT::Float64), + ("Date", DT::Date), + ( + "DateTime('Asia/Istanbul\\\\')", + DT::DateTime { + timezone: Some(SingleQuotedString("Asia/Istanbul\\\\".to_string())), + }, + ), + ( + "LowCardinality(String)", + DT::LowCardinality(Box::new(DT::String)), + ), + ( + "Map(LowCardinality(String), String)", + DT::Map { + key: Box::new(DT::LowCardinality(Box::new(DT::String))), + value: Box::new(DT::String), + }, + ), + ( + "Array(DateTime64(9))", + DT::Array(Box::new(DT::DateTime64 { + precision: 9, + timezone: None, + })), + ), + ( + "Array(Map(LowCardinality(String), String))", + DT::Array(Box::new(DT::Map { + key: Box::new(DT::LowCardinality(Box::new(DT::String))), + value: Box::new(DT::String), + })), + ), + ( + "Tuple(String, Int32)", + DT::Tuple(vec![(None, DT::String), (None, DT::Int32)]), + ), + ( + "Tuple(n String, \"i\" Int32, `u` UInt8)", + DT::Tuple(vec![ + (Some(Identifier::Unquoted("n".to_string())), DT::String), + (Some(Identifier::DoubleQuoted("i".to_string())), DT::Int32), + (Some(Identifier::BacktickQuoted("u".to_string())), DT::UInt8), + ]), + ), + ( + "\"t1\".t2.`t3`", + DT::CompoundIdentifier(vec![ + Identifier::DoubleQuoted("t1".to_string()), + Identifier::Unquoted("t2".to_string()), + Identifier::BacktickQuoted("t3".to_string()), + ]), + ), + ]; + + for (s, t) in data_types { + let parsed = clickhouse_parser::data_type(s); + assert_eq!(parsed, Ok(t), "Able to parse correctly"); + } +} + +#[test] +fn can_parse_parameterized_query() { + let query = r#" + SELECT Name + FROM "default"."Artist" + WHERE ArtistId = {ArtistId:Int32} AND Name != {ArtistName: String}; + +"#; + let expected = ParameterizedQuery { + elements: vec![ + ParameterizedQueryElement::String( + "\n SELECT Name\n FROM \"default\".\"Artist\"\n WHERE ArtistId = " + .to_string(), + ), + ParameterizedQueryElement::Parameter(Parameter { + name: Identifier::Unquoted("ArtistId".to_string()), + data_type: DT::Int32, + }), + 
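+            // any SQL text between two parameters is captured verbatim as a String element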
ParameterizedQueryElement::String(" AND Name != ".to_string()), + ParameterizedQueryElement::Parameter(Parameter { + name: Identifier::Unquoted("ArtistName".to_string()), + data_type: DT::String, + }), + ], + }; + let parsed = clickhouse_parser::parameterized_query(&query); + assert_eq!(parsed, Ok(expected), "can parse parameterized query"); +} diff --git a/crates/common/src/clickhouse_parser/datatype.rs b/crates/common/src/clickhouse_parser/datatype.rs new file mode 100644 index 0000000..6ec58a7 --- /dev/null +++ b/crates/common/src/clickhouse_parser/datatype.rs @@ -0,0 +1,299 @@ +use std::{fmt::Display, str::FromStr}; + +use serde::{Deserialize, Serialize}; + +use super::clickhouse_parser; + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct SingleQuotedString(pub String); + +impl Display for SingleQuotedString { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "'{}'", self.0) + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum Identifier { + DoubleQuoted(String), + BacktickQuoted(String), + Unquoted(String), +} + +impl Display for Identifier { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Identifier::DoubleQuoted(s) => write!(f, "\"{s}\""), + Identifier::BacktickQuoted(s) => write!(f, "`{s}`"), + Identifier::Unquoted(s) => write!(f, "{s}"), + } + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub struct AggregateFunctionDefinition { + pub name: Identifier, + pub parameters: Option>, +} + +impl Display for AggregateFunctionDefinition { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.name)?; + + if let Some(parameters) = &self.parameters { + write!(f, "(")?; + let mut first = true; + for parameter in parameters { + if first { + first = false; + } else { + write!(f, ", ")?; + } + write!(f, "{parameter}")?; + } + write!(f, ")")?; + } + + Ok(()) + } +} + +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] +pub enum AggregateFunctionParameter { + SingleQuotedString(SingleQuotedString), + FloatingPoint(f64), + Integer(u32), +} + +impl Display for AggregateFunctionParameter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + AggregateFunctionParameter::SingleQuotedString(s) => write!(f, "{s}"), + AggregateFunctionParameter::FloatingPoint(n) => write!(f, "{n}"), + AggregateFunctionParameter::Integer(n) => write!(f, "{n}"), + } + } +} + +/// A parsed representation of a clickhouse datatype string +/// This should support the full scope of clickhouse types +/// To create one from a string slice, use try_from() +#[derive(Debug, Clone, PartialEq)] +pub enum ClickHouseDataType { + Nullable(Box), + Bool, + String, + FixedString(u32), + UInt8, + UInt16, + UInt32, + UInt64, + UInt128, + UInt256, + Int8, + Int16, + Int32, + Int64, + Int128, + Int256, + Float32, + Float64, + Decimal { + precision: u32, + scale: u32, + }, + Decimal32 { + scale: u32, + }, + Decimal64 { + scale: u32, + }, + Decimal128 { + scale: u32, + }, + Decimal256 { + scale: u32, + }, + Date, + Date32, + DateTime { + timezone: Option, + }, + DateTime64 { + precision: u32, + timezone: Option, + }, + Json, + Uuid, + IPv4, + IPv6, + LowCardinality(Box), + Nested(Vec<(Identifier, ClickHouseDataType)>), + Array(Box), + Map { + key: Box, + value: Box, + }, + Tuple(Vec<(Option, ClickHouseDataType)>), + Enum(Vec<(SingleQuotedString, Option)>), + SimpleAggregateFunction { + function: AggregateFunctionDefinition, + 
+        arguments: Vec<ClickHouseDataType>,
+    },
+    AggregateFunction {
+        function: AggregateFunctionDefinition,
+        arguments: Vec<ClickHouseDataType>,
+    },
+    Nothing,
+    CompoundIdentifier(Vec<Identifier>),
+    SingleIdentifier(Identifier),
+}
+
+impl Display for ClickHouseDataType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        use ClickHouseDataType as DT;
+        match self {
+            DT::Nullable(inner) => write!(f, "Nullable({inner})"),
+            DT::Bool => write!(f, "Bool"),
+            DT::String => write!(f, "String"),
+            DT::FixedString(n) => write!(f, "FixedString({n})"),
+            DT::UInt8 => write!(f, "UInt8"),
+            DT::UInt16 => write!(f, "UInt16"),
+            DT::UInt32 => write!(f, "UInt32"),
+            DT::UInt64 => write!(f, "UInt64"),
+            DT::UInt128 => write!(f, "UInt128"),
+            DT::UInt256 => write!(f, "UInt256"),
+            DT::Int8 => write!(f, "Int8"),
+            DT::Int16 => write!(f, "Int16"),
+            DT::Int32 => write!(f, "Int32"),
+            DT::Int64 => write!(f, "Int64"),
+            DT::Int128 => write!(f, "Int128"),
+            DT::Int256 => write!(f, "Int256"),
+            DT::Float32 => write!(f, "Float32"),
+            DT::Float64 => write!(f, "Float64"),
+            DT::Decimal { precision, scale } => write!(f, "Decimal({precision}, {scale})"),
+            DT::Decimal32 { scale } => write!(f, "Decimal32({scale})"),
+            DT::Decimal64 { scale } => write!(f, "Decimal64({scale})"),
+            DT::Decimal128 { scale } => write!(f, "Decimal128({scale})"),
+            DT::Decimal256 { scale } => write!(f, "Decimal256({scale})"),
+            DT::Date => write!(f, "Date"),
+            DT::Date32 => write!(f, "Date32"),
+            DT::DateTime { timezone } => {
+                write!(f, "DateTime")?;
+                if let Some(tz) = timezone {
+                    write!(f, "({tz})")?;
+                }
+                Ok(())
+            }
+            DT::DateTime64 {
+                precision,
+                timezone,
+            } => {
+                write!(f, "DateTime64({precision}")?;
+                if let Some(tz) = timezone {
+                    write!(f, ", {tz}")?;
+                }
+                write!(f, ")")
+            }
+            DT::Json => write!(f, "JSON"),
+            DT::Uuid => write!(f, "UUID"),
+            DT::IPv4 => write!(f, "IPv4"),
+            DT::IPv6 => write!(f, "IPv6"),
+            DT::LowCardinality(inner) => write!(f, "LowCardinality({inner})"),
+            DT::Nested(elements) => {
+                write!(f, "Nested(")?;
+                let mut first = true;
+                for (name, value) in elements {
+                    if first {
+                        first = false;
+                    } else {
+                        write!(f, ", ")?;
+                    }
+                    write!(f, "{name} {value}")?;
+                }
+                write!(f, ")")
+            }
+            DT::Array(inner) => write!(f, "Array({inner})"),
+            DT::Map { key, value } => write!(f, "Map({key}, {value})"),
+            DT::Tuple(elements) => {
+                write!(f, "Tuple(")?;
+                let mut first = true;
+                for (name, t) in elements {
+                    if first {
+                        first = false;
+                    } else {
+                        write!(f, ", ")?;
+                    }
+                    if let Some(name) = name {
+                        write!(f, "{name} ")?;
+                    }
+                    write!(f, "{t}")?;
+                }
+                write!(f, ")")
+            }
+            DT::Enum(variants) => {
+                write!(f, "Enum(")?;
+                let mut first = true;
+                for (variant, id) in variants {
+                    if first {
+                        first = false;
+                    } else {
+                        write!(f, ", ")?;
+                    }
+
+                    write!(f, "{variant}")?;
+
+                    if let Some(id) = id {
+                        write!(f, " = {id}")?;
+                    }
+                }
+                write!(f, ")")
+            }
+            DT::SimpleAggregateFunction {
+                function,
+                arguments,
+            } => {
+                write!(f, "SimpleAggregateFunction({function}")?;
+                for argument in arguments {
+                    write!(f, ", {argument}")?;
+                }
+                write!(f, ")")
+            }
+            DT::AggregateFunction {
+                function,
+                arguments,
+            } => {
+                write!(f, "AggregateFunction({function}")?;
+                for argument in arguments {
+                    write!(f, ", {argument}")?;
+                }
+                write!(f, ")")
+            }
+            DT::Nothing => write!(f, "Nothing"),
+            DT::CompoundIdentifier(identifiers) => {
+                let mut first = true;
+                for identifier in identifiers {
+                    if first {
+                        first = false;
+                    } else {
+                        write!(f, ".")?;
+                    }
+
+                    write!(f, "{identifier}")?;
+                }
+                Ok(())
+            }
+            DT::SingleIdentifier(identifier) => write!(f, "{identifier}"),
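+            // identifier variants delegate to Identifier's Display impl, which preserves the original quoting style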
"{identifier}"), + } + } +} + +impl FromStr for ClickHouseDataType { + type Err = peg::error::ParseError; + + /// Attempt to create a ClickHouseDataType from a string representation of the type. + /// May return a parse error if the type string is malformed, or if our implementation is out of date or incorrect + fn from_str(s: &str) -> Result { + clickhouse_parser::data_type(s) + } +} diff --git a/crates/common/src/clickhouse_parser/parameterized_query.rs b/crates/common/src/clickhouse_parser/parameterized_query.rs new file mode 100644 index 0000000..73954d7 --- /dev/null +++ b/crates/common/src/clickhouse_parser/parameterized_query.rs @@ -0,0 +1,57 @@ +use std::{fmt::Display, str::FromStr}; + +use super::{ + clickhouse_parser, + datatype::{ClickHouseDataType, Identifier}, +}; + +#[derive(Debug, Clone, PartialEq)] +pub struct ParameterizedQuery { + pub elements: Vec, +} + +impl Display for ParameterizedQuery { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + for element in &self.elements { + write!(f, "{}", element)?; + } + Ok(()) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Parameter { + pub name: Identifier, + pub data_type: ClickHouseDataType, +} + +impl Display for Parameter { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{{{}: {}}}", self.name, self.data_type) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ParameterizedQueryElement { + String(String), + Parameter(Parameter), +} + +impl Display for ParameterizedQueryElement { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ParameterizedQueryElement::String(s) => write!(f, "{s}"), + ParameterizedQueryElement::Parameter(p) => write!(f, "{p}"), + } + } +} + +impl FromStr for ParameterizedQuery { + type Err = peg::error::ParseError; + + /// Attempt to create a ParameterizedQuery from a string. + /// May return a parse error if the type string is malformed, or if our implementation is out of date or incorrect + fn from_str(s: &str) -> Result { + clickhouse_parser::parameterized_query(s) + } +} diff --git a/crates/common/src/config.rs b/crates/common/src/config.rs index 4f5437d..c2a55b3 100644 --- a/crates/common/src/config.rs +++ b/crates/common/src/config.rs @@ -1,27 +1,34 @@ -use std::collections::BTreeMap; +use std::{collections::BTreeMap, default}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; -use crate::clickhouse_datatype::ClickHouseDataType; +use crate::clickhouse_parser::{ + datatype::ClickHouseDataType, parameterized_query::ParameterizedQuery, +}; #[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)] pub struct ServerConfigFile { #[serde(rename = "$schema")] pub schema: String, /// A list of tables available in this database + /// + /// The map key is a unique table alias that defaults to defaults to "_", + /// except for tables in the "default" schema where the table name is used + /// This is the name exposed to the engine, and may be configured by users. + /// When the configuration is updated, the table is identified by name and schema, and changes to the alias are preserved. 
     pub tables: BTreeMap<String, TableConfig<String>>,
+    /// Optionally define custom parameterized queries here
+    /// Note the names must not match table names
+    pub queries: Option<BTreeMap<String, ParameterizedQueryConfigFile>>,
 }
 
-#[derive(Debug, Default, Clone)]
+#[derive(Debug, Clone)]
 pub struct ServerConfig {
     /// the connection part of the config is not part of the config file
     pub connection: ConnectionConfig,
-    /// The map key is a unique table alias that defaults to "<table_schema>_<table_name>",
-    /// except for tables in the "default" schema where the table name is used
-    /// This is the name exposed to the engine, and may be configured by users.
-    /// When the configuration is updated, the table is identified by name and schema, and changes to the alias are preserved.
     pub tables: BTreeMap<String, TableConfig<ClickHouseDataType>>,
+    pub queries: BTreeMap<String, ParameterizedQueryConfig>,
 }
 
 #[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
@@ -61,5 +68,54 @@ pub struct ColumnConfig {
     pub data_type: ColumnDataType,
 }
 
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct ParameterizedQueryConfigFile {
+    /// Whether this query should be exposed as a procedure (mutating) or collection (non-mutating)
+    kind: ParameterizedQueryKind,
+    /// A relative path to a sql file
+    file: String,
+    /// Either a type definition for the return type for this query,
+    /// or a reference to another return type: either a table's alias,
+    /// or another query's alias. If another query, that query must have a return type definition.
+    return_type: ParameterizedQueryReturnType<String>,
+}
+
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(untagged, rename_all = "snake_case")]
+pub enum ParameterizedQueryKind {
+    #[default]
+    Collection,
+    Procedure,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(tag = "kind", rename_all = "snake_case")]
+pub enum ParameterizedQueryReturnType<ColumnDataType> {
+    /// the same as the return type for a known table
+    Reference {
+        /// the table alias must match a key in `tables`, and the query must return the same type as that table
+        /// alternatively, the alias may reference another parameterized query which has a return type definition,
+        alias: String,
+    },
+    Definition {
+        fields: BTreeMap<String, ColumnDataType>,
+    },
+}
+
+impl<ColumnDataType> Default for ParameterizedQueryReturnType<ColumnDataType> {
+    fn default() -> Self {
+        Self::Definition {
+            fields: BTreeMap::default(),
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub struct ParameterizedQueryConfig {
+    kind: ParameterizedQueryKind,
+    query: ParameterizedQuery,
+    return_type: ParameterizedQueryReturnType<ClickHouseDataType>,
+}
+
 pub const CONFIG_FILE_NAME: &str = "configuration.json";
 pub const CONFIG_SCHEMA_FILE_NAME: &str = "configuration.schema.json";
diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs
index 46a185a..bc1fa4d 100644
--- a/crates/common/src/lib.rs
+++ b/crates/common/src/lib.rs
@@ -1,3 +1,3 @@
-pub mod clickhouse_datatype;
+pub mod clickhouse_parser;
 pub mod client;
 pub mod config;

From d0d09fb124a7f204917e7bd20d8961945073b6dc Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Tue, 2 Apr 2024 07:15:55 -0400
Subject: [PATCH 14/28] ci manifest: mark plugin as hidden

---
 ci/templates/manifest.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ci/templates/manifest.yaml b/ci/templates/manifest.yaml
index 9cfd1f5..fa60c35 100644
--- a/ci/templates/manifest.yaml
+++ b/ci/templates/manifest.yaml
@@ -2,6 +2,7 @@ name: clickhouse
 version: "${CLI_VERSION}"
 shortDescription: "CLI plugin for Hasura ndc-clickhouse"
 homepage: https://hasura.io/connectors/clickhouse
+hidden: true
 platforms:
   - selector: darwin-arm64
     uri:
"https://github.com/hasura/ndc-clickhouse/releases/download/${CLI_VERSION}/ndc-clickhouse-cli-aarch64-apple-darwin" From 7ca2999abd1995675fd354cebf2d428847b9f788 Mon Sep 17 00:00:00 2001 From: Benoit Ranque Date: Tue, 2 Apr 2024 07:16:40 -0400 Subject: [PATCH 15/28] minor updates to parser and config type for parameterized queries --- crates/common/src/clickhouse_parser.rs | 23 +++------ .../common/src/clickhouse_parser/datatype.rs | 16 ------ crates/common/src/config.rs | 50 ++++++++++++------- 3 files changed, 39 insertions(+), 50 deletions(-) diff --git a/crates/common/src/clickhouse_parser.rs b/crates/common/src/clickhouse_parser.rs index 96761d1..444c805 100644 --- a/crates/common/src/clickhouse_parser.rs +++ b/crates/common/src/clickhouse_parser.rs @@ -5,13 +5,16 @@ use self::datatype::{ SingleQuotedString, }; -use self::parameterized_query::{Parameter, ParameterizedQuery, ParameterizedQueryElement}; +use self::parameterized_query::{ + Parameter, ParameterType, ParameterizedQuery, ParameterizedQueryElement, +}; peg::parser! { grammar clickhouse_parser() for str { pub rule parameterized_query() -> ParameterizedQuery = elements:parameterized_query_element()* statement_end()? { ParameterizedQuery { elements } } rule parameterized_query_element() -> ParameterizedQueryElement = p:parameter() { ParameterizedQueryElement::Parameter(p)} / s:$((!parameter() !statement_end() [_])+) { ParameterizedQueryElement::String(s.to_string()) } - rule parameter() -> Parameter = p:("{" _ name:identifier() _ ":" _ data_type:data_type() _ "}" { Parameter { name, data_type }}) + rule parameter() -> Parameter = p:("{" _ name:identifier() _ ":" _ t:parameter_type() _ "}" { Parameter { name, r#type: t }}) + rule parameter_type() -> ParameterType = d:data_type() { ParameterType::DataType(d) } / "Identifier" { ParameterType::Identifier } rule statement_end() = _ ";" _ pub rule data_type() -> DT = nullable() @@ -52,8 +55,6 @@ peg::parser! { / tuple() / r#enum() / nothing() - / compound_identifier() - / single_identifier() rule nullable() -> DT = "Nullable(" t:data_type() ")" { DT::Nullable(Box::new(t)) } rule uint8() -> DT = "UInt8" { DT::UInt8 } rule uint16() -> DT = "UInt16" { DT::UInt16 } @@ -94,8 +95,6 @@ peg::parser! { rule aggregate_function() -> DT = "AggregateFunction(" f:aggregate_function_definition() comma_separator() a:(data_type() ** comma_separator()) ")" { DT::AggregateFunction { function: f, arguments: a }} rule simple_aggregate_function() -> DT = "SimpleAggregateFunction(" f:aggregate_function_definition() comma_separator() a:(data_type() ** comma_separator()) ")" { DT::SimpleAggregateFunction { function: f, arguments: a }} rule nothing() -> DT = "Nothing" { DT::Nothing } - rule compound_identifier() -> DT = i:(i:identifier() ** ".") { DT::CompoundIdentifier(i) } - rule single_identifier() -> DT = i:identifier() { DT::SingleIdentifier(i) } rule aggregate_function_definition() -> AggregateFunctionDefinition = n:identifier() p:("(" p:(aggregate_function_parameter() ** comma_separator()) ")" { p })? 
{ AggregateFunctionDefinition { name: n, parameters: p }} rule aggregate_function_parameter() -> AggregateFunctionParameter = s:single_quoted_string_value() { AggregateFunctionParameter::SingleQuotedString(s)} @@ -180,14 +179,6 @@ fn can_parse_clickhouse_data_type() { (Some(Identifier::BacktickQuoted("u".to_string())), DT::UInt8), ]), ), - ( - "\"t1\".t2.`t3`", - DT::CompoundIdentifier(vec![ - Identifier::DoubleQuoted("t1".to_string()), - Identifier::Unquoted("t2".to_string()), - Identifier::BacktickQuoted("t3".to_string()), - ]), - ), ]; for (s, t) in data_types { @@ -212,12 +203,12 @@ fn can_parse_parameterized_query() { ), ParameterizedQueryElement::Parameter(Parameter { name: Identifier::Unquoted("ArtistId".to_string()), - data_type: DT::Int32, + r#type: ParameterType::DataType(DT::Int32), }), ParameterizedQueryElement::String(" AND Name != ".to_string()), ParameterizedQueryElement::Parameter(Parameter { name: Identifier::Unquoted("ArtistName".to_string()), - data_type: DT::String, + r#type: ParameterType::DataType(DT::String), }), ], }; diff --git a/crates/common/src/clickhouse_parser/datatype.rs b/crates/common/src/clickhouse_parser/datatype.rs index 6ec58a7..f42bb11 100644 --- a/crates/common/src/clickhouse_parser/datatype.rs +++ b/crates/common/src/clickhouse_parser/datatype.rs @@ -145,8 +145,6 @@ pub enum ClickHouseDataType { arguments: Vec, }, Nothing, - CompoundIdentifier(Vec), - SingleIdentifier(Identifier), } impl Display for ClickHouseDataType { @@ -270,20 +268,6 @@ impl Display for ClickHouseDataType { write!(f, ")") } DT::Nothing => write!(f, "Nothing"), - DT::CompoundIdentifier(identifiers) => { - let mut first = true; - for identifier in identifiers { - if first { - first = false; - } else { - write!(f, ".")?; - } - - write!(f, "{identifier}")?; - } - Ok(()) - } - DT::SingleIdentifier(identifier) => write!(f, "{identifier}"), } } } diff --git a/crates/common/src/config.rs b/crates/common/src/config.rs index c2a55b3..3efda0e 100644 --- a/crates/common/src/config.rs +++ b/crates/common/src/config.rs @@ -1,10 +1,11 @@ -use std::{collections::BTreeMap, default}; +use std::collections::BTreeMap; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use crate::clickhouse_parser::{ - datatype::ClickHouseDataType, parameterized_query::ParameterizedQuery, + datatype::ClickHouseDataType, + parameterized_query::{ParameterType, ParameterizedQuery}, }; #[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)] @@ -17,9 +18,11 @@ pub struct ServerConfigFile { /// except for tables in the "default" schema where the table name is used /// This is the name exposed to the engine, and may be configured by users. /// When the configuration is updated, the table is identified by name and schema, and changes to the alias are preserved. - pub tables: BTreeMap>, + #[serde(skip_serializing_if = "Option::is_none")] + pub tables: Option>>, /// Optionally define custom parameterized queries here /// Note the names must not match table names + #[serde(skip_serializing_if = "Option::is_none")] pub queries: Option>, } @@ -45,7 +48,9 @@ pub struct TableConfig { /// The table schema pub schema: String, /// Comments are sourced from the database table comment + #[serde(skip_serializing_if = "Option::is_none")] pub comment: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub primary_key: Option, /// The map key is a column alias identifying the table and may be customized. /// It defaults to the table name. 
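For orientation, a query entry under this revised format might look like the following configuration.json fragment (the alias, comment, file path, and referenced table are illustrative, not taken from this patch):

  "queries": {
    "ArtistByName": {
      "exposed_as": "collection",
      "comment": "Artists matching the given name",
      "file": "queries/ArtistByName.sql",
      "return_type": {
        "definition": "table_reference",
        "table_alias": "Artist"
      }
    }
  }

with queries/ArtistByName.sql containing a parameterized query in the syntax the parser above accepts, e.g.:

  SELECT * FROM "default"."Artist" WHERE Name = {ArtistName: String}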
@@ -71,40 +76,48 @@ pub struct ColumnConfig {
 #[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
 pub struct ParameterizedQueryConfigFile {
     /// Whether this query should be exposed as a procedure (mutating) or collection (non-mutating)
-    kind: ParameterizedQueryKind,
+    pub exposed_as: ParameterizedQueryExposedAs,
+    /// A comment that will be exposed in the schema
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub comment: Option<String>,
     /// A relative path to a sql file
-    file: String,
+    pub file: String,
     /// Either a type definition for the return type for this query,
     /// or a reference to another return type: either a table's alias,
     /// or another query's alias. If another query, that query must have a return type definition.
-    return_type: ParameterizedQueryReturnType<String>,
+    pub return_type: ParameterizedQueryReturnType<String>,
 }
 
-#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
-#[serde(untagged, rename_all = "snake_case")]
-pub enum ParameterizedQueryKind {
+#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
+#[serde(rename_all = "snake_case")]
+pub enum ParameterizedQueryExposedAs {
     #[default]
     Collection,
     Procedure,
 }
 
 #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
-#[serde(tag = "kind", rename_all = "snake_case")]
+#[serde(tag = "definition", rename_all = "snake_case")]
 pub enum ParameterizedQueryReturnType<ColumnDataType> {
     /// the same as the return type for a known table
-    Reference {
+    TableReference {
         /// the table alias must match a key in `tables`, and the query must return the same type as that table
-        /// alternatively, the alias may reference another parameterized query which has a return type definition,
-        alias: String,
+        table_alias: String,
     },
-    Definition {
-        fields: BTreeMap<String, ColumnDataType>,
+    /// The same as the return type for another query that has a return type definition
+    QueryReference {
+        /// the query alias must match a key in `queries`, and that query must have a custom return type definition
+        query_alias: String,
     },
+    /// A custom return type definition to associate with this query
+    Custom { fields: BTreeMap<String, ColumnDataType> },
 }
 
 impl<ColumnDataType> Default for ParameterizedQueryReturnType<ColumnDataType> {
     fn default() -> Self {
-        Self::Definition {
+        Self::Custom {
             fields: BTreeMap::default(),
         }
     }
@@ -112,9 +125,10 @@ impl<ColumnDataType> Default for ParameterizedQueryReturnType<ColumnDataType> {
 
 #[derive(Debug, Clone)]
 pub struct ParameterizedQueryConfig {
-    kind: ParameterizedQueryKind,
-    query: ParameterizedQuery,
-    return_type: ParameterizedQueryReturnType<ClickHouseDataType>,
+    pub exposed_as: ParameterizedQueryExposedAs,
+    pub comment: Option<String>,
+    pub query: ParameterizedQuery,
+    pub return_type: ParameterizedQueryReturnType<ClickHouseDataType>,
 }
 
 pub const CONFIG_FILE_NAME: &str = "configuration.json";

From 161851d8bf46aa6e7ba49c7f213b4da6f0fe2f77 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Tue, 2 Apr 2024 07:17:02 -0400
Subject: [PATCH 16/28] update cli for new config format

---
 crates/ndc-clickhouse-cli/src/main.rs | 142 ++++++++++++++++++++++----
 1 file changed, 120 insertions(+), 22 deletions(-)

diff --git a/crates/ndc-clickhouse-cli/src/main.rs b/crates/ndc-clickhouse-cli/src/main.rs
index 0f6d092..11eab4e 100644
--- a/crates/ndc-clickhouse-cli/src/main.rs
+++ b/crates/ndc-clickhouse-cli/src/main.rs
@@ -8,10 +8,11 @@ use std::{
 
 use clap::{Parser, Subcommand, ValueEnum};
 use common::{
-    clickhouse_datatype::ClickHouseDataType,
+    clickhouse_parser::{datatype::ClickHouseDataType,
parameterized_query::ParameterizedQuery}, config::{ - ColumnConfig, ConnectionConfig, PrimaryKey, ServerConfigFile, TableConfig, - CONFIG_FILE_NAME, CONFIG_SCHEMA_FILE_NAME, + ColumnConfig, ConnectionConfig, ParameterizedQueryConfig, ParameterizedQueryConfigFile, + ParameterizedQueryReturnType, PrimaryKey, ServerConfigFile, TableConfig, CONFIG_FILE_NAME, + CONFIG_SCHEMA_FILE_NAME, }, }; use database_introspection::{introspect_database, ColumnInfo, TableInfo}; @@ -185,33 +186,23 @@ pub async fn update_tables_config( ); // check if data type can be parsed, to give early warning to the user // this is preferable to failing later while handling requests - let _data_type = - ClickHouseDataType::from_str(&column.data_type).map_err(|err| { - format!( - "Unable to parse data type \"{}\" for column {} in table {}.{}: {}", - column.data_type, - column.column_name, - table.table_schema, - table.table_name, - err - ) - })?; let column_config = ColumnConfig { name: column.column_name.to_owned(), data_type: column.data_type.to_owned(), }; - Ok((column_alias, column_config)) + (column_alias, column_config) }) - .collect::>()?, + .collect(), }; - Ok((table_alias, table_config)) + (table_alias, table_config) }) - .collect::>()?; + .collect(); let config = ServerConfigFile { schema: CONFIG_SCHEMA_FILE_NAME.to_owned(), - tables, + tables: Some(tables), + queries: old_config.queries.to_owned(), }; let config_schema = schema_for!(ServerConfigFile); @@ -222,6 +213,111 @@ pub async fn update_tables_config( ) .await?; + // validate after writing out the updated metadata. This should help users understand what the problem is + // check if some column types can't be parsed + if let Some(tables) = &config.tables { + for (table_alias, table_config) in tables { + for (column_alias, column_config) in &table_config.columns { + let _data_type = + ClickHouseDataType::from_str(&column_config.data_type).map_err(|err| { + format!( + "Unable to parse data type \"{}\" for column {} in table {}: {}", + column_config.data_type, column_alias, table_alias, err + ) + })?; + } + } + } + + if let Some(queries) = &config.queries { + for (query_alias, query_config) in queries { + // check for duplicate alias + if config + .tables + .as_ref() + .and_then(|tables| tables.get(query_alias)) + .is_some() + { + return Err(format!( + "Name collision: query \"{query_alias}\" has the same name as a collection" + ) + .into()); + } + + // if return type is a reference, check it exists and is valid: + match &query_config.return_type { + ParameterizedQueryReturnType::TableReference { table_alias } => { + if config + .tables + .as_ref() + .and_then(|tables| tables.get(table_alias)) + .is_none() + { + return Err(format!( + "Orphan reference: query \"{query_alias}\" references table \"{table_alias}\" which cannot be found." + ) + .into()); + } + } + ParameterizedQueryReturnType::QueryReference { + query_alias: target_alias, + } => { + match config + .queries + .as_ref() + .and_then(|queries| queries.get(target_alias)) + { + Some(ParameterizedQueryConfigFile { + return_type: ParameterizedQueryReturnType::Custom { .. }, + .. + }) => { + // referencing a query that has a custom return type definition we can use. all is well + } + Some(_) => { + return Err(format!( + "Invalid reference: query \"{query_alias}\" references \"{target_alias}\" which does not have a return type definition." + ) + .into()); + } + None => { + return Err(format!( + "Orphan reference: query \"{query_alias}\" references query \"{target_alias}\" which cannot be found." 
+ ) + .into()); + } + } + } + ParameterizedQueryReturnType::Custom { fields } => { + for (field_alias, field_type) in fields { + let _data_type = + ClickHouseDataType::from_str(&field_type).map_err(|err| { + format!( + "Unable to parse data type \"{}\" for field {} in query {}: {}", + field_type, field_alias, query_alias, err + ) + })?; + } + } + } + + // validate that we can find the referenced sql file + let file_path = configuration_dir.as_ref().join(&query_config.file); + let file_content = fs::read_to_string(&file_path).await.map_err(|err| { + format!( + "Error reading {} for query {query_alias}: {err}", + query_config.file + ) + })?; + // validate that we can parse the reference sql file + let _query = ParameterizedQuery::from_str(&file_content).map_err(|err| { + format!( + "Unable to parse file {} for parameterized query {}: {}", + query_config.file, query_alias, err + ) + })?; + } + } + Ok(()) } @@ -230,10 +326,12 @@ pub async fn update_tables_config( /// This allows custom aliases to be preserved fn get_old_table_config<'a>( table: &TableInfo, - old_tables: &'a BTreeMap>, + old_tables: &'a Option>>, ) -> Option<(&'a String, &'a TableConfig)> { - old_tables.iter().find(|(_, old_table)| { - old_table.name == table.table_name && old_table.schema == table.table_schema + old_tables.as_ref().and_then(|old_tables| { + old_tables.iter().find(|(_, old_table)| { + old_table.name == table.table_name && old_table.schema == table.table_schema + }) }) } From 4b88cfef1f41a03d02153cdd60a8d2cc660d55ee Mon Sep 17 00:00:00 2001 From: Benoit Ranque Date: Tue, 2 Apr 2024 07:17:53 -0400 Subject: [PATCH 17/28] minor update to parameterized query type --- .../clickhouse_parser/parameterized_query.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/crates/common/src/clickhouse_parser/parameterized_query.rs b/crates/common/src/clickhouse_parser/parameterized_query.rs index 73954d7..8744ed8 100644 --- a/crates/common/src/clickhouse_parser/parameterized_query.rs +++ b/crates/common/src/clickhouse_parser/parameterized_query.rs @@ -22,12 +22,27 @@ impl Display for ParameterizedQuery { #[derive(Debug, Clone, PartialEq)] pub struct Parameter { pub name: Identifier, - pub data_type: ClickHouseDataType, + pub r#type: ParameterType, } impl Display for Parameter { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{{{}: {}}}", self.name, self.data_type) + write!(f, "{{{}: {}}}", self.name, self.r#type) + } +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ParameterType { + DataType(ClickHouseDataType), + Identifier, +} + +impl Display for ParameterType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + ParameterType::DataType(t) => write!(f, "{t}"), + ParameterType::Identifier => write!(f, "Identifier"), + } } } From b1756eb874a8e27214caeb91ef55add93441ff95 Mon Sep 17 00:00:00 2001 From: Benoit Ranque Date: Tue, 2 Apr 2024 07:18:23 -0400 Subject: [PATCH 18/28] update schema to show parameterized queries --- crates/ndc-clickhouse/src/connector.rs | 244 ++++++++++++++---- .../src/connector/handler/schema.rs | 136 +++++++++- .../src/schema/type_definition.rs | 21 +- crates/ndc-clickhouse/src/sql/ast.rs | 2 +- .../ndc-clickhouse/src/sql/query_builder.rs | 2 +- .../sql/query_builder/comparison_column.rs | 2 +- .../src/sql/query_builder/typecasting.rs | 2 +- 7 files changed, 346 insertions(+), 63 deletions(-) diff --git a/crates/ndc-clickhouse/src/connector.rs b/crates/ndc-clickhouse/src/connector.rs index 
e225898..b90299a 100644 --- a/crates/ndc-clickhouse/src/connector.rs +++ b/crates/ndc-clickhouse/src/connector.rs @@ -1,7 +1,7 @@ pub mod handler; pub mod state; -use std::{env, path::Path, str::FromStr}; +use std::{collections::BTreeMap, env, path::Path, str::FromStr}; use tokio::fs; use async_trait::async_trait; @@ -17,9 +17,12 @@ use ndc_sdk::{ use self::state::ServerState; use common::{ - clickhouse_datatype::ClickHouseDataType, + clickhouse_parser::{ + self, datatype::ClickHouseDataType, parameterized_query::ParameterizedQuery, + }, config::{ - ColumnConfig, ConnectionConfig, ServerConfig, ServerConfigFile, TableConfig, + ColumnConfig, ConnectionConfig, ParameterizedQueryConfig, ParameterizedQueryConfigFile, + ParameterizedQueryReturnType, ServerConfig, ServerConfigFile, TableConfig, CONFIG_FILE_NAME, }, }; @@ -147,63 +150,194 @@ pub async fn read_server_config( _ => ParseError::IoError(err), })?; - let ServerConfigFile { schema: _, tables } = - serde_json::from_str::(&config_file).map_err(|err| { - ParseError::ParseError(LocatedError { - file_path: file_path.to_owned(), - line: err.line(), - column: err.column(), - message: err.to_string(), - }) + let config = serde_json::from_str::(&config_file).map_err(|err| { + ParseError::ParseError(LocatedError { + file_path: file_path.to_owned(), + line: err.line(), + column: err.column(), + message: err.to_string(), + }) + })?; + + let tables = config + .tables + .unwrap_or_default() + .into_iter() + .map(|(table_alias, table_config)| { + Ok(( + table_alias.clone(), + TableConfig { + name: table_config.name, + schema: table_config.schema, + comment: table_config.comment, + primary_key: table_config.primary_key, + columns: table_config + .columns + .into_iter() + .map(|(column_alias, column_config)| { + Ok(( + column_alias.clone(), + ColumnConfig { + name: column_config.name, + data_type: ClickHouseDataType::from_str( + &column_config.data_type, + ) + .map_err(|_err| { + ParseError::ValidateError(InvalidNodes(vec![InvalidNode { + file_path: file_path.to_owned(), + node_path: vec![ + KeyOrIndex::Key("tables".to_string()), + KeyOrIndex::Key(table_alias.to_owned()), + KeyOrIndex::Key("columns".to_string()), + KeyOrIndex::Key(column_alias.to_owned()), + KeyOrIndex::Key("data_type".to_string()), + ], + message: "Unable to parse data type".to_string(), + }])) + })?, + }, + )) + }) + .collect::>()?, + }, + )) + }) + .collect::, ParseError>>()?; + + let mut queries = BTreeMap::new(); + + for (query_alias, query_config) in config.queries.clone().unwrap_or_default() { + let query_file_path = configuration_dir.as_ref().join(&query_config.file); + let file_content = fs::read_to_string(&query_file_path).await.map_err(|err| { + if let std::io::ErrorKind::NotFound = err.kind() { + ParseError::CouldNotFindConfiguration(query_file_path.to_owned()) + } else { + ParseError::IoError(err) + } })?; - let config = ServerConfig { - connection, - tables: tables - .into_iter() - .map(|(table_alias, table_config)| { - Ok(( - table_alias.clone(), - TableConfig { - name: table_config.name, - schema: table_config.schema, - comment: table_config.comment, - primary_key: table_config.primary_key, - columns: table_config - .columns + let query = ParameterizedQuery::from_str(&file_content).map_err(|err| { + ParseError::ValidateError(InvalidNodes(vec![InvalidNode { + file_path: query_file_path.clone(), + node_path: vec![ + KeyOrIndex::Key("queries".to_string()), + KeyOrIndex::Key(query_alias.clone()), + ], + message: format!("Unable to parse parameterized query: {}", err), + }])) 
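+                // the node_path identifies the offending query entry so the error points at the right place in configuration.json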
+        })?;
+
+        let query_definition = ParameterizedQueryConfig {
+            exposed_as: query_config.exposed_as.to_owned(),
+            comment: query_config.comment.to_owned(),
+            query,
+            return_type: match &query_config.return_type {
+                ParameterizedQueryReturnType::TableReference {
+                    table_alias: target_alias,
+                } => {
+                    if tables.contains_key(target_alias) {
+                        ParameterizedQueryReturnType::TableReference {
+                            table_alias: target_alias.to_owned(),
+                        }
+                    } else {
+                        return Err(ParseError::ValidateError(InvalidNodes(vec![InvalidNode {
+                            file_path: file_path.clone(),
+                            node_path: vec![
+                                KeyOrIndex::Key("queries".to_owned()),
+                                KeyOrIndex::Key(query_alias.to_owned()),
+                                KeyOrIndex::Key("return_type".to_owned()),
+                                KeyOrIndex::Key("alias".to_owned()),
+                            ],
+                            message: format!(
+                                "Orphan reference: cannot find table {} referenced by query {}",
+                                target_alias, query_alias
+                            ),
+                        }])));
+                    }
+                }
+                ParameterizedQueryReturnType::QueryReference {
+                    query_alias: target_alias,
+                } => match config
+                    .queries
+                    .as_ref()
+                    .and_then(|queries| queries.get(target_alias))
+                {
+                    Some(ParameterizedQueryConfigFile {
+                        return_type: ParameterizedQueryReturnType::Custom { .. },
+                        ..
+                    }) => ParameterizedQueryReturnType::QueryReference {
+                        query_alias: target_alias.to_owned(),
+                    },
+                    Some(_) => {
+                        return Err(ParseError::ValidateError(InvalidNodes(vec![
+                            InvalidNode {
+                                file_path: file_path.clone(),
+                                node_path: vec![
+                                    KeyOrIndex::Key("queries".to_owned()),
+                                    KeyOrIndex::Key(query_alias.to_owned()),
+                                    KeyOrIndex::Key("return_type".to_owned()),
+                                    KeyOrIndex::Key("alias".to_owned()),
+                                ],
+                                message: format!(
+                                    "Invalid reference: query {} referenced by query {} does not have a custom return type",
+                                    target_alias, query_alias
+                                ),
+                            },
+                        ])));
+                    }
+                    None => {
+                        return Err(ParseError::ValidateError(InvalidNodes(vec![InvalidNode {
+                            file_path: file_path.clone(),
+                            node_path: vec![
+                                KeyOrIndex::Key("queries".to_owned()),
+                                KeyOrIndex::Key(query_alias.to_owned()),
+                                KeyOrIndex::Key("return_type".to_owned()),
+                                KeyOrIndex::Key("alias".to_owned()),
+                            ],
+                            message: format!(
+                                "Orphan reference: cannot find query {} referenced by query {}",
+                                target_alias, query_alias
+                            ),
+                        }])));
+                    }
+                },
+                ParameterizedQueryReturnType::Custom { fields } => {
+                    ParameterizedQueryReturnType::Custom {
+                        fields: fields
                             .into_iter()
-                            .map(|(column_alias, column_config)| {
-                                Ok((
-                                    column_alias.clone(),
-                                    ColumnConfig {
-                                        name: column_config.name,
-                                        data_type: ClickHouseDataType::from_str(
-                                            &column_config.data_type,
-                                        )
-                                        .map_err(|_err| {
-                                            ParseError::ValidateError(InvalidNodes(vec![
-                                                InvalidNode {
-                                                    file_path: file_path.to_owned(),
-                                                    node_path: vec![
-                                                        KeyOrIndex::Key("tables".to_string()),
-                                                        KeyOrIndex::Key(table_alias.to_owned()),
-                                                        KeyOrIndex::Key("columns".to_string()),
-                                                        KeyOrIndex::Key(column_alias.to_owned()),
-                                                        KeyOrIndex::Key("data_type".to_string()),
-                                                    ],
-                                                    message: "Unable to parse data type"
-                                                        .to_string(),
-                                                },
-                                            ]))
-                                        })?,
-                                    },
-                                ))
+                            .map(|(field_alias, field_type)| {
+                                let data_type =
+                                    ClickHouseDataType::from_str(&field_type).map_err(|err| {
+                                        ParseError::ValidateError(InvalidNodes(vec![InvalidNode {
+                                            file_path: file_path.clone(),
+                                            node_path: vec![
+                                                KeyOrIndex::Key("queries".to_string()),
+                                                KeyOrIndex::Key(query_alias.clone()),
+                                                KeyOrIndex::Key("return_type".to_string()),
+                                                KeyOrIndex::Key("fields".to_string()),
+                                                KeyOrIndex::Key(field_alias.clone()),
+                                            ],
+                                            message: format!(
+                                                "Unable to parse data type \"{}\": {}",
+                                                field_type, err
+                                            ),
+                                        }]))
+                                    })?;
+                                Ok((field_alias.to_owned(), data_type))
                             })
                             .collect::>()?,
+
} + } + }, + }; + + queries.insert(query_alias.to_owned(), query_definition); + } + + let config = ServerConfig { + connection, + tables, + queries, }; Ok(config) diff --git a/crates/ndc-clickhouse/src/connector/handler/schema.rs b/crates/ndc-clickhouse/src/connector/handler/schema.rs index ba427d8..9314dda 100644 --- a/crates/ndc-clickhouse/src/connector/handler/schema.rs +++ b/crates/ndc-clickhouse/src/connector/handler/schema.rs @@ -1,5 +1,11 @@ use crate::schema::ClickHouseTypeDefinition; -use common::config::{PrimaryKey, ServerConfig}; +use common::{ + clickhouse_parser::{ + datatype::{ClickHouseDataType, Identifier}, + parameterized_query::{Parameter, ParameterType, ParameterizedQueryElement}, + }, + config::{ParameterizedQueryExposedAs, ParameterizedQueryReturnType, PrimaryKey, ServerConfig}, +}; use ndc_sdk::{connector::SchemaError, models}; use std::collections::BTreeMap; @@ -47,7 +53,72 @@ pub async fn schema(configuration: &ServerConfig) -> Result n, + }; + let data_type = match r#type { + ParameterType::Identifier => &ClickHouseDataType::String, + ParameterType::DataType(t) => t, + }; + let type_definition = ClickHouseTypeDefinition::from_query_argument( + data_type, + &argument_alias, + query_alias, + ); + + let (scalars, objects) = type_definition.type_definitions(); + + for (name, definition) in objects { + object_type_definitions.push((name, definition)); + } + for (name, definition) in scalars { + // silently dropping duplicate scalar definitions + // this could be an issue if somehow an enum has the same name as a primitive scalar + // there is the potential for name collisions resulting in dropped enum defintions + scalar_type_definitions.insert(name, definition); + } + } + } + } + + let table_collections = configuration .tables .iter() .map(|(table_alias, table_config)| models::CollectionInfo { @@ -67,8 +138,67 @@ pub async fn schema(configuration: &ServerConfig) -> Result None, + ParameterizedQueryElement::Parameter(Parameter { name, r#type }) => { + let argument_alias = match name { + Identifier::DoubleQuoted(n) + | Identifier::BacktickQuoted(n) + | Identifier::Unquoted(n) => n, + }; + let data_type = match r#type { + ParameterType::Identifier => &ClickHouseDataType::String, + ParameterType::DataType(t) => &t, + }; + let type_definition = ClickHouseTypeDefinition::from_query_argument( + data_type, + &argument_alias, + query_alias, + ); + + Some(( + argument_alias.to_owned(), + models::ArgumentInfo { + description: None, + argument_type: type_definition.type_identifier(), + }, + )) + } + }) + .collect(); + + models::CollectionInfo { + name: query_alias.to_owned(), + description: query_config.comment.to_owned(), + arguments, + collection_type: match &query_config.return_type { + ParameterizedQueryReturnType::TableReference { + table_alias: target_alias, + } + | ParameterizedQueryReturnType::QueryReference { + query_alias: target_alias, + } => target_alias.to_owned(), + ParameterizedQueryReturnType::Custom { .. 
} => query_alias.to_owned(), + }, + uniqueness_constraints: BTreeMap::new(), + foreign_keys: BTreeMap::new(), + } + }); + + let collections = table_collections.chain(query_collections).collect(); Ok(models::SchemaResponse { scalar_types: scalar_type_definitions, diff --git a/crates/ndc-clickhouse/src/schema/type_definition.rs b/crates/ndc-clickhouse/src/schema/type_definition.rs index 31efb0e..2e622ce 100644 --- a/crates/ndc-clickhouse/src/schema/type_definition.rs +++ b/crates/ndc-clickhouse/src/schema/type_definition.rs @@ -1,6 +1,9 @@ use std::collections::BTreeMap; -use common::clickhouse_datatype::{ClickHouseDataType, Identifier, SingleQuotedString}; +use common::{ + clickhouse_parser::datatype::{ClickHouseDataType, Identifier, SingleQuotedString}, + config::ParameterizedQueryReturnType, +}; use ndc_sdk::models; use super::{ClickHouseBinaryComparisonOperator, ClickHouseSingleColumnAggregateFunction}; @@ -455,6 +458,22 @@ impl ClickHouseTypeDefinition { let namespace = format!("{table_alias}.{column_alias}"); Self::new(data_type, &namespace) } + pub fn from_query_return_type( + data_type: &ClickHouseDataType, + field_alias: &str, + query_alias: &str, + ) -> Self { + let namespace = format!("{query_alias}.{field_alias}"); + Self::new(data_type, &namespace) + } + pub fn from_query_argument( + data_type: &ClickHouseDataType, + argument_alias: &str, + query_alias: &str, + ) -> Self { + let namespace = format!("{query_alias}.arg.{argument_alias}"); + Self::new(data_type, &namespace) + } fn new(data_type: &ClickHouseDataType, namespace: &str) -> Self { match data_type { ClickHouseDataType::Nullable(inner) => Self::Nullable { diff --git a/crates/ndc-clickhouse/src/sql/ast.rs b/crates/ndc-clickhouse/src/sql/ast.rs index 3fff521..6fcbef5 100644 --- a/crates/ndc-clickhouse/src/sql/ast.rs +++ b/crates/ndc-clickhouse/src/sql/ast.rs @@ -2,7 +2,7 @@ use std::fmt; mod parameter_extractor; -use common::clickhouse_datatype::ClickHouseDataType; +use common::clickhouse_parser::datatype::ClickHouseDataType; use parameter_extractor::ParameterExtractor; //.A statement containing placeholders where parameters used to be diff --git a/crates/ndc-clickhouse/src/sql/query_builder.rs b/crates/ndc-clickhouse/src/sql/query_builder.rs index e63866c..f68f4c4 100644 --- a/crates/ndc-clickhouse/src/sql/query_builder.rs +++ b/crates/ndc-clickhouse/src/sql/query_builder.rs @@ -1,6 +1,6 @@ use std::str::FromStr; -use common::{clickhouse_datatype::ClickHouseDataType, config::ServerConfig}; +use common::{clickhouse_parser::datatype::ClickHouseDataType, config::ServerConfig}; use indexmap::IndexMap; use ndc_sdk::models; diff --git a/crates/ndc-clickhouse/src/sql/query_builder/comparison_column.rs b/crates/ndc-clickhouse/src/sql/query_builder/comparison_column.rs index b496fa7..d524305 100644 --- a/crates/ndc-clickhouse/src/sql/query_builder/comparison_column.rs +++ b/crates/ndc-clickhouse/src/sql/query_builder/comparison_column.rs @@ -1,4 +1,4 @@ -use common::clickhouse_datatype::ClickHouseDataType; +use common::clickhouse_parser::datatype::ClickHouseDataType; use crate::sql::ast::{Expr, Function, Ident, Join, Lambda}; diff --git a/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs b/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs index 30a89a9..58e3ee5 100644 --- a/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs +++ b/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs @@ -1,7 +1,7 @@ use std::{collections::BTreeMap, fmt::Display, str::FromStr}; use common::{ - 
clickhouse_datatype::ClickHouseDataType,
+    clickhouse_parser::datatype::ClickHouseDataType,
     config::{ColumnConfig, ServerConfig},
 };
 use indexmap::IndexMap;

From 24c6068a4ca7fd9131ed98eb56ce5d4a09a886d2 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Tue, 2 Apr 2024 07:19:14 -0400
Subject: [PATCH 19/28] add changelog entries

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2be85c4..28726aa 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Return error if empty list of query variables passed. Variables should be omitted or be a list with at least one member
 - Use table comment as description for corresponding collection and object type
 - Return json representation for applicable scalar types in schema response
+- Add `configuration.schema.json` to generated configuration directory
+- Bump ndc-spec dependency to 0.1.1
+- Config breaking change: use maps for tables and columns lists, rather than arrays. This should help avoid duplicate alias issues
+- Move parsing of column data types to configuration time and startup time, instead of query execution time. This should give error feedback earlier

 ## [0.2.1]

From f271be77c3b763eb8c879d5cbffebf7f106ad414 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Tue, 2 Apr 2024 07:19:48 -0400
Subject: [PATCH 20/28] update workspace

---
 .gitignore |   2 +-
 Cargo.lock | 258 ++++++++++------------------------------------------
 Cargo.toml |   3 +-
 3 files changed, 47 insertions(+), 216 deletions(-)

diff --git a/.gitignore b/.gitignore
index a8ae2fd..0cad7ed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,4 @@
 /target
 /.idea
 .env
-config \ No newline at end of file
+/config \ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index 31522b2..e28e86e 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -119,9 +119,9 @@ dependencies = [

 [[package]]
 name = "async-trait"
-version = "0.1.78"
+version = "0.1.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "461abc97219de0eaaf81fe3ef974a540158f3d079c2ab200f891f1a2ef201e85"
+checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -251,9 +251,9 @@ checksum = "7ff69b9dd49fd426c69a0db9fc04dd934cdb6645ff000864d98f7e2af8830eaa"

 [[package]]
 name = "bytes"
-version = "1.5.0"
+version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223"
+checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9"

 [[package]]
 name = "cc"
@@ -282,9 +282,9 @@ dependencies = [

 [[package]]
 name = "clap"
-version = "4.5.3"
+version = "4.5.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "949626d00e063efc93b6dca932419ceb5432f99769911c0b995f7e884c778813"
+checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0"
 dependencies = [
  "clap_builder",
  "clap_derive",
@@ -304,9 +304,9 @@ dependencies = [

 [[package]]
 name = "clap_derive"
-version = "4.5.3"
+version = "4.5.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "90239a040c80f5e14809ca132ddc4176ab33d5e17e49691793296e3fcb34d72f"
+checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64"
 dependencies = [
  "heck 0.5.0",
  "proc-macro2",
  "quote",
@@ -320,16 +320,6 @@
 version = "0.7.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "98cc8fbded0c607b7ba9dd60cd98df59af97e84d24e49c8557331cfc26d301ce" -[[package]] -name = "client" -version = "0.2.1" -dependencies = [ - "config", - "reqwest", - "serde", - "serde_json", -] - [[package]] name = "colorchoice" version = "1.0.0" @@ -337,21 +327,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7" [[package]] -name = "colored" -version = "2.1.0" +name = "colorful" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cbf2150cce219b664a8a70df7a1f933836724b503f8a413af9365b4dcc4d90b8" -dependencies = [ - "lazy_static", - "windows-sys 0.48.0", -] +checksum = "97af0562545a7d7f3d9222fcf909963bec36dcb502afaacab98c6ffac8da47ce" [[package]] -name = "config" +name = "common" version = "0.2.1" dependencies = [ + "peg", + "reqwest", "schemars", "serde", + "serde_json", ] [[package]] @@ -370,15 +359,6 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" -[[package]] -name = "crc32fast" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" -dependencies = [ - "cfg-if", -] - [[package]] name = "crossbeam-channel" version = "0.5.12" @@ -482,16 +462,6 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25cbce373ec4653f1a01a31e8a5e5ec0c622dc27ff9c4e6606eefef5cbbed4a5" -[[package]] -name = "flate2" -version = "1.0.28" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46303f565772937ffe1d394a4fac6f411c6013172fadde9dcdb1e147a086940e" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - [[package]] name = "fnv" version = "1.0.7" @@ -745,9 +715,9 @@ dependencies = [ "futures-util", "http", "hyper", - "rustls 0.21.10", + "rustls", "tokio", - "tokio-rustls 0.24.1", + "tokio-rustls", ] [[package]] @@ -980,11 +950,9 @@ name = "ndc-clickhouse" version = "0.2.1" dependencies = [ "async-trait", - "client", - "config", + "common", "indexmap 2.2.5", "ndc-sdk", - "peg", "prometheus", "reqwest", "serde", @@ -998,8 +966,8 @@ name = "ndc-clickhouse-cli" version = "0.2.1" dependencies = [ "clap", - "client", - "config", + "common", + "schemars", "serde", "serde_json", "tokio", @@ -1007,12 +975,12 @@ dependencies = [ [[package]] name = "ndc-client" -version = "0.1.0" -source = "git+http://github.com/hasura/ndc-spec.git?tag=v0.1.0#8892f0524affd37e94097c2ce43da8740fc57aca" +version = "0.1.1" +source = "git+http://github.com/hasura/ndc-spec.git?tag=v0.1.1#17c61946cc9a3ff6dcee1d535af33141213b639a" dependencies = [ "async-trait", "indexmap 2.2.5", - "opentelemetry 0.20.0", + "opentelemetry", "reqwest", "schemars", "serde", @@ -1025,7 +993,7 @@ dependencies = [ [[package]] name = "ndc-sdk" version = "0.1.0" -source = "git+https://github.com/hasura/ndc-hub.git?rev=4c31e8b#4c31e8b5872ccf879b82e6d4f0cbcbbf4793920e" +source = "git+https://github.com/hasura/ndc-sdk-rs?rev=7b56fac#7b56fac3aba2bc6533d3163111377fd5fbeb3011" dependencies = [ "async-trait", "axum", @@ -1036,11 +1004,11 @@ dependencies = [ "mime", "ndc-client", "ndc-test", - "opentelemetry 0.22.0", + "opentelemetry", "opentelemetry-http", "opentelemetry-otlp", "opentelemetry-semantic-conventions", - "opentelemetry_sdk 0.22.1", + "opentelemetry_sdk", "prometheus", "reqwest", "serde", @@ -1056,12 +1024,12 @@ dependencies = [ 
[[package]] name = "ndc-test" -version = "0.1.0" -source = "git+http://github.com/hasura/ndc-spec.git?tag=v0.1.0#8892f0524affd37e94097c2ce43da8740fc57aca" +version = "0.1.1" +source = "git+http://github.com/hasura/ndc-spec.git?tag=v0.1.1#17c61946cc9a3ff6dcee1d535af33141213b639a" dependencies = [ "async-trait", "clap", - "colored", + "colorful", "indexmap 2.2.5", "ndc-client", "rand", @@ -1167,16 +1135,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "opentelemetry" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9591d937bc0e6d2feb6f71a559540ab300ea49955229c347a517a28d27784c54" -dependencies = [ - "opentelemetry_api", - "opentelemetry_sdk 0.20.0", -] - [[package]] name = "opentelemetry" version = "0.22.0" @@ -1201,7 +1159,7 @@ dependencies = [ "async-trait", "bytes", "http", - "opentelemetry 0.22.0", + "opentelemetry", "reqwest", ] @@ -1214,11 +1172,11 @@ dependencies = [ "async-trait", "futures-core", "http", - "opentelemetry 0.22.0", + "opentelemetry", "opentelemetry-http", "opentelemetry-proto", "opentelemetry-semantic-conventions", - "opentelemetry_sdk 0.22.1", + "opentelemetry_sdk", "prost", "reqwest", "thiserror", @@ -1232,8 +1190,8 @@ version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a8fddc9b68f5b80dae9d6f510b88e02396f006ad48cac349411fbecc80caae4" dependencies = [ - "opentelemetry 0.22.0", - "opentelemetry_sdk 0.22.1", + "opentelemetry", + "opentelemetry_sdk", "prost", "tonic", ] @@ -1244,41 +1202,6 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9ab5bd6c42fb9349dcf28af2ba9a0667f697f9bdcca045d39f2cec5543e2910" -[[package]] -name = "opentelemetry_api" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a81f725323db1b1206ca3da8bb19874bbd3f57c3bcd59471bfb04525b265b9b" -dependencies = [ - "futures-channel", - "futures-util", - "indexmap 1.9.3", - "js-sys", - "once_cell", - "pin-project-lite", - "thiserror", - "urlencoding", -] - -[[package]] -name = "opentelemetry_sdk" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa8e705a0612d48139799fcbaba0d4a90f06277153e43dd2bdc16c6f0edd8026" -dependencies = [ - "async-trait", - "crossbeam-channel", - "futures-channel", - "futures-executor", - "futures-util", - "once_cell", - "opentelemetry_api", - "ordered-float 3.9.2", - "percent-encoding", - "rand", - "thiserror", -] - [[package]] name = "opentelemetry_sdk" version = "0.22.1" @@ -1292,8 +1215,8 @@ dependencies = [ "futures-util", "glob", "once_cell", - "opentelemetry 0.22.0", - "ordered-float 4.2.0", + "opentelemetry", + "ordered-float", "percent-encoding", "rand", "thiserror", @@ -1301,15 +1224,6 @@ dependencies = [ "tokio-stream", ] -[[package]] -name = "ordered-float" -version = "3.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1e1c390732d15f1d48471625cd92d154e66db2c56645e29a9cd26f4699f72dc" -dependencies = [ - "num-traits", -] - [[package]] name = "ordered-float" version = "4.2.0" @@ -1602,8 +1516,8 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.21.10", - "rustls-pemfile 1.0.4", + "rustls", + "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", @@ -1611,7 +1525,7 @@ dependencies = [ "system-configuration", "tokio", "tokio-native-tls", - "tokio-rustls 0.24.1", + "tokio-rustls", "tower-service", "url", "wasm-bindgen", @@ -1663,37 +1577,10 @@ checksum = 
"f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" dependencies = [ "log", "ring", - "rustls-webpki 0.101.7", + "rustls-webpki", "sct", ] -[[package]] -name = "rustls" -version = "0.22.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e87c9956bd9807afa1f77e0f7594af32566e830e088a5576d27c5b6f30f49d41" -dependencies = [ - "log", - "ring", - "rustls-pki-types", - "rustls-webpki 0.102.2", - "subtle", - "zeroize", -] - -[[package]] -name = "rustls-native-certs" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f1fb85efa936c42c6d5fc28d2629bb51e4b2f4b8a5211e297d599cc5a093792" -dependencies = [ - "openssl-probe", - "rustls-pemfile 2.1.1", - "rustls-pki-types", - "schannel", - "security-framework", -] - [[package]] name = "rustls-pemfile" version = "1.0.4" @@ -1703,22 +1590,6 @@ dependencies = [ "base64 0.21.7", ] -[[package]] -name = "rustls-pemfile" -version = "2.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f48172685e6ff52a556baa527774f61fcaa884f59daf3375c62a3f1cd2549dab" -dependencies = [ - "base64 0.21.7", - "rustls-pki-types", -] - -[[package]] -name = "rustls-pki-types" -version = "1.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "868e20fada228fefaf6b652e00cc73623d54f8171e7352c18bb281571f2d92da" - [[package]] name = "rustls-webpki" version = "0.101.7" @@ -1729,17 +1600,6 @@ dependencies = [ "untrusted", ] -[[package]] -name = "rustls-webpki" -version = "0.102.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610" -dependencies = [ - "ring", - "rustls-pki-types", - "untrusted", -] - [[package]] name = "rustversion" version = "1.0.14" @@ -1866,9 +1726,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.114" +version = "1.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" +checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" dependencies = [ "indexmap 2.2.5", "itoa", @@ -2018,12 +1878,6 @@ dependencies = [ "syn 2.0.53", ] -[[package]] -name = "subtle" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" - [[package]] name = "syn" version = "1.0.109" @@ -2217,18 +2071,7 @@ version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls 0.21.10", - "tokio", -] - -[[package]] -name = "tokio-rustls" -version = "0.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" -dependencies = [ - "rustls 0.22.2", - "rustls-pki-types", + "rustls", "tokio", ] @@ -2268,7 +2111,6 @@ dependencies = [ "axum", "base64 0.21.7", "bytes", - "flate2", "h2", "http", "http-body", @@ -2277,11 +2119,7 @@ dependencies = [ "percent-encoding", "pin-project", "prost", - "rustls-native-certs", - "rustls-pemfile 2.1.1", - "rustls-pki-types", "tokio", - "tokio-rustls 0.25.0", "tokio-stream", "tower", "tower-layer", @@ -2393,8 +2231,8 @@ checksum = "a9be14ba1bbe4ab79e9229f7f89fab8d120b865859f10527f31c033e599d2284" dependencies = [ "js-sys", "once_cell", - "opentelemetry 0.22.0", - "opentelemetry_sdk 
0.22.1", + "opentelemetry", + "opentelemetry_sdk", "smallvec", "tracing", "tracing-core", @@ -2794,9 +2632,3 @@ dependencies = [ "cfg-if", "windows-sys 0.48.0", ] - -[[package]] -name = "zeroize" -version = "1.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" diff --git a/Cargo.toml b/Cargo.toml index 3eddecc..0557072 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -2,8 +2,7 @@ members = [ "crates/ndc-clickhouse", "crates/ndc-clickhouse-cli", - "crates/config", - "crates/client", + "crates/common", ] resolver = "2" From 054fa8ad612e705c1cfc7ee31ac9659d221dfbae Mon Sep 17 00:00:00 2001 From: Benoit Ranque Date: Sun, 7 Apr 2024 08:28:50 -0400 Subject: [PATCH 21/28] gave up on small commits sorry... native queries and parameterized views now working --- Cargo.lock | 45 +- crates/common/src/clickhouse_parser.rs | 37 +- .../clickhouse_parser/parameterized_query.rs | 8 +- crates/common/src/config.rs | 122 +---- crates/common/src/config_file.rs | 103 ++++ crates/common/src/lib.rs | 1 + .../src/database_introspection.rs | 24 +- .../src/database_introspection.sql | 5 +- crates/ndc-clickhouse-cli/src/main.rs | 407 +++++++++------ crates/ndc-clickhouse/Cargo.toml | 1 + crates/ndc-clickhouse/src/connector.rs | 333 ++++++++----- .../src/connector/handler/explain.rs | 23 +- .../src/connector/handler/schema.rs | 96 ++-- .../src/schema/type_definition.rs | 5 +- crates/ndc-clickhouse/src/sql/ast.rs | 142 +++++- .../src/sql/ast/parameter_extractor.rs | 18 +- .../ndc-clickhouse/src/sql/query_builder.rs | 466 +++++++++++++----- .../sql/query_builder/collection_context.rs | 78 +++ .../src/sql/query_builder/error.rs | 22 + .../src/sql/query_builder/typecasting.rs | 55 ++- 20 files changed, 1368 insertions(+), 623 deletions(-) create mode 100644 crates/common/src/config_file.rs create mode 100644 crates/ndc-clickhouse/src/sql/query_builder/collection_context.rs diff --git a/Cargo.lock b/Cargo.lock index e28e86e..c0ec813 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -821,6 +821,15 @@ dependencies = [ "either", ] +[[package]] +name = "itertools" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.10" @@ -907,6 +916,12 @@ dependencies = [ "unicase", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + [[package]] name = "miniz_oxide" version = "0.7.2" @@ -957,6 +972,7 @@ dependencies = [ "reqwest", "serde", "serde_json", + "sqlformat", "strum", "tokio", ] @@ -1041,6 +1057,16 @@ dependencies = [ "tokio", ] +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -1386,7 +1412,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "efb6c9a1dd1def8e2124d17e83a20af56f1570d6c2d2bd9e266ccb768df3840e" dependencies = [ "anyhow", - "itertools", + "itertools 0.10.5", "proc-macro2", "quote", "syn 2.0.53", @@ -1844,6 +1870,17 @@ version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67" +[[package]] +name = "sqlformat" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ce81b7bd7c4493975347ef60d8c7e8b742d4694f4c49f93e0a12ea263938176c" +dependencies = [ + "itertools 0.12.1", + "nom", + "unicode_categories", +] + [[package]] name = "strsim" version = "0.10.0" @@ -2306,6 +2343,12 @@ dependencies = [ "tinyvec", ] +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "untrusted" version = "0.9.0" diff --git a/crates/common/src/clickhouse_parser.rs b/crates/common/src/clickhouse_parser.rs index 444c805..8350d42 100644 --- a/crates/common/src/clickhouse_parser.rs +++ b/crates/common/src/clickhouse_parser.rs @@ -12,7 +12,9 @@ use self::parameterized_query::{ peg::parser! { grammar clickhouse_parser() for str { pub rule parameterized_query() -> ParameterizedQuery = elements:parameterized_query_element()* statement_end()? { ParameterizedQuery { elements } } - rule parameterized_query_element() -> ParameterizedQueryElement = p:parameter() { ParameterizedQueryElement::Parameter(p)} / s:$((!parameter() !statement_end() [_])+) { ParameterizedQueryElement::String(s.to_string()) } + // single quoted strings, or anything that doesn't match a statement end or a parameter, is a part of the string + // this prevents matching on parameters insides of single quotes, if for some reason we have sql like that? + rule parameterized_query_element() -> ParameterizedQueryElement = s:$((single_quoted_string_value() / !parameter() !statement_end() [_])+) { ParameterizedQueryElement::String(s.to_string()) } / p:parameter() { ParameterizedQueryElement::Parameter(p)} rule parameter() -> Parameter = p:("{" _ name:identifier() _ ":" _ t:parameter_type() _ "}" { Parameter { name, r#type: t }}) rule parameter_type() -> ParameterType = d:data_type() { ParameterType::DataType(d) } / "Identifier" { ParameterType::Identifier } rule statement_end() = _ ";" _ @@ -215,3 +217,36 @@ fn can_parse_parameterized_query() { let parsed = clickhouse_parser::parameterized_query(&query); assert_eq!(parsed, Ok(expected), "can parse parameterized query"); } + +#[test] +fn can_parse_empty_parameterized_query() { + let query = r#""#; + let expected = ParameterizedQuery { elements: vec![] }; + let parsed = clickhouse_parser::parameterized_query(&query); + assert_eq!(parsed, Ok(expected), "can parse parameterized query"); +} + +#[test] +fn does_not_parse_parameters_insides_quoted_strings() { + let query = r#" + SELECT Name + FROM "default"."Artist" + WHERE ArtistId = {ArtistId:Int32} AND Name = '{ArtistName: String}'; + +"#; + let expected = ParameterizedQuery { + elements: vec![ + ParameterizedQueryElement::String( + "\n SELECT Name\n FROM \"default\".\"Artist\"\n WHERE ArtistId = " + .to_string(), + ), + ParameterizedQueryElement::Parameter(Parameter { + name: Identifier::Unquoted("ArtistId".to_string()), + r#type: ParameterType::DataType(DT::Int32), + }), + ParameterizedQueryElement::String(" AND Name = '{ArtistName: String}'".to_string()), + ], + }; + let parsed = clickhouse_parser::parameterized_query(&query); + assert_eq!(parsed, Ok(expected), "can parse parameterized query"); +} diff --git a/crates/common/src/clickhouse_parser/parameterized_query.rs b/crates/common/src/clickhouse_parser/parameterized_query.rs index 8744ed8..e373a73 100644 --- 
a/crates/common/src/clickhouse_parser/parameterized_query.rs +++ b/crates/common/src/clickhouse_parser/parameterized_query.rs @@ -5,7 +5,7 @@ use super::{ datatype::{ClickHouseDataType, Identifier}, }; -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, Default, PartialEq)] pub struct ParameterizedQuery { pub elements: Vec, } @@ -37,6 +37,12 @@ pub enum ParameterType { Identifier, } +impl From for ParameterType { + fn from(value: ClickHouseDataType) -> Self { + Self::DataType(value) + } +} + impl Display for ParameterType { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { diff --git a/crates/common/src/config.rs b/crates/common/src/config.rs index 3efda0e..6d1677d 100644 --- a/crates/common/src/config.rs +++ b/crates/common/src/config.rs @@ -1,126 +1,45 @@ use std::collections::BTreeMap; -use schemars::JsonSchema; -use serde::{Deserialize, Serialize}; - -use crate::clickhouse_parser::{ - datatype::ClickHouseDataType, - parameterized_query::{ParameterType, ParameterizedQuery}, +use crate::{ + clickhouse_parser::{datatype::ClickHouseDataType, parameterized_query::ParameterizedQuery}, + config_file::{ParameterizedQueryExposedAs, PrimaryKey}, }; -#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)] -pub struct ServerConfigFile { - #[serde(rename = "$schema")] - pub schema: String, - /// A list of tables available in this database - /// - /// The map key is a unique table alias that defaults to defaults to "_", - /// except for tables in the "default" schema where the table name is used - /// This is the name exposed to the engine, and may be configured by users. - /// When the configuration is updated, the table is identified by name and schema, and changes to the alias are preserved. - #[serde(skip_serializing_if = "Option::is_none")] - pub tables: Option>>, - /// Optionally define custom parameterized queries here - /// Note the names must not match table names - #[serde(skip_serializing_if = "Option::is_none")] - pub queries: Option>, -} - #[derive(Debug, Clone)] +/// In memory, runtime configuration, built from the configuration file(s) and environment variables pub struct ServerConfig { /// the connection part of the config is not part of the config file pub connection: ConnectionConfig, - pub tables: BTreeMap>, + pub table_types: BTreeMap, + pub tables: BTreeMap, pub queries: BTreeMap, } -#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)] +#[derive(Debug, Clone)] +pub struct TableType { + pub comment: Option, + pub columns: BTreeMap, +} + +#[derive(Debug, Default, Clone)] pub struct ConnectionConfig { pub username: String, pub password: String, pub url: String, } -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct TableConfig { +#[derive(Debug, Clone)] +pub struct TableConfig { /// The table name pub name: String, /// The table schema pub schema: String, /// Comments are sourced from the database table comment - #[serde(skip_serializing_if = "Option::is_none")] pub comment: Option, - #[serde(skip_serializing_if = "Option::is_none")] pub primary_key: Option, - /// The map key is a column alias identifying the table and may be customized. - /// It defaults to the table name. - /// When the configuration is updated, the column is identified by name, and changes to the alias are preserved. 
-    pub columns: BTreeMap>,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
-pub struct PrimaryKey {
-    pub name: String,
-    /// The names of columns in this primary key
-    pub columns: Vec,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
-pub struct ColumnConfig {
-    /// The column name
-    pub name: String,
-    /// The column data type
-    pub data_type: ColumnDataType,
-}
-
-#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
-pub struct ParameterizedQueryConfigFile {
-    /// Whether this query should be exposed as a procedure (mutating) or collection (non-mutating)
-    pub exposed_as: ParameterizedQueryExposedAs,
-    /// A comment that will be exposed in the schema
-    #[serde(skip_serializing_if = "Option::is_none")]
-    pub comment: Option,
-    /// A relative path to a sql file
-    pub file: String,
-    /// Either a type definition for the return type for this query,
-    /// or a reference to another return type: either a table's alias,
-    /// or another query's alias. If another query, that query must have a return type definition.
-    pub return_type: ParameterizedQueryReturnType,
-}
-
-#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)]
-#[serde(rename_all = "snake_case")]
-pub enum ParameterizedQueryExposedAs {
-    #[default]
-    Collection,
-    Procedure,
-}
-
-#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
-#[serde(tag = "definition", rename_all = "snake_case")]
-pub enum ParameterizedQueryReturnType {
-    /// the same as the return type for a known table
-    TableReference {
-        /// the table alias must match a key in `tables`, and the query must return the same type as that table
-        /// alternatively, the alias may reference another parameterized query which has a return type definition,
-        table_alias: String,
-    },
-    /// The same as the return type for another query that has a return type definition
-    QueryReference {
-        /// the table alias must match a key in `tables`, and the query must return the same type as that table
-        /// alternatively, the alias may reference another parameterized query which has a return type definition,
-        query_alias: String,
-    },
-    /// A custom return type definition to associate with this query
-    Custom { fields: BTreeMap },
-}
-
-impl Default for ParameterizedQueryReturnType {
-    fn default() -> Self {
-        Self::Custom {
-            fields: BTreeMap::default(),
-        }
-    }
+    pub arguments: BTreeMap,
+    // this key corresponds to a return type definition in the config table types
+    pub return_type: ReturnTypeRef,
 }

 #[derive(Debug, Clone)]
@@ -128,8 +47,7 @@ pub struct ParameterizedQueryConfig {
     pub exposed_as: ParameterizedQueryExposedAs,
     pub comment: Option,
     pub query: ParameterizedQuery,
-    pub return_type: ParameterizedQueryReturnType,
+    pub return_type: ReturnTypeRef,
 }

-pub const CONFIG_FILE_NAME: &str = "configuration.json";
-pub const CONFIG_SCHEMA_FILE_NAME: &str = "configuration.schema.json";
+type ReturnTypeRef = String;
diff --git a/crates/common/src/config_file.rs b/crates/common/src/config_file.rs
new file mode 100644
index 0000000..3d5d634
--- /dev/null
+++ b/crates/common/src/config_file.rs
@@ -0,0 +1,103 @@
+use std::collections::BTreeMap;
+
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
+
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
+/// the main configuration file
+pub struct ServerConfigFile {
+    #[serde(rename = "$schema")]
+    pub schema: String,
+    /// A list of tables available in this database
+    ///
+    /// The map key is a unique table alias that defaults to "_",
+    /// except for tables in the "default" schema where the table name is used
+    /// This is the name exposed to the engine, and may be configured by users.
+    /// When the configuration is updated, the table is identified by name and schema, and changes to the alias are preserved.
+    #[serde(skip_serializing_if = "BTreeMap::is_empty", default)]
+    pub tables: BTreeMap,
+    /// Optionally define custom parameterized queries here
+    /// Note the names must not match table names
+    #[serde(skip_serializing_if = "BTreeMap::is_empty", default)]
+    pub queries: BTreeMap,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct TableConfigFile {
+    /// The table name
+    pub name: String,
+    /// The table schema
+    pub schema: String,
+    /// Comments are sourced from the database table comment
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub comment: Option,
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub primary_key: Option,
+    #[serde(skip_serializing_if = "BTreeMap::is_empty", default)]
+    pub arguments: BTreeMap,
+    /// The map key is a column alias identifying the column and may be customized.
+    /// It defaults to the column name.
+    /// When the configuration is updated, the column is identified by name, and changes to the alias are preserved.
+    pub return_type: ReturnType,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct PrimaryKey {
+    pub name: String,
+    /// The names of columns in this primary key
+    pub columns: Vec,
+}
+
+#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
+#[serde(tag = "kind", rename_all = "snake_case")]
+pub enum ReturnType {
+    /// A custom return type definition
+    /// The keys are column names, the values are parsable ClickHouse datatypes
+    Definition { columns: BTreeMap },
+    /// the same as the return type for another table
+    TableReference {
+        /// the table name must match a key in `tables`,
+        /// and that table must have a return type definition
+        table_name: String,
+    },
+    /// The same as the return type for another query
+    QueryReference {
+        /// the query name must match a key in `queries`,
+        /// and that query must have a return type definition
+        query_name: String,
+    },
+}
+
+impl Default for ReturnType {
+    fn default() -> Self {
+        Self::Definition {
+            columns: BTreeMap::new(),
+        }
+    }
+}
+
+#[derive(Debug, Default, Clone, Serialize, Deserialize, JsonSchema)]
+pub struct ParameterizedQueryConfigFile {
+    /// Whether this query should be exposed as a procedure (mutating) or collection (non-mutating)
+    pub exposed_as: ParameterizedQueryExposedAs,
+    /// A comment that will be exposed in the schema
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub comment: Option,
+    /// A relative path to a sql file
+    pub file: String,
+    /// Either a type definition for the return type for this query,
+    /// or a reference to another return type: either a table's name,
+    /// or another query's name. If another query, that query must have a return type definition.
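+    /// Illustrative JSON shapes (hypothetical values), following from the `tag = "kind"` serde attribute on `ReturnType`:
+    /// `{ "kind": "definition", "columns": { "id": "UInt32" } }`
+    /// `{ "kind": "table_reference", "table_name": "some_table" }`
+    /// `{ "kind": "query_reference", "query_name": "some_query" }`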
+ pub return_type: ReturnType, +} + +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum ParameterizedQueryExposedAs { + #[default] + Collection, + Procedure, +} + +pub const CONFIG_FILE_NAME: &str = "configuration.json"; +pub const CONFIG_SCHEMA_FILE_NAME: &str = "configuration.schema.json"; diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs index bc1fa4d..712db38 100644 --- a/crates/common/src/lib.rs +++ b/crates/common/src/lib.rs @@ -1,3 +1,4 @@ pub mod clickhouse_parser; pub mod client; pub mod config; +pub mod config_file; diff --git a/crates/ndc-clickhouse-cli/src/database_introspection.rs b/crates/ndc-clickhouse-cli/src/database_introspection.rs index 8f71aa2..4ef71be 100644 --- a/crates/ndc-clickhouse-cli/src/database_introspection.rs +++ b/crates/ndc-clickhouse-cli/src/database_introspection.rs @@ -1,12 +1,13 @@ use std::error::Error; -use serde::{Deserialize, Serialize}; +use serde::Deserialize; -use common::client::{execute_query, get_http_client}; +use common::{ + client::{execute_query, get_http_client}, + config::ConnectionConfig, +}; -use super::ConnectionConfig; - -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Deserialize)] pub struct TableInfo { pub table_name: String, pub table_schema: String, @@ -14,10 +15,11 @@ pub struct TableInfo { pub table_comment: Option, pub table_type: TableType, pub primary_key: Option, + pub view_definition: String, pub columns: Vec, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Deserialize)] pub struct ColumnInfo { pub column_name: String, pub data_type: String, @@ -25,11 +27,15 @@ pub struct ColumnInfo { pub is_in_primary_key: bool, } -#[derive(Debug, Serialize, Deserialize)] +#[derive(Debug, Deserialize)] pub enum TableType { - #[serde(rename = "BASE TABLE")] + #[serde( + rename = "BASE TABLE", + alias = "FOREIGN TABLE", + alias = "LOCAL TEMPORARY" + )] Table, - #[serde(rename = "VIEW")] + #[serde(rename = "VIEW", alias = "SYSTEM VIEW")] View, } diff --git a/crates/ndc-clickhouse-cli/src/database_introspection.sql b/crates/ndc-clickhouse-cli/src/database_introspection.sql index 68799d9..9d4d91d 100644 --- a/crates/ndc-clickhouse-cli/src/database_introspection.sql +++ b/crates/ndc-clickhouse-cli/src/database_introspection.sql @@ -3,12 +3,15 @@ SELECT t.table_name AS "table_name", t.table_catalog AS "table_catalog", t.table_comment AS "table_comment", if(empty(st.primary_key), null, st.primary_key) AS "primary_key", - toString(t.table_type) as "table_type", + toString(t.table_type) AS "table_type", + v.view_definition AS "view_definition", cast( c.columns, 'Array(Tuple(column_name String, data_type String, is_nullable Bool, is_in_primary_key Bool))' ) AS "columns" FROM INFORMATION_SCHEMA.TABLES AS t + LEFT JOIN INFORMATION_SCHEMA.VIEWS AS v ON v.table_schema = t.table_schema + AND v.table_name = t.table_name LEFT JOIN system.tables AS st ON st.database = t.table_schema AND st.name = t.table_name LEFT JOIN ( diff --git a/crates/ndc-clickhouse-cli/src/main.rs b/crates/ndc-clickhouse-cli/src/main.rs index 11eab4e..37f12c4 100644 --- a/crates/ndc-clickhouse-cli/src/main.rs +++ b/crates/ndc-clickhouse-cli/src/main.rs @@ -8,14 +8,17 @@ use std::{ use clap::{Parser, Subcommand, ValueEnum}; use common::{ - clickhouse_parser::{datatype::ClickHouseDataType, parameterized_query::ParameterizedQuery}, - config::{ - ColumnConfig, ConnectionConfig, ParameterizedQueryConfig, ParameterizedQueryConfigFile, - ParameterizedQueryReturnType, 
PrimaryKey, ServerConfigFile, TableConfig, CONFIG_FILE_NAME, - CONFIG_SCHEMA_FILE_NAME, + clickhouse_parser::{ + datatype::ClickHouseDataType, + parameterized_query::{Parameter, ParameterizedQuery, ParameterizedQueryElement}, + }, + config::ConnectionConfig, + config_file::{ + self, ParameterizedQueryConfigFile, PrimaryKey, ReturnType, ServerConfigFile, + TableConfigFile, CONFIG_FILE_NAME, CONFIG_SCHEMA_FILE_NAME, }, }; -use database_introspection::{introspect_database, ColumnInfo, TableInfo}; +use database_introspection::{introspect_database, TableInfo}; use schemars::schema_for; use tokio::fs; mod database_introspection; @@ -145,7 +148,7 @@ pub async fn update_tables_config( let old_config = match fs::read_to_string(&file_path).await { Ok(file) => serde_json::from_str(&file) .map_err(|err| format!("Error parsing {CONFIG_FILE_NAME}: {err}\n\nDelete {CONFIG_FILE_NAME} to create a fresh file")), - Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(ServerConfigFile::default()), + Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(config_file::ServerConfigFile::default()), Err(_) => Err(format!("Error reading {CONFIG_FILE_NAME}")), }?; @@ -155,7 +158,20 @@ pub async fn update_tables_config( let old_table_config = get_old_table_config(table, &old_config.tables); let table_alias = get_table_alias(table, &old_table_config); - let table_config = TableConfig { + let arguments = ParameterizedQuery::from_str(&table.view_definition) + // when unable to parse, default to empty arguments list + .unwrap_or_default() + .elements + .iter() + .filter_map(|element| match element { + ParameterizedQueryElement::String(_) => None, + ParameterizedQueryElement::Parameter(Parameter { name, r#type }) => { + Some((name.to_string(), r#type.to_string())) + } + }) + .collect(); + + let table_config = TableConfigFile { name: table.table_name.to_owned(), schema: table.table_schema.to_owned(), comment: table.table_comment.to_owned(), @@ -166,33 +182,20 @@ pub async fn update_tables_config( .iter() .filter_map(|column| { if column.is_in_primary_key { - Some(get_column_alias( - column, - &get_old_column_config(column, &old_table_config), - )) + Some(column.column_name.to_owned()) } else { None } }) .collect(), }), - columns: table - .columns - .iter() - .map(|column| { - let column_alias = get_column_alias( - column, - &get_old_column_config(column, &old_table_config), - ); - // check if data type can be parsed, to give early warning to the user - // this is preferable to failing later while handling requests - let column_config = ColumnConfig { - name: column.column_name.to_owned(), - data_type: column.data_type.to_owned(), - }; - (column_alias, column_config) - }) - .collect(), + arguments, + return_type: get_table_return_type( + table, + &old_table_config, + &old_config, + &table_infos, + ), }; (table_alias, table_config) @@ -201,7 +204,7 @@ pub async fn update_tables_config( let config = ServerConfigFile { schema: CONFIG_SCHEMA_FILE_NAME.to_owned(), - tables: Some(tables), + tables: tables, queries: old_config.queries.to_owned(), }; let config_schema = schema_for!(ServerConfigFile); @@ -215,107 +218,157 @@ pub async fn update_tables_config( // validate after writing out the updated metadata. 
This should help users understand what the problem is // check if some column types can't be parsed - if let Some(tables) = &config.tables { - for (table_alias, table_config) in tables { - for (column_alias, column_config) in &table_config.columns { - let _data_type = - ClickHouseDataType::from_str(&column_config.data_type).map_err(|err| { - format!( - "Unable to parse data type \"{}\" for column {} in table {}: {}", - column_config.data_type, column_alias, table_alias, err + for (table_alias, table_config) in &config.tables { + match &table_config.return_type { + ReturnType::TableReference { + table_name: target_table, + } => { + match config.tables.get(target_table) { + Some(TableConfigFile { + return_type: ReturnType::Definition { .. }, + .. + }) => { + // referencing a table that has a return type defintion we can use. all is well + } + Some(_) => { + return Err(format!( + "Invalid reference: table \"{table_alias}\" references table \"{target_table}\" which does not have a return type definition." + ) + .into()); + } + None => { + return Err(format!( + "Orphan reference: table \"{table_alias}\" references table \"{target_table}\" which cannot be found." + ) + .into()); + } + } + } + ReturnType::QueryReference { + query_name: target_query, + } => { + match config.queries.get(target_query) { + Some(ParameterizedQueryConfigFile { + return_type: ReturnType::Definition { .. }, + .. + }) => { + // referencing a query that has a return type definition we can use. all is well + } + Some(_) => { + return Err(format!( + "Invalid reference: table \"{table_alias}\" references query \"{target_query}\" which does not have a return type definition." ) - })?; + .into()); + } + None => { + return Err(format!( + "Orphan reference: table \"{table_alias}\" references query \"{target_query}\" which cannot be found." + ) + .into()); + } + } + } + ReturnType::Definition { columns } => { + for (column_alias, column_data_type) in columns { + let _data_type = + ClickHouseDataType::from_str(&column_data_type).map_err(|err| { + format!( + "Unable to parse data type \"{}\" for column {} in table {}: {}", + column_data_type, column_alias, table_alias, err + ) + })?; + } } } } - if let Some(queries) = &config.queries { - for (query_alias, query_config) in queries { - // check for duplicate alias - if config - .tables - .as_ref() - .and_then(|tables| tables.get(query_alias)) - .is_some() - { - return Err(format!( - "Name collision: query \"{query_alias}\" has the same name as a collection" - ) - .into()); - } + for (query_alias, query_config) in &config.queries { + // check for duplicate alias + if config.tables.contains_key(query_alias) { + return Err(format!( + "Name collision: query \"{query_alias}\" has the same name as a collection" + ) + .into()); + } - // if return type is a reference, check it exists and is valid: - match &query_config.return_type { - ParameterizedQueryReturnType::TableReference { table_alias } => { - if config - .tables - .as_ref() - .and_then(|tables| tables.get(table_alias)) - .is_none() - { - return Err(format!( - "Orphan reference: query \"{query_alias}\" references table \"{table_alias}\" which cannot be found." - ) - .into()); + // if return type is a reference, check it exists and is valid: + match &query_config.return_type { + ReturnType::TableReference { + table_name: target_table, + } => { + match config.tables.get(target_table) { + Some(TableConfigFile { + return_type: ReturnType::Definition { .. }, + .. 
+ }) => { + // referencing a table that has a return type defintion we can use. all is well } - } - ParameterizedQueryReturnType::QueryReference { - query_alias: target_alias, - } => { - match config - .queries - .as_ref() - .and_then(|queries| queries.get(target_alias)) - { - Some(ParameterizedQueryConfigFile { - return_type: ParameterizedQueryReturnType::Custom { .. }, - .. - }) => { - // referencing a query that has a custom return type definition we can use. all is well - } - Some(_) => { - return Err(format!( - "Invalid reference: query \"{query_alias}\" references \"{target_alias}\" which does not have a return type definition." - ) - .into()); - } - None => { - return Err(format!( - "Orphan reference: query \"{query_alias}\" references query \"{target_alias}\" which cannot be found." + Some(_) => { + return Err(format!( + "Invalid reference: query \"{query_alias}\" references table \"{target_table}\" which does not have a return type definition." ) .into()); - } + } + None => { + return Err(format!( + "Orphan reference: query \"{query_alias}\" references table \"{target_table}\" which cannot be found." + ) + .into()); } } - ParameterizedQueryReturnType::Custom { fields } => { - for (field_alias, field_type) in fields { - let _data_type = - ClickHouseDataType::from_str(&field_type).map_err(|err| { - format!( - "Unable to parse data type \"{}\" for field {} in query {}: {}", - field_type, field_alias, query_alias, err - ) - })?; + } + ReturnType::QueryReference { + query_name: target_query, + } => { + match config.queries.get(target_query) { + Some(ParameterizedQueryConfigFile { + return_type: ReturnType::Definition { .. }, + .. + }) => { + // referencing a query that has a return type definition we can use. all is well + } + Some(_) => { + return Err(format!( + "Invalid reference: query \"{query_alias}\" references \"{target_query}\" which does not have a return type definition." + ) + .into()); + } + None => { + return Err(format!( + "Orphan reference: query \"{query_alias}\" references query \"{target_query}\" which cannot be found." 
+ ) + .into()); } } } - - // validate that we can find the referenced sql file - let file_path = configuration_dir.as_ref().join(&query_config.file); - let file_content = fs::read_to_string(&file_path).await.map_err(|err| { - format!( - "Error reading {} for query {query_alias}: {err}", - query_config.file - ) - })?; - // validate that we can parse the reference sql file - let _query = ParameterizedQuery::from_str(&file_content).map_err(|err| { - format!( - "Unable to parse file {} for parameterized query {}: {}", - query_config.file, query_alias, err - ) - })?; + ReturnType::Definition { columns } => { + for (column_name, column_data_type) in columns { + let _data_type = + ClickHouseDataType::from_str(&column_data_type).map_err(|err| { + format!( + "Unable to parse data type \"{}\" for field {} in query {}: {}", + column_data_type, column_name, query_alias, err + ) + })?; + } + } } + + // validate that we can find the referenced sql file + let file_path = configuration_dir.as_ref().join(&query_config.file); + let file_content = fs::read_to_string(&file_path).await.map_err(|err| { + format!( + "Error reading {} for query {query_alias}: {err}", + query_config.file + ) + })?; + // validate that we can parse the reference sql file + let _query = ParameterizedQuery::from_str(&file_content).map_err(|err| { + format!( + "Unable to parse file {} for parameterized query {}: {}", + query_config.file, query_alias, err + ) + })?; } Ok(()) @@ -326,39 +379,17 @@ pub async fn update_tables_config( /// This allows custom aliases to be preserved fn get_old_table_config<'a>( table: &TableInfo, - old_tables: &'a Option>>, -) -> Option<(&'a String, &'a TableConfig)> { - old_tables.as_ref().and_then(|old_tables| { - old_tables.iter().find(|(_, old_table)| { - old_table.name == table.table_name && old_table.schema == table.table_schema - }) + old_tables: &'a BTreeMap, +) -> Option<(&'a String, &'a TableConfigFile)> { + old_tables.iter().find(|(_, old_table)| { + old_table.name == table.table_name && old_table.schema == table.table_schema }) } -/// Get old column config, if any -/// Note this uses the column name to search, not the alias -/// This allows custom aliases to be preserved -fn get_old_column_config<'a>( - column: &ColumnInfo, - old_table: &Option<(&'a String, &'a TableConfig)>, -) -> Option<(&'a String, &'a ColumnConfig)> { - old_table - .map(|(_, old_table)| { - old_table - .columns - .iter() - .find(|(_, old_column)| old_column.name == column.column_name) - }) - .flatten() -} - /// Table aliases default to _, /// except for tables in the default schema where the table name is used. 
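/// e.g. (illustrative, assumed names): a table "Artist" in schema "chinook" defaults to the alias "chinook_Artist",
/// while a table "Artist" in the "default" schema keeps the alias "Artist"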
/// Prefer existing, old aliases over creating a new one -fn get_table_alias( - table: &TableInfo, - old_table: &Option<(&String, &TableConfig)>, -) -> String { +fn get_table_alias(table: &TableInfo, old_table: &Option<(&String, &TableConfigFile)>) -> String { // to preserve any customization, aliases are kept throught updates if let Some((old_table_alias, _)) = old_table { old_table_alias.to_string() @@ -369,16 +400,82 @@ fn get_table_alias( } } -/// Table aliases default to the column namee -/// Prefer existing, old aliases over creating a new one -fn get_column_alias( - column: &ColumnInfo, - old_column: &Option<(&String, &ColumnConfig)>, -) -> String { - // to preserve any customization, aliases are kept throught updates - if let Some((old_column_alias, _)) = old_column { - old_column_alias.to_string() - } else { - column.column_name.to_owned() - } +/// Given table info, and optionally old table info, get the return type for this table +/// +/// If the old configuration's return type is a reference +/// to a table: check that table still exists, and that it returns the same type as this table +/// to a query: check that query still exists, and that it returns the same type as this table +fn get_table_return_type( + table: &TableInfo, + old_table: &Option<(&String, &TableConfigFile)>, + old_config: &ServerConfigFile, + introspection: &Vec, +) -> ReturnType { + let new_columns = get_return_type_columns(table); + + let old_return_type = + old_table.and_then( + |(_table_alias, table_config)| match &table_config.return_type { + ReturnType::Definition { .. } => None, + ReturnType::TableReference { table_name } => { + // get the old table config for the referenced table + let referenced_table_config = old_config.tables.get(table_name); + // get the new table info for the referenced table, if the referenced table's return type is a definition + let referenced_table_info = + referenced_table_config.and_then(|old_table| match old_table.return_type { + ReturnType::TableReference { .. } + | ReturnType::QueryReference { .. } => None, + ReturnType::Definition { .. } => { + introspection.iter().find(|table_info| { + table_info.table_schema == old_table.schema + && table_info.table_name == table_config.name + }) + } + }); + + // get the new return type for the referenced table + let referenced_table_columns = + referenced_table_info.map(get_return_type_columns); + + // preserve the reference if the return type for the referenced table matches this table + if referenced_table_columns.is_some_and(|r| r == new_columns) { + Some(ReturnType::TableReference { + table_name: table_name.to_owned(), + }) + } else { + None + } + } + // if the old config references a query, keep the it if it points to a query that returns the same type as we just introspected + ReturnType::QueryReference { query_name } => old_config + .queries + .get(query_name) + .and_then(|query| match &query.return_type { + ReturnType::TableReference { .. } | ReturnType::QueryReference { .. 
} => { + None + } + ReturnType::Definition { columns } => { + if columns == &new_columns { + Some(ReturnType::QueryReference { + query_name: query_name.to_owned(), + }) + } else { + None + } + } + }), + }, + ); + + old_return_type.unwrap_or_else(|| ReturnType::Definition { + columns: new_columns, + }) +} + +fn get_return_type_columns(table: &TableInfo) -> BTreeMap { + table + .columns + .iter() + .map(|column| (column.column_name.to_owned(), column.data_type.to_owned())) + .collect() } diff --git a/crates/ndc-clickhouse/Cargo.toml b/crates/ndc-clickhouse/Cargo.toml index 82dfd34..444b185 100644 --- a/crates/ndc-clickhouse/Cargo.toml +++ b/crates/ndc-clickhouse/Cargo.toml @@ -15,5 +15,6 @@ reqwest = { version = "0.11.27", features = [ ], default-features = false } serde = { version = "1.0.197", features = ["derive"] } serde_json = "1.0.114" +sqlformat = "0.2.3" strum = { version = "0.26.2", features = ["derive"] } tokio = "1.36.0" diff --git a/crates/ndc-clickhouse/src/connector.rs b/crates/ndc-clickhouse/src/connector.rs index b90299a..ab8fc71 100644 --- a/crates/ndc-clickhouse/src/connector.rs +++ b/crates/ndc-clickhouse/src/connector.rs @@ -1,7 +1,12 @@ pub mod handler; pub mod state; -use std::{collections::BTreeMap, env, path::Path, str::FromStr}; +use std::{ + collections::BTreeMap, + env, + path::{Path, PathBuf}, + str::FromStr, +}; use tokio::fs; use async_trait::async_trait; @@ -17,12 +22,10 @@ use ndc_sdk::{ use self::state::ServerState; use common::{ - clickhouse_parser::{ - self, datatype::ClickHouseDataType, parameterized_query::ParameterizedQuery, - }, - config::{ - ColumnConfig, ConnectionConfig, ParameterizedQueryConfig, ParameterizedQueryConfigFile, - ParameterizedQueryReturnType, ServerConfig, ServerConfigFile, TableConfig, + clickhouse_parser::{datatype::ClickHouseDataType, parameterized_query::ParameterizedQuery}, + config::{ConnectionConfig, ParameterizedQueryConfig, ServerConfig, TableConfig, TableType}, + config_file::{ + ParameterizedQueryConfigFile, ReturnType, ServerConfigFile, TableConfigFile, CONFIG_FILE_NAME, }, }; @@ -159,44 +162,89 @@ pub async fn read_server_config( }) })?; + let table_types = config + .tables + .iter() + .map(|(table_alias, table_config)| { + let table_type = validate_and_parse_return_type( + &table_config.return_type, + &config, + &file_path, + &["tables", &table_alias, "return_type"], + )? + .and_then(|columns| { + Some(( + table_alias.to_owned(), + TableType { + comment: table_config.comment.to_owned(), + columns, + }, + )) + }); + + Ok(table_type) + }) + .chain(config.queries.iter().map(|(query_alias, query_config)| { + let table_type = validate_and_parse_return_type( + &query_config.return_type, + &config, + &file_path, + &["query", &query_alias, "return_type"], + )? 
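+ // a None here marks a reference return type: the collection reuses the
+ // referenced table's or query's entry in table_types, and the
+ // filter_map(transpose) below drops it from this list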
+ .and_then(|columns| { + Some(( + query_alias.to_owned(), + TableType { + comment: query_config.comment.to_owned(), + columns, + }, + )) + }); + + Ok(table_type) + })) + .filter_map(|table_type| table_type.transpose()) + .collect::>()?; + let tables = config .tables - .unwrap_or_default() - .into_iter() + .iter() .map(|(table_alias, table_config)| { Ok(( table_alias.clone(), TableConfig { - name: table_config.name, - schema: table_config.schema, - comment: table_config.comment, - primary_key: table_config.primary_key, - columns: table_config - .columns - .into_iter() - .map(|(column_alias, column_config)| { - Ok(( - column_alias.clone(), - ColumnConfig { - name: column_config.name, - data_type: ClickHouseDataType::from_str( - &column_config.data_type, - ) - .map_err(|_err| { - ParseError::ValidateError(InvalidNodes(vec![InvalidNode { - file_path: file_path.to_owned(), - node_path: vec![ - KeyOrIndex::Key("tables".to_string()), - KeyOrIndex::Key(table_alias.to_owned()), - KeyOrIndex::Key("columns".to_string()), - KeyOrIndex::Key(column_alias.to_owned()), - KeyOrIndex::Key("data_type".to_string()), - ], - message: "Unable to parse data type".to_string(), - }])) - })?, - }, - )) + name: table_config.name.to_owned(), + schema: table_config.schema.to_owned(), + comment: table_config.comment.to_owned(), + primary_key: table_config.primary_key.to_owned(), + return_type: match &table_config.return_type { + ReturnType::Definition { .. } => table_alias.to_owned(), + ReturnType::TableReference { + table_name: target_alias, + } + | ReturnType::QueryReference { + query_name: target_alias, + } => target_alias.to_owned(), + }, + arguments: table_config + .arguments + .iter() + .map(|(name, r#type)| { + let data_type = + ClickHouseDataType::from_str(&r#type).map_err(|_err| { + ParseError::ValidateError(InvalidNodes(vec![InvalidNode { + file_path: file_path.to_owned(), + node_path: vec![ + KeyOrIndex::Key("tables".to_string()), + KeyOrIndex::Key(table_alias.to_owned()), + KeyOrIndex::Key("arguments".to_string()), + KeyOrIndex::Key(name.to_owned()), + ], + message: "Unable to parse data type".to_string(), + }])) + })?; + + Ok((name.to_owned(), data_type)) }) .collect::>()?, }, @@ -206,7 +254,7 @@ pub async fn read_server_config( let mut queries = BTreeMap::new(); - for (query_alias, query_config) in config.queries.clone().unwrap_or_default() { + for (query_alias, query_config) in config.queries.clone() { let query_file_path = configuration_dir.as_ref().join(&query_config.file); let file_content = fs::read_to_string(&query_file_path).await.map_err(|err| { if let std::io::ErrorKind::NotFound = err.kind() { @@ -231,103 +279,14 @@ pub async fn read_server_config( exposed_as: query_config.exposed_as.to_owned(), comment: query_config.comment.to_owned(), query, - return_type: match &query_config.return_type { - ParameterizedQueryReturnType::TableReference { - table_alias: target_alias, - } => { - if tables.contains_key(target_alias) { - ParameterizedQueryReturnType::TableReference { - table_alias: target_alias.to_owned(), - } - } else { - return Err(ParseError::ValidateError(InvalidNodes(vec![InvalidNode { - file_path: file_path.clone(), - node_path: vec![ - KeyOrIndex::Key("queries".to_owned()), - KeyOrIndex::Key(query_alias.to_owned()), - KeyOrIndex::Key("return_type".to_owned()), - KeyOrIndex::Key("alias".to_owned()), - ], - message: format!( - "Orphan reference: cannot table {} referenced by query {}", - target_alias, query_alias - ), - }]))); - } - } - ParameterizedQueryReturnType::QueryReference { - 
query_alias: target_alias, - } => match config - .queries - .as_ref() - .and_then(|queries| queries.get(target_alias)) - { - Some(ParameterizedQueryConfigFile { - return_type: ParameterizedQueryReturnType::Custom { .. }, - .. - }) => ParameterizedQueryReturnType::QueryReference { - query_alias: target_alias.to_owned(), - }, - Some(_) => { - return Err(ParseError::ValidateError(InvalidNodes(vec![ - InvalidNode { - file_path: file_path.clone(), - node_path: vec![ - KeyOrIndex::Key("queries".to_owned()), - KeyOrIndex::Key(query_alias.to_owned()), - KeyOrIndex::Key("return_type".to_owned()), - KeyOrIndex::Key("alias".to_owned()), - ], - message: format!( - "Invalid reference: query {} referenced by query {} does not have a custom return type", - target_alias, query_alias - ), - }, - ]))); - } - None => { - return Err(ParseError::ValidateError(InvalidNodes(vec![InvalidNode { - file_path: file_path.clone(), - node_path: vec![ - KeyOrIndex::Key("queries".to_owned()), - KeyOrIndex::Key(query_alias.to_owned()), - KeyOrIndex::Key("return_type".to_owned()), - KeyOrIndex::Key("alias".to_owned()), - ], - message: format!( - "Orphan reference: cannot table {} referenced by query {}", - target_alias, query_alias - ), - }]))); - } - }, - ParameterizedQueryReturnType::Custom { fields } => { - ParameterizedQueryReturnType::Custom { - fields: fields - .into_iter() - .map(|(field_alias, field_type)| { - let data_type = - ClickHouseDataType::from_str(&field_type).map_err(|err| { - ParseError::ValidateError(InvalidNodes(vec![InvalidNode { - file_path: file_path.clone(), - node_path: vec![ - KeyOrIndex::Key("queries".to_string()), - KeyOrIndex::Key(query_alias.clone()), - KeyOrIndex::Key("return_type".to_string()), - KeyOrIndex::Key("fields".to_string()), - KeyOrIndex::Key(field_alias.clone()), - ], - message: format!( - "Unable to parse data type \"{}\": {}", - field_type, err - ), - }])) - })?; - Ok((field_alias.to_owned(), data_type)) - }) - .collect::>()?, - } + return_type: match query_config.return_type { + ReturnType::Definition { .. } => query_alias.to_owned(), + ReturnType::TableReference { + table_name: target_alias, } + | ReturnType::QueryReference { + query_name: target_alias, + } => target_alias.to_owned(), }, }; @@ -336,6 +295,7 @@ pub async fn read_server_config( let config = ServerConfig { connection, + table_types, tables, queries, }; @@ -359,3 +319,104 @@ fn get_connection_config() -> Result { password, }) } + +fn validate_and_parse_return_type( + return_type: &ReturnType, + config: &ServerConfigFile, + file_path: &PathBuf, + node_path: &[&str], +) -> Result>, ParseError> { + let get_node_path = |extra_segments: &[&str]| { + node_path + .iter() + .chain(extra_segments.iter()) + .map(|s| KeyOrIndex::Key(s.to_string())) + .collect() + }; + match return_type { + ReturnType::TableReference { table_name } => { + match config.tables.get(table_name) { + Some(TableConfigFile { + return_type: ReturnType::Definition { .. }, + .. 
+ }) => Ok(None), + Some(_) => { + Err(ParseError::ValidateError(InvalidNodes(vec![ + InvalidNode { + file_path: file_path.clone(), + node_path: get_node_path(&["table_name"]), + message: format!( + "Invalid reference: referenced table {} which does not have a return type definition", + table_name, + ), + }, + ]))) + } + None => { + return Err(ParseError::ValidateError(InvalidNodes(vec![ + InvalidNode { + file_path: file_path.clone(), + node_path: get_node_path(&["table_name"]), + message: format!( + "Orphan reference: cannot find referenced table {}", + table_name, + ), + }, + ]))); + } + } + } + ReturnType::QueryReference { query_name } => { + match config.queries.get(query_name) { + Some(ParameterizedQueryConfigFile { + return_type: ReturnType::Definition { .. }, + .. + }) => Ok(None), + Some(_) => { + Err(ParseError::ValidateError(InvalidNodes(vec![ + InvalidNode { + file_path: file_path.clone(), + node_path: get_node_path(&["query_name"]), + message: format!( + "Invalid reference: referenced query {} which does not have a return type definition", + query_name, + ), + }, + ]))) + } + None => { + Err(ParseError::ValidateError(InvalidNodes(vec![ + InvalidNode { + file_path: file_path.clone(), + node_path: get_node_path(&["query_name"]), + message: format!( + "Orphan reference: cannot find referenced query {}", + query_name, + ), + }, + ]))) + } + } + } + ReturnType::Definition { columns } => Ok(Some( + + columns + .iter() + .map(|(field_alias, field_type)| { + let data_type = ClickHouseDataType::from_str(&field_type).map_err(|err| { + ParseError::ValidateError(InvalidNodes(vec![InvalidNode { + file_path: file_path.clone(), + node_path: get_node_path(&["columns", field_alias]), + message: format!( + "Unable to parse data type \"{}\": {}", + field_type, err + ), + }])) + })?; + Ok((field_alias.to_owned(), data_type)) + }) + .collect::, ParseError>>()? 
+ + )) + } +} diff --git a/crates/ndc-clickhouse/src/connector/handler/explain.rs b/crates/ndc-clickhouse/src/connector/handler/explain.rs index d063141..3d7bc11 100644 --- a/crates/ndc-clickhouse/src/connector/handler/explain.rs +++ b/crates/ndc-clickhouse/src/connector/handler/explain.rs @@ -48,16 +48,23 @@ pub async fn explain( let details = BTreeMap::from_iter(vec![ ( - "inlined_sql".to_string(), - unsafe_statement.to_unsafe_sql_string(), + "SQL Query".to_string(), + pretty_print_sql(&unsafe_statement.to_unsafe_sql_string()), ), - ("parameterized_sql".to_string(), statement_string), - ( - "parameters".to_string(), - serde_json::to_string(¶meters).map_err(|err| ExplainError::Other(Box::new(err)))?, - ), - ("explain".to_string(), explain), + ("Execution Plan".to_string(), explain), ]); Ok(models::ExplainResponse { details }) } + +fn pretty_print_sql(query: &str) -> String { + use sqlformat::{format, FormatOptions, Indent, QueryParams}; + let params = QueryParams::None; + let options = FormatOptions { + indent: Indent::Spaces(2), + uppercase: false, + lines_between_queries: 1, + }; + + format(query, ¶ms, options) +} diff --git a/crates/ndc-clickhouse/src/connector/handler/schema.rs b/crates/ndc-clickhouse/src/connector/handler/schema.rs index 9314dda..9fdb524 100644 --- a/crates/ndc-clickhouse/src/connector/handler/schema.rs +++ b/crates/ndc-clickhouse/src/connector/handler/schema.rs @@ -4,7 +4,8 @@ use common::{ datatype::{ClickHouseDataType, Identifier}, parameterized_query::{Parameter, ParameterType, ParameterizedQueryElement}, }, - config::{ParameterizedQueryExposedAs, ParameterizedQueryReturnType, PrimaryKey, ServerConfig}, + config::ServerConfig, + config_file::{ParameterizedQueryExposedAs, PrimaryKey}, }; use ndc_sdk::{connector::SchemaError, models}; use std::collections::BTreeMap; @@ -13,14 +14,13 @@ pub async fn schema(configuration: &ServerConfig) -> Result Result Result Result Result target_alias.to_owned(), - ParameterizedQueryReturnType::Custom { .. 
} => query_alias.to_owned(), - }, + collection_type: query_config.return_type.to_owned(), uniqueness_constraints: BTreeMap::new(), foreign_keys: BTreeMap::new(), } diff --git a/crates/ndc-clickhouse/src/schema/type_definition.rs b/crates/ndc-clickhouse/src/schema/type_definition.rs index 2e622ce..e78f0c7 100644 --- a/crates/ndc-clickhouse/src/schema/type_definition.rs +++ b/crates/ndc-clickhouse/src/schema/type_definition.rs @@ -1,9 +1,6 @@ use std::collections::BTreeMap; -use common::{ - clickhouse_parser::datatype::{ClickHouseDataType, Identifier, SingleQuotedString}, - config::ParameterizedQueryReturnType, -}; +use common::clickhouse_parser::datatype::{ClickHouseDataType, Identifier, SingleQuotedString}; use ndc_sdk::models; use super::{ClickHouseBinaryComparisonOperator, ClickHouseSingleColumnAggregateFunction}; diff --git a/crates/ndc-clickhouse/src/sql/ast.rs b/crates/ndc-clickhouse/src/sql/ast.rs index 6fcbef5..b69ced7 100644 --- a/crates/ndc-clickhouse/src/sql/ast.rs +++ b/crates/ndc-clickhouse/src/sql/ast.rs @@ -2,7 +2,7 @@ use std::fmt; mod parameter_extractor; -use common::clickhouse_parser::datatype::ClickHouseDataType; +use common::clickhouse_parser::parameterized_query::ParameterType; use parameter_extractor::ParameterExtractor; //.A statement containing placeholders where parameters used to be @@ -385,6 +385,10 @@ pub enum TableFactor { function: Function, alias: Option, }, + NativeQuery { + native_query: NativeQuery, + alias: Option, + }, } impl TableFactor { @@ -396,6 +400,13 @@ impl TableFactor { TableFactor::TableFunction { function, alias: _ } => { TableFactor::TableFunction { function, alias } } + TableFactor::NativeQuery { + native_query, + alias: _, + } => TableFactor::NativeQuery { + native_query, + alias, + }, } } pub fn into_table_with_joins(self, joins: Vec) -> TableWithJoins { @@ -427,11 +438,61 @@ impl fmt::Display for TableFactor { write!(f, " AS {}", alias)?; } } + TableFactor::NativeQuery { + native_query, + alias, + } => { + write!(f, "({})", native_query)?; + if let Some(alias) = alias { + write!(f, " AS {}", alias)?; + } + } + } + Ok(()) + } +} + +#[derive(Debug, Clone)] +pub struct NativeQuery { + elements: Vec, +} + +impl NativeQuery { + pub fn new(elements: Vec) -> Self { + Self { elements } + } + pub fn into_table_factor(self) -> TableFactor { + TableFactor::NativeQuery { + native_query: self, + alias: None, + } + } +} + +impl fmt::Display for NativeQuery { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for element in &self.elements { + write!(f, "{element}")?; } Ok(()) } } +#[derive(Debug, Clone)] +pub enum NativeQueryElement { + String(String), + Parameter(Parameter), +} + +impl fmt::Display for NativeQueryElement { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + NativeQueryElement::String(s) => write!(f, "{s}"), + NativeQueryElement::Parameter(p) => write!(f, "{p}"), + } + } +} + #[derive(Debug, Clone)] pub struct ObjectName(pub Vec); @@ -442,6 +503,14 @@ impl ObjectName { alias: None, } } + pub fn into_table_function(self) -> Function { + Function { + name: self, + args: vec![], + over: None, + distinct: false, + } + } } impl fmt::Display for ObjectName { @@ -478,8 +547,8 @@ impl Expr { None => SelectItem::UnnamedExpr(self), } } - pub fn into_arg(self) -> FunctionArgExpr { - FunctionArgExpr::Expr(self) + pub fn into_arg(self) -> FunctionArg { + FunctionArg::new(FunctionArgExpr::Expr(self)) } pub fn into_with_item>(self, alias: S) -> WithItem { WithItem::Expr { @@ -504,15 +573,7 @@ impl fmt::Display for 
Expr { Expr::Not(expr) => write!(f, "NOT {expr}"), Expr::Nested(expr) => write!(f, "({})", expr), Expr::Value(value) => write!(f, "{}", value), - Expr::Parameter(p) => match p { - Parameter::Value { - value, - data_type: _, - } => write!(f, "{value}"), - Parameter::Placeholder { name, data_type } => { - write!(f, "{{{}:{}}}", name, data_type) - } - }, + Expr::Parameter(p) => write!(f, "{}", p), Expr::Function(function) => write!(f, "{}", function), Expr::Lambda(lambda) => write!(f, "{}", lambda), Expr::List(list) => write!(f, "({})", display_separated(list, ", ")), @@ -523,17 +584,17 @@ impl fmt::Display for Expr { #[derive(Debug, Clone)] pub enum Parameter { Value { - data_type: ClickHouseDataType, + data_type: ParameterType, value: Value, }, Placeholder { - data_type: ClickHouseDataType, + data_type: ParameterType, name: String, }, } impl Parameter { - pub fn new(value: Value, data_type: ClickHouseDataType) -> Self { + pub fn new(value: Value, data_type: ParameterType) -> Self { Self::Value { data_type, value } } pub fn into_expr(self) -> Expr { @@ -541,6 +602,20 @@ impl Parameter { } } +impl fmt::Display for Parameter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Parameter::Value { + value, + data_type: _, + } => write!(f, "{value}"), + Parameter::Placeholder { name, data_type } => { + write!(f, "{{{}:{}}}", name, data_type) + } + } + } +} + #[derive(Debug, Clone)] pub struct Lambda { args: Vec, @@ -573,7 +648,7 @@ impl fmt::Display for Lambda { #[derive(Debug, Clone)] pub struct Function { pub name: ObjectName, - pub args: Vec, + pub args: Vec, pub over: Option, pub distinct: bool, } @@ -595,7 +670,7 @@ impl Function { distinct: false, } } - pub fn args(self, args: Vec) -> Self { + pub fn args(self, args: Vec) -> Self { Self { args, ..self } } pub fn over(self, over: Option) -> Self { @@ -655,6 +730,33 @@ impl fmt::Display for WindowSpec { } } +#[derive(Debug, Clone)] +pub struct FunctionArg { + name: Option, + value: FunctionArgExpr, +} + +impl FunctionArg { + pub fn new(value: FunctionArgExpr) -> Self { + Self { value, name: None } + } + pub fn name(self, name: Ident) -> Self { + Self { + name: Some(name), + ..self + } + } +} + +impl fmt::Display for FunctionArg { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match &self.name { + Some(name) => write!(f, "{name}={}", self.value), + None => write!(f, "{}", self.value), + } + } +} + #[derive(Debug, Clone)] pub enum FunctionArgExpr { Expr(Expr), @@ -664,6 +766,12 @@ pub enum FunctionArgExpr { Wildcard, } +impl FunctionArgExpr { + pub fn into_arg(self) -> FunctionArg { + FunctionArg::new(self) + } +} + impl fmt::Display for FunctionArgExpr { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { diff --git a/crates/ndc-clickhouse/src/sql/ast/parameter_extractor.rs b/crates/ndc-clickhouse/src/sql/ast/parameter_extractor.rs index 6113fbf..3864a9c 100644 --- a/crates/ndc-clickhouse/src/sql/ast/parameter_extractor.rs +++ b/crates/ndc-clickhouse/src/sql/ast/parameter_extractor.rs @@ -72,7 +72,7 @@ impl ParameterExtractor { Expr::BinaryOp { left, op: _, right } => { self.visit_expr(left); self.visit_expr(right); - }, + } Expr::Not(expr) => self.visit_expr(expr), Expr::Nested(expr) => self.visit_expr(expr), Expr::Value(_) => {} @@ -103,6 +103,10 @@ impl ParameterExtractor { ref mut function, alias: _, } => self.visit_function(function), + TableFactor::NativeQuery { + ref mut native_query, + alias: _, + } => self.visit_native_query(native_query), } } fn visit_join(&mut 
self, join: &mut Join) { @@ -122,8 +126,8 @@ impl ParameterExtractor { } fn visit_function(&mut self, function: &mut Function) { for arg in function.args.iter_mut() { - match arg { - FunctionArgExpr::Expr(expr) => self.visit_expr(expr), + match arg.value { + FunctionArgExpr::Expr(ref mut expr) => self.visit_expr(expr), FunctionArgExpr::QualifiedWildcard(_) => {} FunctionArgExpr::Wildcard => {} } @@ -137,6 +141,14 @@ impl ParameterExtractor { } } } + fn visit_native_query(&mut self, native_query: &mut NativeQuery) { + for element in native_query.elements.iter_mut() { + match element { + NativeQueryElement::String(_) => {} + NativeQueryElement::Parameter(ref mut parameter) => self.visit_parameter(parameter), + } + } + } fn visit_parameter(&mut self, parameter: &mut Parameter) { match parameter { Parameter::Placeholder { .. } => panic!("Attempted to extract parameter that had already been replaced with a placeholder. This is likely a bug"), diff --git a/crates/ndc-clickhouse/src/sql/query_builder.rs b/crates/ndc-clickhouse/src/sql/query_builder.rs index f68f4c4..e19b8c1 100644 --- a/crates/ndc-clickhouse/src/sql/query_builder.rs +++ b/crates/ndc-clickhouse/src/sql/query_builder.rs @@ -1,17 +1,26 @@ use std::str::FromStr; -use common::{clickhouse_parser::datatype::ClickHouseDataType, config::ServerConfig}; +use common::{ + clickhouse_parser::{ + datatype::{ClickHouseDataType, Identifier}, + parameterized_query::ParameterizedQueryElement, + }, + config::ServerConfig, +}; use indexmap::IndexMap; -use ndc_sdk::models; +mod collection_context; mod comparison_column; mod error; mod typecasting; use comparison_column::ComparisonColumn; pub use error::QueryBuilderError; +use ndc_sdk::models; use typecasting::{AggregatesTypeString, RowsTypeString}; +use self::collection_context::CollectionContext; + use super::ast::*; use crate::schema::{ClickHouseBinaryComparisonOperator, ClickHouseSingleColumnAggregateFunction}; @@ -33,10 +42,10 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { fn rows_typecast_string( &self, fields: &IndexMap, - collection_alias: &str, + current_collection: &CollectionContext, ) -> Result { Ok(RowsTypeString::new( - collection_alias, + current_collection.alias(), fields, &self.request.collection_relationships, self.configuration, @@ -47,16 +56,16 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { fn agregates_typecast_string( &self, aggregates: &IndexMap, - collection_alias: &str, + current_collection: &CollectionContext, ) -> Result { Ok( - AggregatesTypeString::new(collection_alias, aggregates, self.configuration) + AggregatesTypeString::new(current_collection.alias(), aggregates, self.configuration) .map_err(|err| QueryBuilderError::Typecasting(err.to_string()))? 
.to_string(), ) } fn root_query(&self) -> Result { - let collection = &self.request.collection; + let collection = CollectionContext::new(&self.request.collection, &self.request.arguments); let query = &self.request.query; let get_typecasting_wrapper = |index: usize, alias: &str, typecast_string: String| { @@ -84,19 +93,19 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { (None, Some(aggregates)) => vec![get_typecasting_wrapper( 1, "aggregates", - self.agregates_typecast_string(aggregates, collection)?, + self.agregates_typecast_string(aggregates, &collection)?, )], (Some(fields), None) => vec![get_typecasting_wrapper( 1, "rows", - self.rows_typecast_string(fields, collection)?, + self.rows_typecast_string(fields, &collection)?, )], (Some(fields), Some(aggregates)) => vec![ - get_typecasting_wrapper(1, "rows", self.rows_typecast_string(fields, collection)?), + get_typecasting_wrapper(1, "rows", self.rows_typecast_string(fields, &collection)?), get_typecasting_wrapper( 2, "aggregates", - self.agregates_typecast_string(aggregates, collection)?, + self.agregates_typecast_string(aggregates, &collection)?, ), ], }; @@ -129,7 +138,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { Value::SingleQuotedString(serde_json::to_string(&variable_values).map_err( |err| QueryBuilderError::CannotSerializeVariables(err.to_string()), )?), - ClickHouseDataType::String, + ClickHouseDataType::String.into(), ) .into_expr(); @@ -148,7 +157,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { }; let from = vec![self - .rowset_subquery(collection, &vec![], query)? + .rowset_subquery(&collection, &vec![], query)? .into_table_factor() .alias("_rowset") .into_table_with_joins(vec![])]; @@ -176,7 +185,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { } fn rowset_subquery( &self, - current_collection: &str, + current_collection: &CollectionContext, relkeys: &Vec<&String>, query: &models::Query, ) -> Result { @@ -215,7 +224,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { .map(|(alias, aggregate)| { Ok(match aggregate { models::Aggregate::StarCount {} => Function::new_unquoted("COUNT") - .args(vec![FunctionArgExpr::Wildcard]) + .args(vec![FunctionArgExpr::Wildcard.into_arg()]) .into_expr(), models::Aggregate::ColumnCount { distinct, @@ -286,7 +295,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { } let from = vec![self - .row_subquery(current_collection, relkeys, query)? + .row_subquery(¤t_collection, relkeys, query)? .into_table_factor() .alias("_row") .into_table_with_joins(vec![])]; @@ -295,7 +304,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { } fn row_subquery( &self, - current_collection: &str, + current_collection: &CollectionContext, relkeys: &Vec<&String>, query: &models::Query, ) -> Result { @@ -366,18 +375,12 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { .alias("_vars"); let joins = vec![Join { - relation: self - .collection_ident(current_collection)? - .into_table_factor() - .alias("_origin"), + relation: self.collection_ident(current_collection)?.alias("_origin"), join_operator: JoinOperator::CrossJoin, }]; (table, joins) } else { - let table = self - .collection_ident(current_collection)? 
- .into_table_factor() - .alias("_origin"); + let table = self.collection_ident(current_collection)?.alias("_origin"); (table, vec![]) }; @@ -386,10 +389,12 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { if let models::Field::Relationship { query, relationship, - arguments: _, + arguments, } = field { let relationship = self.collection_relationship(relationship)?; + let relationship_collection = + CollectionContext::from_relationship(&relationship, arguments); let mut join_expr = relationship .column_mapping @@ -437,7 +442,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let join = Join { relation: self - .rowset_subquery(&relationship.target_collection, &relkeys, query)? + .rowset_subquery(&relationship_collection, &relkeys, query)? .into_table_factor() .alias(format!("_rel_{alias}")), join_operator, @@ -496,6 +501,10 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let relationship = self.collection_relationship(&first_element.relationship)?; + let relationship_collection = CollectionContext::from_relationship( + relationship, + &first_element.arguments, + ); let subquery = { let mut select = vec![]; @@ -508,20 +517,17 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { select.push( Expr::CompoundIdentifier(vec![ join_alias.clone(), - self.column_ident( - target_col, - &relationship.target_collection, - )?, + self.column_ident(target_col, &relationship_collection)?, ]) .into_select(Some(format!("_relkey_{target_col}"))), ); group_by.push(Expr::CompoundIdentifier(vec![ join_alias.clone(), - self.column_ident(target_col, &relationship.target_collection)?, + self.column_ident(target_col, &relationship_collection)?, ])); limit_by.push(Expr::CompoundIdentifier(vec![ join_alias.clone(), - self.column_ident(target_col, &relationship.target_collection)?, + self.column_ident(target_col, &relationship_collection)?, ])); } @@ -544,8 +550,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { } let table = self - .collection_ident(&relationship.target_collection)? - .into_table_factor() + .collection_ident(&relationship_collection)? .alias(&join_alias); let (table, base_joins) = if self.request.variables.is_some() { @@ -570,7 +575,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let (predicate, predicate_joins) = self.filter_expression( expression, &join_alias, - &relationship.target_collection, + &relationship_collection, false, &mut join_index, )?; @@ -583,7 +588,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { } let mut last_join_alias = join_alias; - let mut last_collection_name = &relationship.target_collection; + let mut last_collection_context = relationship_collection; for path_element in path.iter().skip(1) { let join_alias = @@ -592,6 +597,10 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let relationship = self.collection_relationship(&path_element.relationship)?; + let relationship_collection = CollectionContext::from_relationship( + relationship, + &path_element.arguments, + ); let join_exprs = relationship .column_mapping @@ -602,7 +611,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { last_join_alias.clone(), self.column_ident( source_col, - last_collection_name, + &last_collection_context, )?, ]) .into_box(), @@ -611,7 +620,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { join_alias.clone(), self.column_ident( target_col, - &relationship.target_collection, + &relationship_collection, )?, ]) .into_box(), @@ -627,8 +636,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { .unwrap_or(JoinOperator::CrossJoin); let relation = self - .collection_ident(&relationship.target_collection)? - .into_table_factor() + .collection_ident(&relationship_collection)? 
.alias(&join_alias); let join = Join { @@ -642,7 +650,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let (predicate, predicate_joins) = self.filter_expression( expression, &join_alias, - &relationship.target_collection, + &relationship_collection, false, &mut join_index, )?; @@ -655,14 +663,14 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { } last_join_alias = join_alias; - last_collection_name = &relationship.target_collection; + last_collection_context = relationship_collection; } match &element.target { models::OrderByTarget::Column { name, path: _ } => { let column = Expr::CompoundIdentifier(vec![ last_join_alias, - self.column_ident(name, last_collection_name)?, + self.column_ident(name, &last_collection_context)?, ]); select.push(column.into_select(Some("_order_by_value"))) } @@ -673,7 +681,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { } => { let column = Expr::CompoundIdentifier(vec![ last_join_alias, - self.column_ident(column, last_collection_name)?, + self.column_ident(column, &last_collection_context)?, ]); select.push( aggregate_function(function)? @@ -843,7 +851,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { &self, expression: &models::Expression, current_join_alias: &Ident, - current_collection: &str, + current_collection: &CollectionContext, current_is_origin: bool, name_index: &mut u32, ) -> Result<(Expr, Vec), QueryBuilderError> { @@ -980,7 +988,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { name_index, )?, models::ComparisonValue::Scalar { value } => ComparisonColumn::new_simple( - Parameter::new(value.into(), right_col_type.clone()).into_expr(), + Parameter::new(value.into(), right_col_type.clone().into()).into_expr(), right_col_type, ), @@ -1019,7 +1027,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { in_collection: &models::ExistsInCollection, expression: &Option>, previous_join_alias: &Ident, - previous_collection: &str, + previous_collection: &CollectionContext, name_index: &mut u32, ) -> Result<(Expr, Vec), QueryBuilderError> { let exists_join_ident = Ident::new_quoted(format!("_exists_{}", name_index)); @@ -1029,15 +1037,15 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let target_collection = match in_collection { models::ExistsInCollection::Related { relationship, - arguments: _, + arguments, } => { let relationship = self.collection_relationship(relationship)?; - &relationship.target_collection + CollectionContext::from_relationship(relationship, arguments) } models::ExistsInCollection::Unrelated { collection, - arguments: _, - } => collection, + arguments, + } => CollectionContext::new_unrelated(&collection, arguments), }; let subquery_origin_alias = Ident::new_quoted(format!("_exists_{}", name_index)); @@ -1048,7 +1056,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let (predicate, predicate_joins) = self.filter_expression( expression, &subquery_origin_alias, - target_collection, + &target_collection, false, name_index, )?; @@ -1058,8 +1066,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { }; let table = self - .collection_ident(target_collection)? - .into_table_factor() + .collection_ident(&target_collection)? 
.alias(&subquery_origin_alias); let (table, base_joins) = if self.request.variables.is_some() { @@ -1093,13 +1100,13 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { select.push( Expr::CompoundIdentifier(vec![ subquery_origin_alias.clone(), - self.column_ident(target_col, target_collection)?, + self.column_ident(target_col, &target_collection)?, ]) .into_select(Some(format!("_relkey_{target_col}"))), ); limit_by.push(Expr::CompoundIdentifier(vec![ subquery_origin_alias.clone(), - self.column_ident(target_col, target_collection)?, + self.column_ident(target_col, &target_collection)?, ])); } } @@ -1214,7 +1221,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { &self, column: &models::ComparisonTarget, current_join_alias: &Ident, - current_collection: &str, + current_collection: &CollectionContext, current_is_origin: bool, name_index: &mut u32, ) -> Result { @@ -1225,7 +1232,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { } => { if let Some(first_element) = path.first() { if current_is_origin { - let (join, join_alias, last_collection_name) = { + let (join, join_alias, last_collection_context) = { let previous_join_alias = current_join_alias.clone(); let current_join_alias = Ident::new_quoted(format!("_exists_{name_index}")); @@ -1233,6 +1240,10 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let relationship = self.collection_relationship(&first_element.relationship)?; + let relationship_collection = CollectionContext::from_relationship( + relationship, + &first_element.arguments, + ); let (subquery, last_collection_name) = { let mut select = vec![]; @@ -1246,17 +1257,14 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { join_alias.clone(), self.column_ident( target_col, - &relationship.target_collection, + &relationship_collection, )?, ]) .into_select(Some(format!("_relkey_{target_col}"))), ); group_by.push(Expr::CompoundIdentifier(vec![ join_alias.clone(), - self.column_ident( - target_col, - &relationship.target_collection, - )?, + self.column_ident(target_col, &relationship_collection)?, ])) } @@ -1275,8 +1283,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { } let table = self - .collection_ident(&relationship.target_collection)? - .into_table_factor() + .collection_ident(&relationship_collection)? 
.alias(&join_alias); let (table, base_joins) = if self.request.variables.is_some() { @@ -1301,7 +1308,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let (predicate, predicate_joins) = self.filter_expression( expression, &join_alias, - &relationship.target_collection, + &relationship_collection, false, &mut join_index, )?; @@ -1314,7 +1321,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { } let mut last_join_alias = join_alias; - let mut last_collection_name = &relationship.target_collection; + let mut last_collection_context = relationship_collection; for path_element in path.iter().skip(1) { let join_alias = @@ -1323,6 +1330,11 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let relationship = self.collection_relationship(&path_element.relationship)?; + let relationship_collection = + CollectionContext::from_relationship( + relationship, + &path_element.arguments, + ); let join_exprs = relationship .column_mapping @@ -1333,7 +1345,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { last_join_alias.clone(), self.column_ident( source_col, - last_collection_name, + &last_collection_context, )?, ]) .into_box(), @@ -1342,7 +1354,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { join_alias.clone(), self.column_ident( target_col, - &relationship.target_collection, + &relationship_collection, )?, ]) .into_box(), @@ -1358,8 +1370,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { .unwrap_or(JoinOperator::CrossJoin); let relation = self - .collection_ident(&relationship.target_collection)? - .into_table_factor() + .collection_ident(&relationship_collection)? .alias(&join_alias); let join = Join { @@ -1373,7 +1384,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let (predicate, predicate_joins) = self.filter_expression( expression, &join_alias, - &relationship.target_collection, + &relationship_collection, false, &mut join_index, )?; @@ -1386,7 +1397,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { } last_join_alias = join_alias; - last_collection_name = &relationship.target_collection; + last_collection_context = relationship_collection; } select.push( @@ -1395,7 +1406,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { last_join_alias, self.column_ident( comparison_column_name, - last_collection_name, + &last_collection_context, )?, ]) .into_arg()]) @@ -1417,7 +1428,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { .from(from) .predicate(predicate) .group_by(group_by), - last_collection_name, + last_collection_context, ) }; @@ -1485,14 +1496,18 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { column_ident, join, values_ident, - self.column_data_type(comparison_column_name, last_collection_name)?, + self.column_data_type( + comparison_column_name, + &last_collection_context, + )?, )) } else { let mut additional_joins = vec![]; let mut additional_predicates = vec![]; let mut last_join_alias = current_join_alias.clone(); - let mut last_collection_name = current_collection; + let mut last_collection_context: CollectionContext = + current_collection.to_owned(); for path_element in path { let join_alias = Ident::new_quoted(format!("_exists_{name_index}")); @@ -1500,6 +1515,10 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let relationship = self.collection_relationship(&path_element.relationship)?; + let relationship_collection = CollectionContext::from_relationship( + relationship, + &path_element.arguments, + ); let join_exprs = relationship .column_mapping @@ -1508,7 +1527,10 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { Ok(Expr::BinaryOp { left: Expr::CompoundIdentifier(vec![ last_join_alias.clone(), - self.column_ident(source_col, last_collection_name)?, + self.column_ident( + source_col, + 
&last_collection_context, + )?, ]) .into_box(), op: BinaryOperator::Eq, @@ -1516,7 +1538,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { join_alias.clone(), self.column_ident( target_col, - &relationship.target_collection, + &relationship_collection, )?, ]) .into_box(), @@ -1532,8 +1554,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { .unwrap_or(JoinOperator::CrossJoin); let table = self - .collection_ident(&relationship.target_collection)? - .into_table_factor() + .collection_ident(&relationship_collection)? .alias(&join_alias); let join = Join { @@ -1547,7 +1568,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let (predicate, predicate_joins) = self.filter_expression( expression, &join_alias, - &relationship.target_collection, + &relationship_collection, false, name_index, )?; @@ -1560,19 +1581,22 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { } last_join_alias = join_alias; - last_collection_name = &relationship.target_collection; + last_collection_context = relationship_collection; } let column_ident = Expr::CompoundIdentifier(vec![ last_join_alias, - self.column_ident(comparison_column_name, last_collection_name)?, + self.column_ident(comparison_column_name, &last_collection_context)?, ]); Ok(ComparisonColumn::new_flat( column_ident, additional_joins, additional_predicates.into_iter().reduce(and_reducer), - self.column_data_type(comparison_column_name, last_collection_name)?, + self.column_data_type( + comparison_column_name, + &last_collection_context, + )?, )) } } else { @@ -1598,7 +1622,7 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { )) } else { Err(QueryBuilderError::NotSupported( - "Comparisons to root not supported".to_string(), + "Comparisons to root".to_string(), )) } } @@ -1616,55 +1640,261 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { relationship.to_string(), )) } - fn collection_ident(&self, collection_alias: &str) -> Result { - // todo: get the collection name based on the alias from config - let table = self - .configuration - .tables - .get(collection_alias) - .ok_or_else(|| QueryBuilderError::UnknownTable(collection_alias.to_owned()))?; + fn collection_ident( + &self, + collection: &CollectionContext, + ) -> Result { + if let Some(table) = self.configuration.tables.get(collection.alias()) { + let table_argument_type = |argument_name: &str| { + table.arguments.get(argument_name).ok_or_else(|| { + QueryBuilderError::UnknownTableArgument { + table: collection.alias().to_owned(), + argument: argument_name.to_owned(), + } + }) + }; + let table_name = ObjectName(vec![ + Ident::new_quoted(&table.schema), + Ident::new_quoted(&table.name), + ]); + if collection.has_arguments() { + let arguments = match collection { + CollectionContext::Base { + collection_alias: _, + arguments, + } => arguments + .iter() + .map(|(arg_name, arg)| match arg { + models::Argument::Variable { name } => { + let varkey = format!("_var_{name}"); + + Ok(Expr::CompoundIdentifier(vec![ + Ident::new_quoted("_vars"), + Ident::new_quoted(varkey), + ]) + .into_arg() + .name(Ident::new_quoted(arg_name))) + } + models::Argument::Literal { value } => { + Ok(Expr::Parameter(Parameter::new( + value.into(), + table_argument_type(arg_name)?.to_owned().into(), + )) + .into_arg() + .name(Ident::new_quoted(arg_name))) + } + }) + .collect::, _>>()?, + CollectionContext::Relationship { + collection_alias: _, + arguments, + relationship_arguments, + } => relationship_arguments + .iter() + .chain(arguments.iter()) + .map(|(arg_name, arg)| match arg { + models::RelationshipArgument::Variable { name } => { + let varkey = format!("_var_{name}"); + + 
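+ // variable-valued arguments read from the joined "_vars" relation:
+ // each query variable name is exposed there as a _var_{name} column,
+ // so this argument compiles to a reference to _vars._var_{name}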
Ok(Expr::CompoundIdentifier(vec![ + Ident::new_quoted("_vars"), + Ident::new_quoted(varkey), + ]) + .into_arg() + .name(Ident::new_quoted(arg_name))) + } + models::RelationshipArgument::Literal { value } => { + Ok(Expr::Parameter(Parameter::new( + value.into(), + table_argument_type(arg_name)?.to_owned().into(), + )) + .into_arg() + .name(Ident::new_quoted(arg_name))) + } + models::RelationshipArgument::Column { .. } => { + Err(QueryBuilderError::NotSupported( + "Column argument value".to_string(), + )) + } + }) + .collect::, _>>()?, + CollectionContext::UnrelatedRelationship { + collection_alias: _, + arguments, + } => arguments + .iter() + .map(|(arg_name, arg)| match arg { + models::RelationshipArgument::Variable { name } => { + let varkey = format!("_var_{name}"); - Ok(ObjectName(vec![ - Ident::new_quoted(&table.schema), - Ident::new_quoted(&table.name), - ])) + Ok(Expr::CompoundIdentifier(vec![ + Ident::new_quoted("_vars"), + Ident::new_quoted(varkey), + ]) + .into_arg() + .name(Ident::new_quoted(arg_name))) + } + models::RelationshipArgument::Literal { value } => { + Ok(Expr::Parameter(Parameter::new( + value.into(), + table_argument_type(arg_name)?.to_owned().into(), + )) + .into_arg() + .name(Ident::new_quoted(arg_name))) + } + models::RelationshipArgument::Column { .. } => { + Err(QueryBuilderError::NotSupported( + "Column argument value".to_string(), + )) + } + }) + .collect::, _>>()?, + }; + + let table_function = table_name.into_table_function().args(arguments); + + Ok(table_function.into_table_factor()) + } else { + Ok(table_name.into_table_factor()) + } + // let arguments = match collection { + // CollectionContext::Base { + // collection_alias: _, + // arguments, + // } => arguments.iter().map(|(name, )|), + // CollectionContext::Relationship { + // collection_alias, + // arguments, + // relationship_arguments, + // } => todo!(), + // CollectionContext::UnrelatedRelationship { + // collection_alias, + // arguments, + // } => todo!(), + // }; + } else if let Some(query) = self.configuration.queries.get(collection.alias()) { + let get_argument = |name| match collection { + CollectionContext::Base { + collection_alias: _, + arguments, + } => arguments.get(name).map(|arg| match arg { + models::Argument::Variable { .. } => Err(QueryBuilderError::NotSupported( + "native query variable argument".to_string(), + )), + models::Argument::Literal { value } => Ok(value), + }), + CollectionContext::Relationship { + collection_alias: _, + arguments, + relationship_arguments, + } => arguments + .get(name) + .or_else(|| relationship_arguments.get(name)) + .map(|arg| match arg { + models::RelationshipArgument::Variable { .. } => { + Err(QueryBuilderError::NotSupported( + "native query variable argument".to_string(), + )) + } + models::RelationshipArgument::Literal { value } => Ok(value), + models::RelationshipArgument::Column { .. } => { + Err(QueryBuilderError::NotSupported( + "native query column argument".to_string(), + )) + } + }), + CollectionContext::UnrelatedRelationship { + collection_alias: _, + arguments, + } => arguments.get(name).map(|arg| match arg { + models::RelationshipArgument::Variable { .. } => { + Err(QueryBuilderError::NotSupported( + "native query variable argument".to_string(), + )) + } + models::RelationshipArgument::Literal { value } => Ok(value), + models::RelationshipArgument::Column { .. 
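+ // column-valued arguments are rejected below; per the changelog, they
+ // are not supported for parameterized views or native queries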
} => Err( + QueryBuilderError::NotSupported("native query column argument".to_string()), + ), + }), + }; + + let elements = query + .query + .elements + .iter() + .map(|element| match element { + ParameterizedQueryElement::String(s) => { + Ok(NativeQueryElement::String(s.to_owned())) + } + ParameterizedQueryElement::Parameter(p) => { + let arg_alias = match &p.name { + Identifier::DoubleQuoted(n) + | Identifier::BacktickQuoted(n) + | Identifier::Unquoted(n) => n, + }; + + get_argument(arg_alias) + .transpose()? + .map(|value| { + NativeQueryElement::Parameter(Parameter::new( + value.into(), + p.r#type.clone(), + )) + }) + .ok_or_else(|| QueryBuilderError::MissingNativeQueryArgument { + query: collection.alias().to_owned(), + argument: arg_alias.to_owned(), + }) + } + }) + .collect::>()?; + + Ok(NativeQuery::new(elements).into_table_factor()) + } else { + Err(QueryBuilderError::UnknownTable( + collection.alias().to_owned(), + )) + } } fn column_ident( &self, column_alias: &str, - collection_alias: &str, + _collection: &CollectionContext, ) -> Result { - // todo: get column name based on column alias and collection alias - let table = self - .configuration - .tables - .get(collection_alias) - .ok_or_else(|| QueryBuilderError::UnknownTable(collection_alias.to_owned()))?; - - let column = table.columns.get(column_alias).ok_or_else(|| { - QueryBuilderError::UnknownColumn(column_alias.to_owned(), collection_alias.to_owned()) - })?; - - Ok(Ident::new_quoted(&column.name)) + Ok(Ident::new_quoted(column_alias)) } fn column_data_type( &self, column_alias: &str, - collection_alias: &str, + collection: &CollectionContext, ) -> Result { // todo: get column name based on column alias and collection alias - let table = self + let return_type = self .configuration .tables - .get(collection_alias) - .ok_or_else(|| QueryBuilderError::UnknownTable(collection_alias.to_owned()))?; + .get(collection.alias()) + .map(|table| &table.return_type) + .or_else(|| { + self.configuration + .queries + .get(collection.alias()) + .map(|query| &query.return_type) + }) + .ok_or_else(|| QueryBuilderError::UnknownTable(collection.alias().to_owned()))?; + + let table_type = &self + .configuration + .table_types + .get(return_type) + .ok_or_else(|| QueryBuilderError::UnknownTableType(return_type.to_owned()))?; - let column = table.columns.get(column_alias).ok_or_else(|| { - QueryBuilderError::UnknownColumn(column_alias.to_owned(), collection_alias.to_owned()) + let column_type = table_type.columns.get(column_alias).ok_or_else(|| { + QueryBuilderError::UnknownColumn(column_alias.to_owned(), collection.alias().to_owned()) })?; // todo: revise whether we want to get the data type from the type definition instead - Ok(column.data_type.to_owned()) + Ok(column_type.to_owned()) } } diff --git a/crates/ndc-clickhouse/src/sql/query_builder/collection_context.rs b/crates/ndc-clickhouse/src/sql/query_builder/collection_context.rs new file mode 100644 index 0000000..c0653ef --- /dev/null +++ b/crates/ndc-clickhouse/src/sql/query_builder/collection_context.rs @@ -0,0 +1,78 @@ +use std::collections::BTreeMap; + +use ndc_sdk::models::{Argument, Relationship, RelationshipArgument}; + +#[derive(Debug, Clone)] +pub enum CollectionContext<'a, 'b> { + Base { + collection_alias: &'a str, + arguments: &'b BTreeMap, + }, + Relationship { + collection_alias: &'a str, + arguments: &'b BTreeMap, + relationship_arguments: &'a BTreeMap, + }, + UnrelatedRelationship { + collection_alias: &'a str, + arguments: &'b BTreeMap, + }, +} + +impl<'a, 'b> 
CollectionContext<'a, 'b> { + pub fn new(collection_alias: &'a str, arguments: &'b BTreeMap) -> Self { + Self::Base { + collection_alias, + arguments, + } + } + pub fn new_unrelated( + collection_alias: &'a str, + arguments: &'b BTreeMap, + ) -> Self { + Self::UnrelatedRelationship { + collection_alias, + arguments, + } + } + pub fn from_relationship( + relationship: &'a Relationship, + arguments: &'b BTreeMap, + ) -> Self { + Self::Relationship { + collection_alias: &relationship.target_collection, + relationship_arguments: &relationship.arguments, + arguments, + } + } + pub fn alias(&self) -> &str { + match self { + CollectionContext::Base { + collection_alias, .. + } + | CollectionContext::Relationship { + collection_alias, .. + } + | CollectionContext::UnrelatedRelationship { + collection_alias, .. + } => &collection_alias, + } + } + pub fn has_arguments(&self) -> bool { + match self { + CollectionContext::Base { + collection_alias: _, + arguments, + } => !arguments.is_empty(), + CollectionContext::Relationship { + collection_alias: _, + arguments, + relationship_arguments, + } => !arguments.is_empty() || !relationship_arguments.is_empty(), + CollectionContext::UnrelatedRelationship { + collection_alias: _, + arguments, + } => !arguments.is_empty(), + } + } +} diff --git a/crates/ndc-clickhouse/src/sql/query_builder/error.rs b/crates/ndc-clickhouse/src/sql/query_builder/error.rs index 5aedaef..d4bcf79 100644 --- a/crates/ndc-clickhouse/src/sql/query_builder/error.rs +++ b/crates/ndc-clickhouse/src/sql/query_builder/error.rs @@ -4,8 +4,16 @@ use std::fmt; pub enum QueryBuilderError { /// A relationship referenced in the query is missing from the collection_relationships map MissingRelationship(String), + /// An argument required for a native query was not supplied + MissingNativeQueryArgument { query: String, argument: String }, /// A table was referenced but not found in configuration UnknownTable(String), + /// An argument was supplied for a table that does not have that argument + UnknownTableArgument { table: String, argument: String }, + /// An argument was supplied for a table that does not have that argument + UnknownQueryArgument { query: String, argument: String }, + /// A table in configuration referenced a table type that could not be found + UnknownTableType(String), /// A column was referenced but not found in configuration UnknownColumn(String, String), /// Unable to serialize variables into a json string @@ -28,7 +36,21 @@ impl fmt::Display for QueryBuilderError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { QueryBuilderError::MissingRelationship(rel) => write!(f, "Missing relationship: {rel}"), + QueryBuilderError::MissingNativeQueryArgument { query, argument } => write!( + f, + "Argument {argument} required for native query {query} was not supplied" + ), QueryBuilderError::UnknownTable(t) => write!(f, "Unable to find table {t} in config"), + QueryBuilderError::UnknownTableArgument { table, argument } => { + write!(f, "Unknown argument {argument} supplied for table {table}") + } + QueryBuilderError::UnknownQueryArgument { query, argument } => { + write!(f, "Unknown argument {argument} supplied for query {query}") + } + + QueryBuilderError::UnknownTableType(t) => { + write!(f, "Unable to find table type {t} in config") + } QueryBuilderError::UnknownColumn(c, t) => { write!(f, "Unable to find column {c} for table {t} in config") } diff --git a/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs 
b/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs index 58e3ee5..3cf691b 100644 --- a/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs +++ b/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs @@ -1,9 +1,6 @@ use std::{collections::BTreeMap, fmt::Display, str::FromStr}; -use common::{ - clickhouse_parser::datatype::ClickHouseDataType, - config::{ColumnConfig, ServerConfig}, -}; +use common::{clickhouse_parser::datatype::ClickHouseDataType, config::ServerConfig}; use indexmap::IndexMap; use ndc_sdk::models; @@ -71,9 +68,9 @@ impl AggregatesTypeString { column: column_alias, function, } => { - let column = get_column(column_alias, table_alias, config)?; + let column_type = get_column(column_alias, table_alias, config)?; let type_definition = ClickHouseTypeDefinition::from_table_column( - &column.data_type, + &column_type, column_alias, table_alias, ); @@ -83,7 +80,7 @@ impl AggregatesTypeString { |_err| TypeStringError::UnknownAggregateFunction { table: table_alias.to_owned(), column: column_alias.to_owned(), - data_type: column.data_type.to_owned(), + data_type: column_type.to_owned(), function: function.to_owned(), }, )?; @@ -97,7 +94,7 @@ impl AggregatesTypeString { .ok_or_else(|| TypeStringError::UnknownAggregateFunction { table: table_alias.to_owned(), column: column_alias.to_owned(), - data_type: column.data_type.to_owned(), + data_type: column_type.to_owned(), function: function.to_owned(), })?; @@ -130,9 +127,9 @@ impl RowsTypeString { if fields.is_some() { todo!("support nested field selection") } - let column = get_column(column_alias, table_alias, config)?; + let column_type = get_column(column_alias, table_alias, config)?; let type_definition = ClickHouseTypeDefinition::from_table_column( - &column.data_type, + &column_type, column_alias, table_alias, ); @@ -237,21 +234,37 @@ fn get_column<'a>( column_alias: &str, table_alias: &str, config: &'a ServerConfig, -) -> Result<&'a ColumnConfig, TypeStringError> { - let table = config +) -> Result<&'a ClickHouseDataType, TypeStringError> { + let return_type = config .tables .get(table_alias) + .map(|table| &table.return_type) + .or_else(|| { + config + .queries + .get(table_alias) + .map(|query| &query.return_type) + }) .ok_or_else(|| TypeStringError::UnknownTable { table: table_alias.to_owned(), })?; - let column = table - .columns - .get(column_alias) - .ok_or_else(|| TypeStringError::UnknownColumn { - table: table_alias.to_owned(), - column: column_alias.to_owned(), - })?; + let table_type = + config + .table_types + .get(return_type) + .ok_or_else(|| TypeStringError::UnknownTableType { + table: return_type.to_owned(), + })?; + + let column = + table_type + .columns + .get(column_alias) + .ok_or_else(|| TypeStringError::UnknownColumn { + table: table_alias.to_owned(), + column: column_alias.to_owned(), + })?; Ok(column) } @@ -261,6 +274,9 @@ pub enum TypeStringError { UnknownTable { table: String, }, + UnknownTableType { + table: String, + }, UnknownColumn { table: String, column: String, @@ -278,6 +294,7 @@ impl Display for TypeStringError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { TypeStringError::UnknownTable { table } => write!(f, "Unknown table: {table}"), + TypeStringError::UnknownTableType { table } => write!(f, "Unknown table type: {table}"), TypeStringError::UnknownColumn { table, column } => { write!(f, "Unknown column: {column} in table: {table}") } From eebce67de5809ea90f069abad1e4f7152fd4bf86 Mon Sep 17 00:00:00 2001 From: Benoit Ranque Date: Sun, 7 Apr 
2024 08:34:11 -0400 Subject: [PATCH 22/28] add changelog entries --- CHANGELOG.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28726aa..836f296 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.2.2] + - Return error if empty list of query variables passed. Variables should be ommited or be a list with at least one member - Use table comment as description for corresponding collection and object type - Return json representation for applicable scalar types in schema response @@ -14,6 +16,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Bump ndc-spec dependency to 0.1.1 - Config breaking change: use maps for tables and columns list, rather than arrays. This should help avoid duplicate alias issues - Move parsing column data types into configuration time and startup time, instead of query execution time. This should give error feedback earlier +- Allow tables and native query return types to be marked as identical to return types for other tables/queries +- Support parameterized views (don't support column valued arguments) +- Support parameterized native queries, except in foreach queries. Also don't support column valued arguments +- Change explain output so the console knows how to extract generated SQL and sql explain plan to display to the user +- Pretty print explain SQL output ## [0.2.1] From 46f54163a35595bbb6f77e18cb3f95631ab45727 Mon Sep 17 00:00:00 2001 From: Benoit Ranque Date: Sun, 7 Apr 2024 08:38:46 -0400 Subject: [PATCH 23/28] bump reqwest version to latest --- Cargo.lock | 270 ++++++++++++++++++++++++------- crates/common/Cargo.toml | 2 +- crates/ndc-clickhouse/Cargo.toml | 2 +- 3 files changed, 214 insertions(+), 60 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c0ec813..00509f8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -145,9 +145,9 @@ dependencies = [ "bitflags 1.3.2", "bytes", "futures-util", - "http", - "http-body", - "hyper", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.28", "itoa", "matchit", "memchr", @@ -175,8 +175,8 @@ dependencies = [ "async-trait", "bytes", "futures-util", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.6", "mime", "rustversion", "tower-layer", @@ -193,8 +193,8 @@ dependencies = [ "axum-core", "bytes", "futures-util", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.6", "mime", "pin-project-lite", "serde", @@ -231,6 +231,12 @@ version = "0.21.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" +[[package]] +name = "base64" +version = "0.22.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9475866fec1451be56a3c2400fd081ff546538961565ccb5b7142cbd22bc7a51" + [[package]] name = "bitflags" version = "1.3.2" @@ -337,7 +343,7 @@ name = "common" version = "0.2.1" dependencies = [ "peg", - "reqwest", + "reqwest 0.12.3", "schemars", "serde", "serde_json", @@ -598,7 +604,7 @@ dependencies = [ "futures-core", "futures-sink", "futures-util", - "http", + "http 0.2.12", "indexmap 2.2.5", "slab", "tokio", @@ -653,6 +659,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21b9ddb458710bc376481b842f5da65cdf31522de232c1ca8146abce2a358258" +dependencies = [ + "bytes", + "fnv", + "itoa", +] + [[package]] 
name = "http-body" version = "0.4.6" @@ -660,7 +677,30 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" dependencies = [ "bytes", - "http", + "http 0.2.12", + "pin-project-lite", +] + +[[package]] +name = "http-body" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +dependencies = [ + "bytes", + "http 1.1.0", +] + +[[package]] +name = "http-body-util" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0475f8b2ac86659c21b64320d5d653f9efe42acd2a4e560073ec61a155a34f1d" +dependencies = [ + "bytes", + "futures-core", + "http 1.1.0", + "http-body 1.0.0", "pin-project-lite", ] @@ -693,8 +733,8 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.6", "httparse", "httpdate", "itoa", @@ -706,18 +746,40 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186548d73ac615b32a73aafe38fb4f56c0d340e110e5a200bcadbaf2e199263a" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.1.0", + "http-body 1.0.0", + "httparse", + "itoa", + "pin-project-lite", + "smallvec", + "tokio", + "want", +] + [[package]] name = "hyper-rustls" -version = "0.24.2" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c" dependencies = [ "futures-util", - "http", - "hyper", + "http 1.1.0", + "hyper 1.2.0", + "hyper-util", "rustls", + "rustls-pki-types", "tokio", "tokio-rustls", + "tower-service", ] [[package]] @@ -726,7 +788,7 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" dependencies = [ - "hyper", + "hyper 0.14.28", "pin-project-lite", "tokio", "tokio-io-timeout", @@ -739,12 +801,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper", + "hyper 0.14.28", "native-tls", "tokio", "tokio-native-tls", ] +[[package]] +name = "hyper-util" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ca38ef113da30126bbff9cd1705f9273e15d45498615d138b0c20279ac7a76aa" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.1.0", + "http-body 1.0.0", + "hyper 1.2.0", + "pin-project-lite", + "socket2", + "tokio", + "tower", + "tower-service", + "tracing", +] + [[package]] name = "iana-time-zone" version = "0.1.60" @@ -969,7 +1051,7 @@ dependencies = [ "indexmap 2.2.5", "ndc-sdk", "prometheus", - "reqwest", + "reqwest 0.12.3", "serde", "serde_json", "sqlformat", @@ -997,7 +1079,7 @@ dependencies = [ "async-trait", "indexmap 2.2.5", "opentelemetry", - "reqwest", + "reqwest 0.11.27", "schemars", "serde", "serde_derive", @@ -1016,7 +1098,7 @@ dependencies = [ "axum-extra", "bytes", "clap", - "http", + "http 0.2.12", "mime", "ndc-client", "ndc-test", @@ -1026,7 +1108,7 @@ dependencies = [ "opentelemetry-semantic-conventions", "opentelemetry_sdk", "prometheus", - "reqwest", + "reqwest 0.11.27", "serde", "serde_json", 
"thiserror", @@ -1049,7 +1131,7 @@ dependencies = [ "indexmap 2.2.5", "ndc-client", "rand", - "reqwest", + "reqwest 0.11.27", "semver", "serde", "serde_json", @@ -1184,9 +1266,9 @@ checksum = "7cbfa5308166ca861434f0b0913569579b8e587430a3d6bcd7fd671921ec145a" dependencies = [ "async-trait", "bytes", - "http", + "http 0.2.12", "opentelemetry", - "reqwest", + "reqwest 0.11.27", ] [[package]] @@ -1197,14 +1279,14 @@ checksum = "1a016b8d9495c639af2145ac22387dcb88e44118e45320d9238fbf4e7889abcb" dependencies = [ "async-trait", "futures-core", - "http", + "http 0.2.12", "opentelemetry", "opentelemetry-http", "opentelemetry-proto", "opentelemetry-semantic-conventions", "opentelemetry_sdk", "prost", - "reqwest", + "reqwest 0.11.27", "thiserror", "tokio", "tonic", @@ -1528,10 +1610,9 @@ dependencies = [ "futures-core", "futures-util", "h2", - "http", - "http-body", - "hyper", - "hyper-rustls", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.28", "hyper-tls", "ipnet", "js-sys", @@ -1542,8 +1623,7 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls", - "rustls-pemfile", + "rustls-pemfile 1.0.4", "serde", "serde_json", "serde_urlencoded", @@ -1551,6 +1631,45 @@ dependencies = [ "system-configuration", "tokio", "tokio-native-tls", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", + "winreg 0.50.0", +] + +[[package]] +name = "reqwest" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e6cc1e89e689536eb5aeede61520e874df5a4707df811cd5da4aa5fbb2aae19" +dependencies = [ + "base64 0.22.0", + "bytes", + "futures-core", + "futures-util", + "http 1.1.0", + "http-body 1.0.0", + "http-body-util", + "hyper 1.2.0", + "hyper-rustls", + "hyper-util", + "ipnet", + "js-sys", + "log", + "mime", + "once_cell", + "percent-encoding", + "pin-project-lite", + "rustls", + "rustls-pemfile 2.1.2", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", "tokio-rustls", "tower-service", "url", @@ -1558,7 +1677,7 @@ dependencies = [ "wasm-bindgen-futures", "web-sys", "webpki-roots", - "winreg", + "winreg 0.52.0", ] [[package]] @@ -1597,14 +1716,16 @@ dependencies = [ [[package]] name = "rustls" -version = "0.21.10" +version = "0.22.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d5a6813c0759e4609cd494e8e725babae6a2ca7b62a5536a13daaec6fcb7ba" +checksum = "99008d7ad0bbbea527ec27bddbc0e432c5b87d8175178cee68d2eec9c4a1813c" dependencies = [ "log", "ring", + "rustls-pki-types", "rustls-webpki", - "sct", + "subtle", + "zeroize", ] [[package]] @@ -1616,13 +1737,30 @@ dependencies = [ "base64 0.21.7", ] +[[package]] +name = "rustls-pemfile" +version = "2.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +dependencies = [ + "base64 0.22.0", + "rustls-pki-types", +] + +[[package]] +name = "rustls-pki-types" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecd36cc4259e3e4514335c4a138c6b43171a8d61d8f5c9348f9fc7529416f247" + [[package]] name = "rustls-webpki" -version = "0.101.7" +version = "0.102.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +checksum = "faaa0a62740bedb9b2ef5afa303da42764c012f743917351dc9a237ea1663610" dependencies = [ "ring", + "rustls-pki-types", "untrusted", ] @@ -1680,16 +1818,6 @@ 
version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" -[[package]] -name = "sct" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "security-framework" version = "2.9.2" @@ -1915,6 +2043,12 @@ dependencies = [ "syn 2.0.53", ] +[[package]] +name = "subtle" +version = "2.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" + [[package]] name = "syn" version = "1.0.109" @@ -2104,11 +2238,12 @@ dependencies = [ [[package]] name = "tokio-rustls" -version = "0.24.1" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" dependencies = [ "rustls", + "rustls-pki-types", "tokio", ] @@ -2149,9 +2284,9 @@ dependencies = [ "base64 0.21.7", "bytes", "h2", - "http", - "http-body", - "hyper", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.28", "hyper-timeout", "percent-encoding", "pin-project", @@ -2194,8 +2329,8 @@ dependencies = [ "bytes", "futures-core", "futures-util", - "http", - "http-body", + "http 0.2.12", + "http-body 0.4.6", "http-range-header", "mime", "pin-project-lite", @@ -2499,9 +2634,12 @@ dependencies = [ [[package]] name = "webpki-roots" -version = "0.25.4" +version = "0.26.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f20c57d8d7db6d3b86154206ae5d8fba62dd39573114de97c2cb0578251f8e1" +checksum = "b3de34ae270483955a94f4b21bdaaeb83d508bb84a01435f393818edb0012009" +dependencies = [ + "rustls-pki-types", +] [[package]] name = "winapi" @@ -2675,3 +2813,19 @@ dependencies = [ "cfg-if", "windows-sys 0.48.0", ] + +[[package]] +name = "winreg" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a277a57398d4bfa075df44f501a17cfdf8542d224f0d36095a2adc7aee4ef0a5" +dependencies = [ + "cfg-if", + "windows-sys 0.48.0", +] + +[[package]] +name = "zeroize" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "525b4ec142c6b68a2d10f01f7bbf6755599ca3f81ea53b8431b7dd348f5fdb2d" diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml index 6ba87a2..0cf7815 100644 --- a/crates/common/Cargo.toml +++ b/crates/common/Cargo.toml @@ -4,7 +4,7 @@ version.workspace = true edition.workspace = true [dependencies] -reqwest = { version = "0.11.27", features = [ +reqwest = { version = "0.12.3", features = [ "json", "rustls-tls", ], default-features = false } diff --git a/crates/ndc-clickhouse/Cargo.toml b/crates/ndc-clickhouse/Cargo.toml index 444b185..2d37b3b 100644 --- a/crates/ndc-clickhouse/Cargo.toml +++ b/crates/ndc-clickhouse/Cargo.toml @@ -9,7 +9,7 @@ common = { path = "../common" } indexmap = "2.1.0" ndc-sdk = { git = "https://github.com/hasura/ndc-sdk-rs", rev = "7b56fac", package = "ndc-sdk" } prometheus = "0.13.3" -reqwest = { version = "0.11.27", features = [ +reqwest = { version = "0.12.3", features = [ "json", "rustls-tls", ], default-features = false } From 9792ef95642d21cc4bd810ca508642df96d1ba5f Mon Sep 17 00:00:00 2001 From: Benoit Ranque Date: Mon, 8 Apr 2024 19:11:36 -0400 Subject: [PATCH 24/28] update 
docs
---
 README.md             |  75 +-----------------
 docs/configuration.md | 143 ++++++++++++++++++++++++++++++++++
 docs/development.md   | 173 ++++++++++++++++++++++++++++++++++++++++++
 docs/index.md         |   2 +
 4 files changed, 322 insertions(+), 71 deletions(-)
 create mode 100644 docs/configuration.md
 create mode 100644 docs/development.md

diff --git a/README.md b/README.md
index 5bac9d4..7f2feba 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # Clickhouse Connector
 
-**Compatible with Hasura DDN Alpha**
+**Compatible with Hasura DDN Beta**
 
 The [Clickhouse](https://clickhouse.com/) Native Data Connector allows for connecting to a Clickhouse instance giving you an instant GraphQL API on top of your Clickhouse data.
@@ -14,83 +14,16 @@ In order to use this connector you will need to:
 
 - Create a [Clickhouse account](https://clickhouse.cloud/signUp?loc=nav-get-started)
 - Log in to a [Hasura CLI](https://hasura.io/docs/3.0/cli/overview/) Session
-- Create a Pre-Shared Token for service authentication between the Hasura V3 Engine and your connector
-
-## Features
-
-This native data connector implements the following Hasura Data Domain Specification features:
-
-| Feature | |
-| ----------------------------------------------------------------------------------------------------------------------------------- | --- |
-| [Simple Queries](https://hasura.io/docs/3.0/graphql-api/queries/simple-queries/) | ✅ |
-| [Nested Queries](https://hasura.io/docs/3.0/graphql-api/queries/nested-queries/) | ✅ |
-| [Query Result Sorting](https://hasura.io/docs/3.0/graphql-api/queries/sorting/) | ✅ |
-| [Query Result Pagination](https://hasura.io/docs/3.0/graphql-api/queries/pagination/) | ✅ |
-| [Multiple Query Arguments](https://hasura.io/docs/3.0/graphql-api/queries/multiple-arguments/) | ✅ |
-| [Multiple Queries in a Request](https://hasura.io/docs/3.0/graphql-api/queries/multiple-queries/) | ✅ |
-| [Variables, Aliases, Fragments, Directives](https://hasura.io/docs/3.0/graphql-api/queries/variables-aliases-fragments-directives/) | ✅ |
-| [Query Filter: Value Comparison](https://hasura.io/docs/3.0/graphql-api/queries/filters/comparison-operators/) | ✅ |
-| [Query Filter: Boolean Expressions](https://hasura.io/docs/3.0/graphql-api/queries/filters/boolean-operators/) | ✅ |
-| [Query Filter: Text](https://hasura.io/docs/3.0/graphql-api/queries/filters/text-search-operators/) | ✅ |
 
 ## For Hasura Users
 
 This connector should be used via the Hasura DDN CLI.
 
-## For Developers
-
-The following instructions are for developers who wish to contribute to the Clickhouse Connector.
-
-### Prerequisites:
-
-1. Install [rustup](https://www.rust-lang.org/tools/install).
-2. Install [docker](https://docs.docker.com/get-docker/).
-
-### Use the CLI to create/update a configuration directory
-
-View CLI help:
-
-```sh
-cargo run --package ndc-clickhouse-cli -- --help
-```
-
-Create a configuration directory in the `./config` directory:
+See [configuration instructions](./docs/configuration.md) for how to configure the connector.
-```sh
-cargo run --package ndc-clickhouse-cli -- init --context-path ./config --clickhouse-url "url" --clickhouse-username "user" --clickhouse-password "pass"
-```
-
-Update an existing directory. Will create the directory and files if not present.
-
-This is required whenever the database schema changes
-
-```sh
-cargo run --package ndc-clickhouse-cli -- update --context-path ./config --clickhouse-url "url" --clickhouse-username "user" --clickhouse-password "pass"
-```
-
-### Run the connector server in docker
-
-Create a `.env` file in the project root, replacing the placeholders with the actual values:
-
-```env
-CLICKHOUSE_URL=
-CLICKHOUSE_USERNAME=
-CLICKHOUSE_PASSWORD=
-```
-
-Run the connector container. Check `docker-compose.yaml` for configuration details:
-
-```sh
-docker compose up -d
-```
-
-The connector should now be running and accepting requests.
-
-To re-build the connector:
+## For Developers
-```sh
-docker compose up -d --build
-```
+
+See [development instructions](./docs/development.md)
 
 ## Documentation
diff --git a/docs/configuration.md b/docs/configuration.md
new file mode 100644
index 0000000..04b70c9
--- /dev/null
+++ b/docs/configuration.md
@@ -0,0 +1,143 @@
+# Configuration
+
+## Initializing & Updating a Configuration Directory
+
+The connector requires a configuration directory to run.
+
+When part of a DDN project, this configuration directory lives in the project.
+
+### Using the Hasura DDN CLI
+
+The Hasura DDN CLI will initialize and update the configuration directory for you.
+
+Follow its instructions on how to add a connector.
+
+### Using the ndc-clickhouse-cli executable
+
+You can also run the ndc-clickhouse-cli executable yourself.
+
+If you have the executable:
+
+```sh
+ndc-clickhouse-cli.exe --connector-context-path ./config --clickhouse-url "URL" --clickhouse-username "USERNAME" --clickhouse-password "PASSWORD" update
+```
+
+If you have the source code:
+
+```sh
+cargo run --package ndc-clickhouse-cli -- --connector-context-path ./config --clickhouse-url "URL" --clickhouse-username "USERNAME" --clickhouse-password "PASSWORD" update
+```
+
+See also: [development instructions](./development.md)
+
+## Tables
+
+Tables are added by introspecting the database provided during init/update of the configuration directory.
+Most users do not need to alter this configuration further, but there are a couple of additional options.
+
+### Table alias
+
+The keys in the tables object in the configuration file can be changed to modify the alias a table will be exposed under.
+
+This alias must remain unique.
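For example, to expose the `Artist` table under a different alias, rename its key. A minimal sketch, reusing the table-configuration shape from the full example at the end of this document (the `Performers` alias is illustrative):

```json
{
  "tables": {
    "Performers": {
      "name": "Artist",
      "schema": "default",
      "comment": "",
      "primary_key": { "name": "ArtistId", "columns": ["ArtistId"] },
      "return_type": {
        "kind": "definition",
        "columns": {
          "ArtistId": "Int32",
          "Name": "Nullable(String)"
        }
      }
    }
  }
}
```

The connector would then expose the collection as `Performers` while still querying the underlying `"default"."Artist"` table.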
+### Table Return Type
+
+Tables can return the same type as another table.
+
+This is useful for views that return rows from another table.
+
+This allows both tables to share an object type,
+which in turn allows them to share relationships and object type permissions.
+
+## Native Queries
+
+This connector supports native queries: raw SQL queries treated as collections (virtual tables).
+
+This is an alternative to writing views on the database, which is usually preferable but may not always be possible.
+Native queries can also be useful for iterating on views before creating them on the database.
+
+You can write a native query as a `.sql` file in your configuration directory, typically in a dedicated subdirectory.
+
+The file may only contain a single statement.
+
+Arguments may be specified using the [ClickHouse parameter syntax](https://clickhouse.com/docs/en/interfaces/cli#cli-queries-with-parameters-syntax):
+
+```sql
+-- queries/ArtistByName.sql
+SELECT *
+FROM "default"."Artist"
+WHERE "Artist"."Name" = {ArtistName: String}
+```
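Before registering the query, you may want to check that the statement and its parameters run as expected. A sketch using `clickhouse-client` (connection details and the sample value are placeholders; `--param_<name>` binds `{name: Type}` parameters as described in the ClickHouse docs linked above):

```sh
# Run the native query file directly, binding a value for {ArtistName: String}.
clickhouse-client \
  --user "USERNAME" --password "PASSWORD" \
  --param_ArtistName="AC/DC" \
  --query "$(cat queries/ArtistByName.sql)"
```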
+Then add the query to your `configuration.json` file. You'll need to declare the query's return type there:
+
+```json
+{
+  "tables": {},
+  "queries": {
+    "Name": {
+      "exposed_as": "collection",
+      "file": "queries/ArtistByName.sql",
+      "return_type": {
+        "kind": "definition",
+        "columns": {
+          "ArtistId": "Int32",
+          "Name": "String"
+        }
+      }
+    }
+  }
+}
+```
+
+To figure out the return type, you can use the [ClickHouse `toTypeName` function](https://clickhouse.com/docs/en/sql-reference/functions/other-functions#totypenamex).
+
+One way to get the return types for your SQL statement:
+
+```sql
+SELECT * APPLY toTypeName
+FROM (
+    -- your SQL here
+) q LIMIT 1;
+```
+
+Alternatively, if your query returns the same type as another table, and you want this reflected in your schema:
+
+```json
+{
+  "tables": {
+    "Artist": {
+      "name": "Artist",
+      "schema": "default",
+      "comment": "",
+      "primary_key": {
+        "name": "ArtistId",
+        "columns": [
+          "ArtistId"
+        ]
+      },
+      "return_type": {
+        "kind": "definition",
+        "columns": {
+          "ArtistId": "Int32",
+          "Name": "Nullable(String)"
+        }
+      }
+    }
+  },
+  "queries": {
+    "Name": {
+      "exposed_as": "collection",
+      "file": "queries/ArtistByName.sql",
+      "return_type": {
+        "kind": "table_reference",
+        "table_name": "Artist"
+      }
+    }
+  }
+}
+```
diff --git a/docs/development.md b/docs/development.md
new file mode 100644
index 0000000..5d8fedc
--- /dev/null
+++ b/docs/development.md
@@ -0,0 +1,173 @@
+# Development
+
+This document details the steps to run this connector with Hasura DDN for local testing during connector development.
+
+## Prerequisites
+
+- [ClickHouse Database](https://clickhouse.com/)
+- [Rust Toolchain](https://www.rust-lang.org/tools/install)
+- [Docker](https://docs.docker.com/get-docker/)
+- [ngrok](https://ngrok.com/)
+- [Hasura DDN CLI](https://hasura.io/docs/3.0/cli/installation)
+
+## Clone the repository
+
+Clone this repository to the directory of your choice:
+
+```sh
+git clone https://github.com/hasura/ndc-clickhouse.git
+```
+
+All subsequent commands assume they are run from the directory the repository was cloned into:
+
+```sh
+cd ndc-clickhouse
+```
+
+## Generating a configuration directory
+
+Create a `config` directory in the project root:
+
+```sh
+mkdir config
+```
+
+Initialize the directory with a configuration based on your database schema,
+changing the placeholder auth details to your actual database connection information:
+
+```sh
+cargo run --package ndc-clickhouse-cli -- --connector-context-path ./config --clickhouse-url "URL" --clickhouse-username "USERNAME" --clickhouse-password "PASSWORD" update
+```
+
+You can run this command again if your database schema has changed and you'd like the config to reflect that.
+Any configuration customizations should not be overwritten.
+
+See also: [editing the configuration](./configuration.md)
+
+## Running the connector in docker
+
+Create a `.env` file in the project root:
+
+```env
+CLICKHOUSE_URL=
+CLICKHOUSE_USERNAME=
+CLICKHOUSE_PASSWORD=
+```
+
+Start the connector:
+
+```sh
+docker compose up -d
+```
+
+The first build may take a while. Subsequent builds should be faster thanks to layer caching.
+
+To restart the connector (required for configuration changes to take effect):
+
+```sh
+docker compose restart
+```
+
+To rebuild the connector (required for changes to connector source code to take effect):
+
+```sh
+docker compose up -d --build
+```
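Once the container is up, a quick smoke test can confirm the connector is serving requests — a sketch, assuming the port mapping of 4000 used elsewhere in this guide and the standard health and schema endpoints of the NDC connector HTTP API:

```sh
# Expect a success response once the connector is ready.
curl -i http://localhost:4000/health

# Inspect the NDC schema the connector exposes.
curl http://localhost:4000/schema
```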
+## Exposing the running connector to the web
+
+We use `ngrok` to expose our connector to the web; you could use an alternative:
+
+```sh
+ngrok http http://localhost:4000
+```
+
+Take note of the resulting address; we'll need it later.
+
+See also: [ngrok documentation](https://ngrok.com/docs)
+
+## Adding the connector to a ddn project
+
+If you don't yet have a DDN project, you'll need to [create one](https://hasura.io/docs/3.0/getting-started/create-a-project#step-3-create-a-new-project).
+
+The following instructions assume a default, empty project.
+We will be adding a new data source, `clickhouse`. Change the name as needed.
+
+1. Create a directory for the data source: `app/clickhouse`
+2. Create the data source definition file `app/clickhouse/clickhouse.hml` with content:
+```yaml
+kind: DataConnectorLink
+version: v1
+definition:
+  name: clickhouse
+  url:
+    Fn::ManifestRef: clickhouse
+```
+3. Create the data source connector directory: `app/clickhouse/connector`
+4. Create the data source connector file `app/clickhouse/connector/clickhouse.build.hml` with content:
+```yaml
+kind: ConnectorManifest
+version: v1
+spec:
+  supergraphManifests:
+    - base
+definition:
+  name: clickhouse
+  type: endpoints
+  deployments:
+    - endpoint:
+        valueFromEnv: CLICKHOUSE_CONNECTOR_ENDPOINT
+```
+5. Add the `CLICKHOUSE_CONNECTOR_ENDPOINT` env var to `base.env.yaml`:
+```yaml
+supergraph: {}
+subgraphs:
+  app:
+    CLICKHOUSE_CONNECTOR_ENDPOINT:
+```
+
+The endpoint should be the one [exposed by ngrok](#exposing-the-running-connector-to-the-web).
+
+## Building the ddn project
+
+### Using ddn dev
+
+This command will:
+
+- watch for changes in the connector schema
+- track and update models whenever the schema changes
+- create ddn builds whenever the metadata changes
+
+```sh
+ddn dev
+```
+
+You should now be able to navigate to your API.
+
+### Using ddn build
+
+You can also replicate `ddn dev` step by step.
+
+**Updating the connector schema**
+
+```sh
+ddn update data-connector-link clickhouse
+```
+
+Note: `clickhouse` here is our source name; change it if needed.
+
+**Tracking models**
+
+To explicitly track a model:
+
+```sh
+ddn add model --data-connector-link clickhouse --name <model-name>
+```
+
+The model name should be one of the collections exposed by the connector.
+Check the `app/clickhouse/clickhouse.hml` file for a list.
+
+Note: `clickhouse` here is our source name; change it if needed.
+
+**Creating a build**
+
+```sh
+ddn build connector-manifest
+```
+
+See also: [ddn documentation](https://hasura.io/docs/3.0)
\ No newline at end of file
diff --git a/docs/index.md b/docs/index.md
index 6d4ee74..0dc9102 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -2,6 +2,8 @@
 
 ClickHouse is a [Hasura](https://hasura.io/) Native Data Connector.
+- [Configuration](./configuration.md) +- [Development](./development.md) - [Code of Conduct](./code-of-conduct.md) - [Contributing](./contributing.md) - [Support](./support.md) From 870ee1a7f5fcfc6741889ba012160fa859e05ed6 Mon Sep 17 00:00:00 2001 From: Benoit Ranque Date: Tue, 9 Apr 2024 07:25:00 -0400 Subject: [PATCH 25/28] add error handling for unsupported feature: nested field selection --- .../ndc-clickhouse/src/sql/query_builder.rs | 19 +++---------------- .../src/sql/query_builder/typecasting.rs | 6 +++++- 2 files changed, 8 insertions(+), 17 deletions(-) diff --git a/crates/ndc-clickhouse/src/sql/query_builder.rs b/crates/ndc-clickhouse/src/sql/query_builder.rs index e19b8c1..ddf1b58 100644 --- a/crates/ndc-clickhouse/src/sql/query_builder.rs +++ b/crates/ndc-clickhouse/src/sql/query_builder.rs @@ -315,7 +315,9 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { let expr = match field { models::Field::Column { column, fields } => { if fields.is_some() { - todo!("support nested field selection") + return Err(QueryBuilderError::NotSupported( + "nested field selector".into(), + )); } Expr::CompoundIdentifier(vec![ Ident::new_quoted("_origin"), @@ -1757,21 +1759,6 @@ impl<'r, 'c> QueryBuilder<'r, 'c> { } else { Ok(table_name.into_table_factor()) } - // let arguments = match collection { - // CollectionContext::Base { - // collection_alias: _, - // arguments, - // } => arguments.iter().map(|(name, )|), - // CollectionContext::Relationship { - // collection_alias, - // arguments, - // relationship_arguments, - // } => todo!(), - // CollectionContext::UnrelatedRelationship { - // collection_alias, - // arguments, - // } => todo!(), - // }; } else if let Some(query) = self.configuration.queries.get(collection.alias()) { let get_argument = |name| match collection { CollectionContext::Base { diff --git a/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs b/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs index 3cf691b..4a83791 100644 --- a/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs +++ b/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs @@ -125,7 +125,9 @@ impl RowsTypeString { fields, } => { if fields.is_some() { - todo!("support nested field selection") + return Err(TypeStringError::NotSupported( + "subfield selector".into(), + )); } let column_type = get_column(column_alias, table_alias, config)?; let type_definition = ClickHouseTypeDefinition::from_table_column( @@ -288,6 +290,7 @@ pub enum TypeStringError { function: String, }, MissingRelationship(String), + NotSupported(String), } impl Display for TypeStringError { @@ -305,6 +308,7 @@ impl Display for TypeStringError { function, } => write!(f, "Unknown aggregate function: {function} for column {column} of type: {data_type} in table {table}"), TypeStringError::MissingRelationship(rel) => write!(f, "Missing relationship: {rel}"), + TypeStringError::NotSupported(feature) => write!(f, "Not supported: {feature}"), } } } From bdbcd5e4dd6d7a40608857a73cc0fe885f27d281 Mon Sep 17 00:00:00 2001 From: Benoit Ranque Date: Tue, 9 Apr 2024 07:25:41 -0400 Subject: [PATCH 26/28] bump package version --- Cargo.lock | 6 +++--- Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 00509f8..b8fa7f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -340,7 +340,7 @@ checksum = "97af0562545a7d7f3d9222fcf909963bec36dcb502afaacab98c6ffac8da47ce" [[package]] name = "common" -version = "0.2.1" +version = "0.2.2" dependencies = [ "peg", "reqwest 0.12.3", @@ -1044,7 +1044,7 @@ 
dependencies = [
 
 [[package]]
 name = "ndc-clickhouse"
-version = "0.2.1"
+version = "0.2.2"
 dependencies = [
  "async-trait",
  "common",
@@ -1061,7 +1061,7 @@ dependencies = [
 
 [[package]]
 name = "ndc-clickhouse-cli"
-version = "0.2.1"
+version = "0.2.2"
 dependencies = [
  "clap",
  "common",
diff --git a/Cargo.toml b/Cargo.toml
index 0557072..5d56d27 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,5 +6,5 @@ members = [
 ]
 resolver = "2"
 
-package.version = "0.2.1"
+package.version = "0.2.2"
 package.edition = "2021"

From b04bb5b68012b9bd649abcb5ec816e9a3d1ae6de Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Tue, 9 Apr 2024 20:06:40 -0400
Subject: [PATCH 27/28] fix: return empty result set when no rows match
 foreach predicate

---
 crates/ndc-clickhouse/src/sql/query_builder.rs | 36 ++++++++++++++++---
 1 file changed, 31 insertions(+), 5 deletions(-)

diff --git a/crates/ndc-clickhouse/src/sql/query_builder.rs b/crates/ndc-clickhouse/src/sql/query_builder.rs
index ddf1b58..6462759 100644
--- a/crates/ndc-clickhouse/src/sql/query_builder.rs
+++ b/crates/ndc-clickhouse/src/sql/query_builder.rs
@@ -156,11 +156,39 @@ impl<'r, 'c> QueryBuilder<'r, 'c> {
             vec![]
         };
 
-        let from = vec![self
+        let rowset_subquery = self
             .rowset_subquery(&collection, &vec![], query)?
             .into_table_factor()
-            .alias("_rowset")
-            .into_table_with_joins(vec![])];
+            .alias("_rowset");
+
+        let from = if self.request.variables.is_some() {
+            let table = ObjectName(vec![Ident::new_quoted("_vars")])
+                .into_table_factor()
+                .alias("_vars");
+
+            let join_expr = Expr::BinaryOp {
+                left: Expr::CompoundIdentifier(vec![
+                    Ident::new_quoted("_vars"),
+                    Ident::new_quoted("_varset_id"),
+                ])
+                .into_box(),
+                op: BinaryOperator::Eq,
+                right: Expr::CompoundIdentifier(vec![
+                    Ident::new_quoted("_rowset"),
+                    Ident::new_quoted("_varset_id"),
+                ])
+                .into_box(),
+            };
+
+            let join = Join {
+                relation: rowset_subquery,
+                join_operator: JoinOperator::LeftOuter(JoinConstraint::On(join_expr)),
+            };
+
+            vec![table.into_table_with_joins(vec![join])]
+        } else {
+            vec![rowset_subquery.into_table_with_joins(vec![])]
+        };
 
         let order_by = if self.request.variables.is_some() {
             vec![OrderByExpr {
@@ -1856,7 +1884,6 @@ impl<'r, 'c> QueryBuilder<'r, 'c> {
         column_alias: &str,
         collection: &CollectionContext,
     ) -> Result {
-        // todo: get column name based on column alias and collection alias
         let return_type = self
             .configuration
             .tables
@@ -1880,7 +1907,6 @@ impl<'r, 'c> QueryBuilder<'r, 'c> {
             QueryBuilderError::UnknownColumn(column_alias.to_owned(), collection.alias().to_owned())
         })?;
 
-        // todo: revise whether we want to get the data type from the type definition instead
         Ok(column_type.to_owned())
     }
 }

From 015837f31ce180b73b5eb2ef29ad9f26c3d5f43f Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Tue, 9 Apr 2024 20:08:32 -0400
Subject: [PATCH 28/28] add changelog entry

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 836f296..5e10afb 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Support parameterized native queries, except in foreach queries. Also don't support column valued arguments
 - Change explain output so the console knows how to extract generated SQL and sql explain plan to display to the user
 - Pretty print explain SQL output
+- Fix a bug where no result sets were returned when the foreach predicate didn't match any rows. Correct behavior: an empty result set is returned
 
 ## [0.2.1]