From 03c973aea2c1add36ef7a7a38c5e3cdb40f595f2 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Tue, 1 Oct 2024 13:31:33 -0400
Subject: [PATCH 01/11] move print_separated utilities into common crate

---
 crates/common/src/format.rs                 | 73 +++++++++++++++++++++
 crates/ndc-clickhouse/src/sql/ast/format.rs | 72 --------------------
 2 files changed, 73 insertions(+), 72 deletions(-)
 create mode 100644 crates/common/src/format.rs

diff --git a/crates/common/src/format.rs b/crates/common/src/format.rs
new file mode 100644
index 0000000..3924f87
--- /dev/null
+++ b/crates/common/src/format.rs
@@ -0,0 +1,73 @@
+use std::fmt;
+
+pub struct DisplaySeparated<'a, T, I, F>
+where
+    F: Fn(&'_ mut fmt::Formatter, &I) -> fmt::Result,
+    &'a T: IntoIterator<Item = I>,
+{
+    list: &'a T,
+    separator: &'static str,
+    print: F,
+}
+
+pub fn display_comma_separated<'a, T, I>(
+    list: &'a T,
+) -> DisplaySeparated<'a, T, I, impl Fn(&mut fmt::Formatter, &I) -> fmt::Result>
+where
+    &'a T: IntoIterator<Item = I>,
+    I: fmt::Display,
+{
+    DisplaySeparated {
+        list,
+        separator: ", ",
+        print: |f, i| write!(f, "{i}"),
+    }
+}
+pub fn display_period_separated<'a, T, I>(
+    list: &'a T,
+) -> DisplaySeparated<'a, T, I, impl Fn(&mut fmt::Formatter, &I) -> fmt::Result>
+where
+    &'a T: IntoIterator<Item = I>,
+    I: fmt::Display,
+{
+    DisplaySeparated {
+        list,
+        separator: ".",
+        print: |f, i| write!(f, "{i}"),
+    }
+}
+
+pub fn display_separated<'a, T, I, F>(
+    list: &'a T,
+    separator: &'static str,
+    print: F,
+) -> DisplaySeparated<'a, T, I, F>
+where
+    &'a T: IntoIterator<Item = I>,
+    F: Fn(&'_ mut fmt::Formatter, &I) -> fmt::Result,
+{
+    DisplaySeparated {
+        list,
+        separator,
+        print,
+    }
+}
+
+impl<'a, T, I, F> fmt::Display for DisplaySeparated<'a, T, I, F>
+where
+    &'a T: IntoIterator<Item = I>,
+    F: Fn(&mut fmt::Formatter, &I) -> fmt::Result,
+{
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        let mut first = true;
+        for item in self.list {
+            if first {
+                first = false
+            } else {
+                write!(f, "{}", self.separator)?;
+            }
+            (self.print)(f, &item)?;
+        }
+        Ok(())
+    }
+}
diff --git a/crates/ndc-clickhouse/src/sql/ast/format.rs b/crates/ndc-clickhouse/src/sql/ast/format.rs
index 5b7def4..43fe1cd 100644
--- a/crates/ndc-clickhouse/src/sql/ast/format.rs
+++ b/crates/ndc-clickhouse/src/sql/ast/format.rs
@@ -22,78 +22,6 @@ pub fn escape_string(s: &str) -> EscapedString {
     EscapedString(s)
 }
 
-pub struct DisplaySeparated<'a, T, I, F>
-where
-    F: Fn(&'_ mut fmt::Formatter, &I) -> fmt::Result,
-    &'a T: IntoIterator<Item = I>,
-{
-    list: &'a T,
-    separator: &'static str,
-    print: F,
-}
-
-pub fn display_comma_separated<'a, T, I>(
-    list: &'a T,
-) -> DisplaySeparated<'a, T, I, impl Fn(&mut fmt::Formatter, &I) -> fmt::Result>
-where
-    &'a T: IntoIterator<Item = I>,
-    I: fmt::Display,
-{
-    DisplaySeparated {
-        list,
-        separator: ", ",
-        print: |f, i| write!(f, "{i}"),
-    }
-}
-pub fn display_period_separated<'a, T, I>(
-    list: &'a T,
-) -> DisplaySeparated<'a, T, I, impl Fn(&mut fmt::Formatter, &I) -> fmt::Result>
-where
-    &'a T: IntoIterator<Item = I>,
-    I: fmt::Display,
-{
-    DisplaySeparated {
-        list,
-        separator: ".",
-        print: |f, i| write!(f, "{i}"),
-    }
-}
-
-pub fn display_separated<'a, T, I, F>(
-    list: &'a T,
-    separator: &'static str,
-    print: F,
-) -> DisplaySeparated<'a, T, I, F>
-where
-    &'a T: IntoIterator<Item = I>,
-    F: Fn(&'_ mut fmt::Formatter, &I) -> fmt::Result,
-{
-    DisplaySeparated {
-        list,
-        separator,
-        print,
-    }
-}
-
-impl<'a, T, I, F> fmt::Display for DisplaySeparated<'a, T, I, F>
-where
-    &'a T: IntoIterator<Item = I>,
-    F: Fn(&mut fmt::Formatter, &I) -> fmt::Result,
-{
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        let mut first = true;
-        for item in self.list {
-            if first {
-                first = false
-            } else {
-                write!(f, "{}", self.separator)?;
-            }
-            (self.print)(f, &item)?;
-        }
-        Ok(())
-    }
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;

From a53164e163221d7cc1158694f3ad41e7f3260434 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Tue, 1 Oct 2024 13:32:38 -0400
Subject: [PATCH 02/11] move configuration reading logic into common, logic
 now shared between connector and cli

---
 crates/common/src/config.rs                  | 345 +++++++++++++++++-
 crates/common/src/lib.rs                     |   1 +
 crates/ndc-clickhouse/src/connector/setup.rs | 353 +------------------
 3 files changed, 351 insertions(+), 348 deletions(-)

diff --git a/crates/common/src/config.rs b/crates/common/src/config.rs
index b7ca1c5..ca21be3 100644
--- a/crates/common/src/config.rs
+++ b/crates/common/src/config.rs
@@ -1,11 +1,19 @@
-use std::collections::BTreeMap;
-
-use ndc_models::{ArgumentName, CollectionName, FieldName, ObjectTypeName};
-
 use crate::{
     clickhouse_parser::{datatype::ClickHouseDataType, parameterized_query::ParameterizedQuery},
-    config_file::{ParameterizedQueryExposedAs, PrimaryKey},
+    config_file::{
+        ParameterizedQueryConfigFile, ParameterizedQueryExposedAs, PrimaryKey, ReturnType,
+        ServerConfigFile, TableConfigFile, CONFIG_FILE_NAME,
+    },
+    format::display_period_separated,
+};
+use ndc_models::{ArgumentName, CollectionName, FieldName, ObjectTypeName};
+use std::{
+    collections::{BTreeMap, HashMap},
+    env, io,
+    path::{Path, PathBuf},
+    str::FromStr,
 };
+use tokio::fs;
 
 #[derive(Debug, Clone)]
 /// In memory, runtime configuration, built from the configuration file(s) and environment variables
@@ -52,3 +60,330 @@ pub struct ParameterizedQueryConfig {
     pub query: ParameterizedQuery,
     pub return_type: ObjectTypeName,
 }
+
+#[derive(Debug, thiserror::Error)]
+pub enum ConfigurationError {
+    #[error("missing required environment variable: {0}")]
+    MissingEnvironmentVariable(String),
+    #[error("could not find configuration file: {0}")]
+    FileNotFound(PathBuf),
+    #[error("error processing configuration: {0}")]
+    IoError(io::Error),
+    #[error(
+        "error parsing configuration: {file_path}, at line {line}, column {column}: {message}"
+    )]
+    ParseError {
+        file_path: PathBuf,
+        line: usize,
+        column: usize,
+        message: String,
+    },
+    #[error(
+        "error validating configuration: {file_path}, at {}: {message}",
+        display_period_separated(node_path)
+    )]
+    ValidateError {
+        file_path: PathBuf,
+        node_path: Vec<String>,
+        message: String,
+    },
+}
+
+#[derive(Debug, Clone)]
+pub struct ConfigurationEnvironment {
+    url: Option<String>,
+    username: Option<String>,
+    password: Option<String>,
+}
+
+impl ConfigurationEnvironment {
+    pub fn from_environment() -> Self {
+        Self {
+            url: env::var("CLICKHOUSE_URL").ok(),
+            username: env::var("CLICKHOUSE_USERNAME").ok(),
+            password: env::var("CLICKHOUSE_PASSWORD").ok(),
+        }
+    }
+    pub fn from_simulated_environment(env: HashMap<String, String>) -> Self {
+        Self {
+            url: env.get("CLICKHOUSE_URL").cloned(),
+            username: env.get("CLICKHOUSE_USERNAME").cloned(),
+            password: env.get("CLICKHOUSE_PASSWORD").cloned(),
+        }
+    }
+}
+
+pub fn get_connection_configuration(
+    env: &ConfigurationEnvironment,
+) -> Result<ConnectionConfig, ConfigurationError> {
+    let url = env
+        .url
+        .to_owned()
+        .ok_or(ConfigurationError::MissingEnvironmentVariable(
+            "CLICKHOUSE_URL".into(),
+        ))?;
+    let username =
+        env.username
+            .to_owned()
+            .ok_or(ConfigurationError::MissingEnvironmentVariable(
+                "CLICKHOUSE_USERNAME".into(),
+            ))?;
+    let password =
+        env.password
+            .to_owned()
+            .ok_or(ConfigurationError::MissingEnvironmentVariable(
+                "CLICKHOUSE_PASSWORD".into(),
+            ))?;
+
+    Ok(ConnectionConfig {
+        url,
+        username,
+        password,
+    })
+}
+
+pub async fn read_server_config(
+    configuration_dir: impl AsRef<Path> + Send,
+    environment: &ConfigurationEnvironment,
+) -> Result<ServerConfig, ConfigurationError> {
+    let file_path = configuration_dir.as_ref().join(CONFIG_FILE_NAME);
+
+    let connection = get_connection_configuration(environment)?;
+
+    let config_file = fs::read_to_string(&file_path)
+        .await
+        .map_err(|err| match err.kind() {
+            std::io::ErrorKind::NotFound => ConfigurationError::FileNotFound(file_path.to_owned()),
+            _ => ConfigurationError::IoError(err),
+        })?;
+
+    let config = serde_json::from_str::<ServerConfigFile>(&config_file).map_err(|err| {
+        ConfigurationError::ParseError {
+            file_path: file_path.to_owned(),
+            line: err.line(),
+            column: err.column(),
+            message: err.to_string(),
+        }
+    })?;
+
+    let table_types = config
+        .tables
+        .iter()
+        .map(|(table_alias, table_config)| {
+            let table_type = validate_and_parse_return_type(
+                &table_config.return_type,
+                &config,
+                &file_path,
+                &["tables", table_alias.inner(), "return_type"],
+            )?
+            .map(|columns| {
+                (
+                    table_alias.to_string().into(),
+                    TableType {
+                        comment: table_config.comment.to_owned(),
+                        columns,
+                    },
+                )
+            });
+
+            Ok(table_type)
+        })
+        .chain(config.queries.iter().map(|(query_alias, query_config)| {
+            let table_type = validate_and_parse_return_type(
+                &query_config.return_type,
+                &config,
+                &file_path,
+                &["query", query_alias.inner(), "return_type"],
+            )?
+            .map(|columns| {
+                (
+                    query_alias.to_string().into(),
+                    TableType {
+                        comment: query_config.comment.to_owned(),
+                        columns,
+                    },
+                )
+            });
+
+            Ok(table_type)
+        }))
+        .filter_map(|table_type| table_type.transpose())
+        .collect::<Result<_, ConfigurationError>>()?;
+
+    let tables = config
+        .tables
+        .iter()
+        .map(|(table_alias, table_config)| {
+            Ok((
+                table_alias.clone(),
+                TableConfig {
+                    name: table_config.name.to_owned(),
+                    schema: table_config.schema.to_owned(),
+                    comment: table_config.comment.to_owned(),
+                    primary_key: table_config.primary_key.to_owned(),
+                    return_type: match &table_config.return_type {
+                        ReturnType::Definition { .. } => table_alias.to_string().into(),
+                        ReturnType::TableReference {
+                            table_name: target_alias,
+                        }
+                        | ReturnType::QueryReference {
+                            query_name: target_alias,
+                        } => target_alias.to_string().into(),
+                    },
+                    arguments: table_config
+                        .arguments
+                        .iter()
+                        .map(|(name, r#type)| {
+                            let data_type =
+                                ClickHouseDataType::from_str(r#type).map_err(|err| {
+                                    ConfigurationError::ValidateError {
+                                        file_path: file_path.to_owned(),
+                                        node_path: vec![
+                                            "tables".to_string(),
+                                            table_alias.to_string(),
+                                            "arguments".to_string(),
+                                            name.to_string(),
+                                        ],
+                                        message: format!("Unable to parse data type: {err}"),
+                                    }
+                                })?;
+
+                            Ok((name.to_owned(), data_type))
+                        })
+                        .collect::<Result<_, _>>()?,
+                },
+            ))
+        })
+        .collect::<Result<BTreeMap<_, _>, ConfigurationError>>()?;
+
+    let mut queries = BTreeMap::new();
+
+    for (query_alias, query_config) in config.queries.clone() {
+        let query_file_path = configuration_dir.as_ref().join(&query_config.file);
+        let file_content =
+            fs::read_to_string(&query_file_path)
+                .await
+                .map_err(|err| match err.kind() {
+                    std::io::ErrorKind::NotFound => {
+                        ConfigurationError::FileNotFound(query_file_path.to_owned())
+                    }
+                    _ => ConfigurationError::IoError(err),
+                })?;
+
+        let query = ParameterizedQuery::from_str(&file_content).map_err(|err| {
+            ConfigurationError::ValidateError {
+                file_path: query_file_path.clone(),
+                node_path: vec!["queries".to_string(), query_alias.to_string()],
+                message: format!("Unable to parse parameterized query: {}", err),
+            }
+        })?;
+
+        let query_definition = ParameterizedQueryConfig {
+            exposed_as: query_config.exposed_as.to_owned(),
+            comment: query_config.comment.to_owned(),
+            query,
+            return_type: match query_config.return_type {
+                ReturnType::Definition { .. } => query_alias.to_string().into(),
+                ReturnType::TableReference {
+                    table_name: target_alias,
+                }
+                | ReturnType::QueryReference {
+                    query_name: target_alias,
+                } => target_alias.to_string().into(),
+            },
+        };
+
+        queries.insert(query_alias.to_owned(), query_definition);
+    }
+
+    let config = ServerConfig {
+        connection,
+        // hardcoding separator for now, to avoid prematurely exposing configuration options we may not want to keep
+        // if we make this configurable, we must default to this separator when the option is not provided
+        namespace_separator: ".".to_string(),
+        table_types,
+        tables,
+        queries,
+    };
+
+    Ok(config)
+}
+
+fn validate_and_parse_return_type(
+    return_type: &ReturnType,
+    config: &ServerConfigFile,
+    file_path: &Path,
+    node_path: &[&str],
+) -> Result<Option<BTreeMap<FieldName, ClickHouseDataType>>, ConfigurationError> {
+    let get_node_path = |extra_segments: &[&str]| {
+        node_path
+            .iter()
+            .chain(extra_segments.iter())
+            .map(ToString::to_string)
+            .collect()
+    };
+    match return_type {
+        ReturnType::TableReference { table_name } => {
+            match config.tables.get(table_name) {
+                Some(TableConfigFile {
+                    return_type: ReturnType::Definition { .. },
+                    ..
+                }) => Ok(None),
+                Some(_) => {
+                    Err(ConfigurationError::ValidateError { file_path: file_path.to_path_buf(), node_path: get_node_path(&["table_name"]), message: format!(
+                        "Invalid reference: referenced table {} which does not have a return type definition",
+                        table_name,
+                    ), })
+                }
+                None => {
+                    Err(ConfigurationError::ValidateError { file_path: file_path.to_path_buf(), node_path: get_node_path(&["table_name"]), message: format!(
+                        "Orphan reference: cannot find referenced table {}",
+                        table_name,
+                    )})
+                }
+            }
+        }
+        ReturnType::QueryReference { query_name } => {
+            match config.queries.get(query_name) {
+                Some(ParameterizedQueryConfigFile {
+                    return_type: ReturnType::Definition { .. },
+                    ..
+                }) => Ok(None),
+                Some(_) => {
+                    Err(ConfigurationError::ValidateError { file_path: file_path.to_path_buf(),
+                        node_path: get_node_path(&["query_name"]),
+                        message: format!(
+                        "Invalid reference: referenced query {} which does not have a return type definition",
+                        query_name,
+                    ), })
+                }
+                None => {
+                    Err(ConfigurationError::ValidateError { file_path: file_path.to_path_buf(),
+                        node_path: get_node_path(&["query_name"]),
+                        message: format!(
+                        "Orphan reference: cannot find referenced query {}",
+                        query_name, ), })
+                }
+            }
+        }
+        ReturnType::Definition { columns } => Ok(Some(
+            columns
+                .iter()
+                .map(|(field_alias, field_type)| {
+                    let data_type =
+                        ClickHouseDataType::from_str(field_type).map_err(|err| {
+                            ConfigurationError::ValidateError {
+                                file_path: file_path.to_path_buf(),
+                                node_path: get_node_path(&["columns", field_alias.inner()]),
+                                message: format!(
+                                    "Unable to parse data type \"{}\": {}",
+                                    field_type, err
+                                ),
+                            }
+                        })?;
+                    Ok((field_alias.to_owned(), data_type))
+                })
+                .collect::<Result<BTreeMap<_, _>, ConfigurationError>>()?,
+        )),
+    }
+}
diff --git a/crates/common/src/lib.rs b/crates/common/src/lib.rs
index 7dad1fd..685c871 100644
--- a/crates/common/src/lib.rs
+++ b/crates/common/src/lib.rs
@@ -3,4 +3,5 @@ pub mod clickhouse_parser;
 pub mod client;
 pub mod config;
 pub mod config_file;
+pub mod format;
 pub mod schema;
diff --git a/crates/ndc-clickhouse/src/connector/setup.rs b/crates/ndc-clickhouse/src/connector/setup.rs
index 6a878ba..aaad914 100644
--- a/crates/ndc-clickhouse/src/connector/setup.rs
+++ b/crates/ndc-clickhouse/src/connector/setup.rs
@@ -1,34 +1,10 @@
-use async_trait::async_trait;
-use common::{
-    clickhouse_parser::{datatype::ClickHouseDataType, parameterized_query::ParameterizedQuery},
-    config::{ConnectionConfig, ParameterizedQueryConfig, ServerConfig, TableConfig, TableType},
-    config_file::{
-        ParameterizedQueryConfigFile, ReturnType, ServerConfigFile, TableConfigFile,
-        CONFIG_FILE_NAME,
-    },
-};
-use ndc_sdk::{
-    connector::{
-        self, Connector, ConnectorSetup, InvalidNode, InvalidNodes, KeyOrIndex, LocatedError,
-        ParseError,
-    },
-    models::FieldName,
-};
-use std::{
-    collections::{BTreeMap, HashMap},
-    env,
-    path::{Path, PathBuf},
-    str::FromStr,
-};
-use tokio::fs;
-
 use super::{state::ServerState, ClickhouseConnector};
+use async_trait::async_trait;
+use common::config::{read_server_config, ConfigurationEnvironment};
+use ndc_sdk::connector::{self, Connector, ConnectorSetup, ErrorResponse};
+use std::{collections::HashMap, path::Path};
 
 #[derive(Debug, Clone)]
-pub struct ClickhouseConnectorSetup {
-    url: Option<String>,
-    username: Option<String>,
-    password: Option<String>,
-}
+pub struct ClickhouseConnectorSetup(ConfigurationEnvironment);
 
 #[async_trait]
 impl ConnectorSetup for ClickhouseConnectorSetup {
@@ -39,7 +15,9 @@ impl ConnectorSetup for ClickhouseConnectorSetup {
         configuration_dir: impl AsRef<Path> + Send,
     ) -> connector::Result<<Self::Connector as Connector>::Configuration> {
         // we wrap read_server_config so the ParseError is implicitly converted into an ErrorResponse
-        Ok(self.read_server_config(configuration_dir).await?)
+        read_server_config(configuration_dir, &self.0)
+            .await
+            .map_err(ErrorResponse::from_error)
     }
 
     async fn try_init_state(
@@ -53,323 +31,12 @@
 
 impl Default for ClickhouseConnectorSetup {
     fn default() -> Self {
-        Self {
-            url: env::var("CLICKHOUSE_URL").ok(),
-            username: env::var("CLICKHOUSE_USERNAME").ok(),
-            password: env::var("CLICKHOUSE_PASSWORD").ok(),
-        }
+        Self(ConfigurationEnvironment::from_environment())
     }
 }
 
 impl ClickhouseConnectorSetup {
     pub fn new_from_env(env: HashMap<String, String>) -> Self {
-        Self {
-            url: env.get("CLICKHOUSE_URL").cloned(),
-            username: env.get("CLICKHOUSE_USERNAME").cloned(),
-            password: env.get("CLICKHOUSE_PASSWORD").cloned(),
-        }
-    }
-    pub async fn read_server_config(
-        &self,
-        configuration_dir: impl AsRef<Path> + Send,
-    ) -> Result<ServerConfig, ParseError> {
-        let file_path = configuration_dir.as_ref().join(CONFIG_FILE_NAME);
-
-        let connection = self.get_connection_config(&file_path)?;
-
-        let config_file = fs::read_to_string(&file_path)
-            .await
-            .map_err(|err| match err.kind() {
-                std::io::ErrorKind::NotFound => {
-                    ParseError::CouldNotFindConfiguration(file_path.to_owned())
-                }
-                _ => ParseError::IoError(err),
-            })?;
-
-        let config = serde_json::from_str::<ServerConfigFile>(&config_file).map_err(|err| {
-            ParseError::ParseError(LocatedError {
-                file_path: file_path.to_owned(),
-                line: err.line(),
-                column: err.column(),
-                message: err.to_string(),
-            })
-        })?;
-
-        let table_types = config
-            .tables
-            .iter()
-            .map(|(table_alias, table_config)| {
-                let table_type = self
-                    .validate_and_parse_return_type(
-                        &table_config.return_type,
-                        &config,
-                        &file_path,
-                        &["tables", table_alias.inner(), "return_type"],
-                    )?
-                    .map(|columns| {
-                        (
-                            table_alias.to_string().into(),
-                            TableType {
-                                comment: table_config.comment.to_owned(),
-                                columns,
-                            },
-                        )
-                    });
-
-                Ok(table_type)
-            })
-            .chain(config.queries.iter().map(|(query_alias, query_config)| {
-                let table_type = self
-                    .validate_and_parse_return_type(
-                        &query_config.return_type,
-                        &config,
-                        &file_path,
-                        &["query", query_alias.inner(), "return_type"],
-                    )?
-                    .map(|columns| {
-                        (
-                            query_alias.to_string().into(),
-                            TableType {
-                                comment: query_config.comment.to_owned(),
-                                columns,
-                            },
-                        )
-                    });
-
-                Ok(table_type)
-            }))
-            .filter_map(|table_type| table_type.transpose())
-            .collect::<Result<_, ParseError>>()?;
-
-        let tables = config
-            .tables
-            .iter()
-            .map(|(table_alias, table_config)| {
-                Ok((
-                    table_alias.clone(),
-                    TableConfig {
-                        name: table_config.name.to_owned(),
-                        schema: table_config.schema.to_owned(),
-                        comment: table_config.comment.to_owned(),
-                        primary_key: table_config.primary_key.to_owned(),
-                        return_type: match &table_config.return_type {
-                            ReturnType::Definition { .. } => table_alias.to_string().into(),
-                            ReturnType::TableReference {
-                                table_name: target_alias,
-                            }
-                            | ReturnType::QueryReference {
-                                query_name: target_alias,
-                            } => target_alias.to_string().into(),
-                        },
-                        arguments: table_config
-                            .arguments
-                            .iter()
-                            .map(|(name, r#type)| {
-                                let data_type =
-                                    ClickHouseDataType::from_str(r#type).map_err(|_err| {
-                                        ParseError::ValidateError(InvalidNodes(vec![InvalidNode {
-                                            file_path: file_path.to_owned(),
-                                            node_path: vec![
-                                                KeyOrIndex::Key("tables".to_string()),
-                                                KeyOrIndex::Key(table_alias.to_string()),
-                                                KeyOrIndex::Key("arguments".to_string()),
-                                                KeyOrIndex::Key(name.to_string()),
-                                            ],
-                                            message: "Unable to parse data type".to_string(),
-                                        }]))
-                                    })?;
-
-                                Ok((name.to_owned(), data_type))
-                            })
-                            .collect::<Result<_, _>>()?,
-                    },
-                ))
-            })
-            .collect::<Result<BTreeMap<_, _>, ParseError>>()?;
-
-        let mut queries = BTreeMap::new();
-
-        for (query_alias, query_config) in config.queries.clone() {
-            let query_file_path = configuration_dir.as_ref().join(&query_config.file);
-            let file_content = fs::read_to_string(&query_file_path).await.map_err(|err| {
-                if let std::io::ErrorKind::NotFound = err.kind() {
-                    ParseError::CouldNotFindConfiguration(query_file_path.to_owned())
-                } else {
-                    ParseError::IoError(err)
-                }
-            })?;
-
-            let query = ParameterizedQuery::from_str(&file_content).map_err(|err| {
-                ParseError::ValidateError(InvalidNodes(vec![InvalidNode {
-                    file_path: query_file_path.clone(),
-                    node_path: vec![
-                        KeyOrIndex::Key("queries".to_string()),
-                        KeyOrIndex::Key(query_alias.to_string()),
-                    ],
-                    message: format!("Unable to parse parameterized query: {}", err),
-                }]))
-            })?;
-
-            let query_definition = ParameterizedQueryConfig {
-                exposed_as: query_config.exposed_as.to_owned(),
-                comment: query_config.comment.to_owned(),
-                query,
-                return_type: match query_config.return_type {
-                    ReturnType::Definition { .. } => query_alias.to_string().into(),
-                    ReturnType::TableReference {
-                        table_name: target_alias,
-                    }
-                    | ReturnType::QueryReference {
-                        query_name: target_alias,
-                    } => target_alias.to_string().into(),
-                },
-            };
-
-            queries.insert(query_alias.to_owned(), query_definition);
-        }
-
-        let config = ServerConfig {
-            connection,
-            // hardcoding separator for now, to avoid prematurely exposing configuration options we may not want to keep
-            // if we make this configurable, we must default to this separator when the option is not provided
-            namespace_separator: ".".to_string(),
-            table_types,
-            tables,
-            queries,
-        };
-
-        Ok(config)
-    }
-    fn get_connection_config(&self, file_path: &PathBuf) -> Result<ConnectionConfig, ParseError> {
-        let url = self
-            .url
-            .to_owned()
-            .ok_or(ParseError::ValidateError(InvalidNodes(vec![InvalidNode {
-                file_path: file_path.to_owned(),
-                node_path: vec![],
-                message: "CLICKHOUSE_URL env var must be set".into(),
-            }])))?;
-        let username = self
-            .username
-            .to_owned()
-            .ok_or(ParseError::ValidateError(InvalidNodes(vec![InvalidNode {
-                file_path: file_path.to_owned(),
-                node_path: vec![],
-                message: "CLICKHOUSE_USERNAME env var must be set".into(),
-            }])))?;
-        let password = self
-            .password
-            .to_owned()
-            .ok_or(ParseError::ValidateError(InvalidNodes(vec![InvalidNode {
-                file_path: file_path.to_owned(),
-                node_path: vec![],
-                message: "CLICKHOUSE_PASSWORD env var must be set".into(),
-            }])))?;
-
-        Ok(ConnectionConfig {
-            url,
-            username,
-            password,
-        })
-    }
-    fn validate_and_parse_return_type(
-        &self,
-        return_type: &ReturnType,
-        config: &ServerConfigFile,
-        file_path: &Path,
-        node_path: &[&str],
-    ) -> Result<Option<BTreeMap<FieldName, ClickHouseDataType>>, ParseError> {
-        let get_node_path = |extra_segments: &[&str]| {
-            node_path
-                .iter()
-                .chain(extra_segments.iter())
-                .map(|s| KeyOrIndex::Key(s.to_string()))
-                .collect()
-        };
-        match return_type {
-            ReturnType::TableReference { table_name } => {
-                match config.tables.get(table_name) {
-                    Some(TableConfigFile {
-                        return_type: ReturnType::Definition { .. },
-                        ..
-                    }) => Ok(None),
-                    Some(_) => {
-                        Err(ParseError::ValidateError(InvalidNodes(vec![
-                            InvalidNode {
-                                file_path: file_path.to_path_buf(),
-                                node_path: get_node_path(&["table_name"]),
-                                message: format!(
-                                    "Invalid reference: referenced table {} which does not have a return type definition",
-                                    table_name,
-                                ),
-                            },
-                        ])))
-                    }
-                    None => {
-                        Err(ParseError::ValidateError(InvalidNodes(vec![
-                            InvalidNode {
-                                file_path: file_path.to_path_buf(),
-                                node_path: get_node_path(&["table_name"]),
-                                message: format!(
-                                    "Orphan reference: cannot find referenced table {}",
-                                    table_name,
-                                ),
-                            },
-                        ])))
-                    }
-                }
-            }
-            ReturnType::QueryReference { query_name } => {
-                match config.queries.get(query_name) {
-                    Some(ParameterizedQueryConfigFile {
-                        return_type: ReturnType::Definition { .. },
-                        ..
-                    }) => Ok(None),
-                    Some(_) => {
-                        Err(ParseError::ValidateError(InvalidNodes(vec![
-                            InvalidNode {
-                                file_path: file_path.to_path_buf(),
-                                node_path: get_node_path(&["query_name"]),
-                                message: format!(
-                                    "Invalid reference: referenced query {} which does not have a return type definition",
-                                    query_name,
-                                ),
-                            },
-                        ])))
-                    }
-                    None => {
-                        Err(ParseError::ValidateError(InvalidNodes(vec![
-                            InvalidNode {
-                                file_path: file_path.to_path_buf(),
-                                node_path: get_node_path(&["query_name"]),
-                                message: format!(
-                                    "Orphan reference: cannot find referenced query {}",
-                                    query_name,
-                                ),
-                            },
-                        ])))
-                    }
-                }
-            }
-            ReturnType::Definition { columns } => Ok(Some(
-                columns
-                    .iter()
-                    .map(|(field_alias, field_type)| {
-                        let data_type =
-                            ClickHouseDataType::from_str(field_type).map_err(|err| {
-                                ParseError::ValidateError(InvalidNodes(vec![InvalidNode {
-                                    file_path: file_path.to_path_buf(),
-                                    node_path: get_node_path(&["columns", field_alias.inner()]),
-                                    message: format!(
-                                        "Unable to parse data type \"{}\": {}",
-                                        field_type, err
-                                    ),
-                                }]))
-                            })?;
-                        Ok((field_alias.to_owned(), data_type))
-                    })
-                    .collect::<Result<BTreeMap<_, _>, ParseError>>()?,
-            )),
-        }
+        Self(ConfigurationEnvironment::from_simulated_environment(env))
     }
 }

From 85ab15dd5bd94888cfc760ad3e9ced77670d1e5e Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Tue, 1 Oct 2024 13:33:00 -0400
Subject: [PATCH 03/11] implement printSchemaAndCapabilities

---
 Cargo.lock                                   | 10 +++--
 crates/common/Cargo.toml                     |  2 +
 crates/ndc-clickhouse-cli/src/main.rs        | 43 +++++++++++++++++--
 crates/ndc-clickhouse/src/sql/ast.rs         |  7 ++-
 .../src/sql/query_builder/parameter.rs       |  6 ++-
 crates/ndc-clickhouse/tests/query_builder.rs | 19 +++++---
 6 files changed, 69 insertions(+), 18 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 13eae04..b961055 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -372,6 +372,8 @@ dependencies = [
  "serde",
  "serde_json",
  "strum",
+ "thiserror",
+ "tokio",
  "tracing",
 ]
 
@@ -2350,18 +2352,18 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "1.0.63"
+version = "1.0.64"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c0342370b38b6a11b6cc11d6a805569958d54cfa061a29969c3b5ce2ea405724"
+checksum = "d50af8abc119fb8bb6dbabcfa89656f46f84aa0ac7688088608076ad2b459a84"
 dependencies = [
  "thiserror-impl",
 ]
 
 [[package]]
 name = "thiserror-impl"
-version = "1.0.63"
+version = "1.0.64"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a4558b58466b9ad7ca0f102865eccc95938dca1a74a856f2b57b6629050da261"
+checksum = "08904e7672f5eb876eaaf87e0ce17857500934f4981c4a0ab2b4aa98baac7fc3"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/crates/common/Cargo.toml b/crates/common/Cargo.toml
index 1be8e3b..4a85548 100644
--- a/crates/common/Cargo.toml
+++ b/crates/common/Cargo.toml
@@ -16,6 +16,8 @@ schemars = "0.8.16"
 serde = { version = "1.0.197", features = ["derive"] }
 serde_json = "1.0.114"
 strum = { version = "0.26.3", features = ["derive"] }
+thiserror = "1.0.64"
+tokio = "1.36.0"
 tracing = "0.1.40"
 
 [dev-dependencies]
diff --git a/crates/ndc-clickhouse-cli/src/main.rs b/crates/ndc-clickhouse-cli/src/main.rs
index f8c3ff5..09694df 100644
--- a/crates/ndc-clickhouse-cli/src/main.rs
+++ b/crates/ndc-clickhouse-cli/src/main.rs
@@ -1,5 +1,5 @@
 use std::{
-    collections::BTreeMap,
+    collections::{BTreeMap, HashMap},
     env,
     error::Error,
     path::{Path, PathBuf},
@@ -8,19 +8,22 @@ use std::{
 
 use clap::{Parser, Subcommand, ValueEnum};
 use common::{
+    capabilities::capabilities_response,
     clickhouse_parser::{
         datatype::ClickHouseDataType,
         parameterized_query::{Parameter, ParameterizedQuery, ParameterizedQueryElement},
     },
-    config::ConnectionConfig,
+    config::{read_server_config, ConfigurationEnvironment, ConnectionConfig},
     config_file::{
         ParameterizedQueryConfigFile, PrimaryKey, ReturnType, ServerConfigFile, TableConfigFile,
         CONFIG_FILE_NAME, CONFIG_SCHEMA_FILE_NAME,
     },
+    schema::schema_response,
 };
 use database_introspection::{introspect_database, TableInfo};
-use ndc_models::{CollectionName, FieldName};
+use ndc_models::{CapabilitiesResponse, CollectionName, FieldName, SchemaResponse};
 use schemars::schema_for;
+use serde::Serialize;
 use tokio::fs;
 
 mod database_introspection;
@@ -82,6 +85,8 @@ enum Command {
     },
     Validate {},
     Watch {},
+    PrintSchemaAndCapabilities {},
+    UpgradeConfiguration {},
 }
 
 #[derive(Clone, ValueEnum)]
@@ -95,6 +100,12 @@ enum LogLevel {
     Trace,
 }
 
+#[derive(Serialize)]
+struct SchemaAndCapabilities {
+    schema: SchemaResponse,
+    capabilities: CapabilitiesResponse,
+}
+
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn Error>> {
     let args = CliArgs::parse();
@@ -144,6 +155,32 @@ async fn main() -> Result<(), Box<dyn Error>> {
         Command::Watch {} => {
            todo!("implement watch command")
        }
+        Command::PrintSchemaAndCapabilities {} => {
+            // set mock values for required env vars, we won't be reading these anyways
+            let env = HashMap::from_iter(vec![
+                ("CLICKHOUSE_URL".to_owned(), "".to_owned()),
+                ("CLICKHOUSE_USERNAME".to_owned(), "".to_owned()),
+                ("CLICKHOUSE_PASSWORD".to_owned(), "".to_owned()),
+            ]);
+            let configuration = read_server_config(
+                context_path,
+                &ConfigurationEnvironment::from_simulated_environment(env),
+            )
+            .await?;
+
+            let schema_and_capabilities = SchemaAndCapabilities {
+                schema: schema_response(&configuration),
+                capabilities: capabilities_response(),
+            };
+            println!(
+                "{}",
+                serde_json::to_string(&schema_and_capabilities)
+                    .expect("Schema and capabilities should serialize to JSON")
+            )
+        }
+        Command::UpgradeConfiguration {} => {
+            println!("Upgrade Configuration command is currently a NOOP")
+        }
     }
 
     Ok(())
diff --git a/crates/ndc-clickhouse/src/sql/ast.rs b/crates/ndc-clickhouse/src/sql/ast.rs
index a287f12..327a342 100644
--- a/crates/ndc-clickhouse/src/sql/ast.rs
+++ b/crates/ndc-clickhouse/src/sql/ast.rs
@@ -1,8 +1,11 @@
 use std::fmt;
 pub mod format;
 use super::QueryBuilderError;
-use common::clickhouse_parser::{datatype::ClickHouseDataType, parameterized_query::ParameterType};
-use format::{display_comma_separated, display_period_separated, display_separated, escape_string};
+use common::{
+    clickhouse_parser::{datatype::ClickHouseDataType, parameterized_query::ParameterType},
+    format::{display_comma_separated, display_period_separated, display_separated},
+};
+use format::escape_string;
 use indexmap::IndexMap;
 
 #[derive(Debug, Clone)]
diff --git a/crates/ndc-clickhouse/src/sql/query_builder/parameter.rs b/crates/ndc-clickhouse/src/sql/query_builder/parameter.rs
index f5d84a8..1315acf 100644
--- a/crates/ndc-clickhouse/src/sql/query_builder/parameter.rs
+++ b/crates/ndc-clickhouse/src/sql/query_builder/parameter.rs
@@ -1,6 +1,8 @@
 use super::{format::escape_string, Expr, Parameter, QueryBuilderError, Value};
-use crate::sql::ast::format::display_separated;
-use common::clickhouse_parser::{datatype::ClickHouseDataType, parameterized_query::ParameterType};
+use common::{
+    clickhouse_parser::{datatype::ClickHouseDataType, parameterized_query::ParameterType},
+    format::display_separated,
+};
 use std::fmt::Display;
 
 pub struct ParameterBuilder {
diff --git a/crates/ndc-clickhouse/tests/query_builder.rs b/crates/ndc-clickhouse/tests/query_builder.rs
index ec746fe..19d58c5 100644
--- a/crates/ndc-clickhouse/tests/query_builder.rs
+++ b/crates/ndc-clickhouse/tests/query_builder.rs
@@ -1,6 +1,10 @@
-use common::{config::ServerConfig, config_file::ServerConfigFile, schema::schema_response};
+use common::{
+    config::{read_server_config, ConfigurationEnvironment, ServerConfig},
+    config_file::ServerConfigFile,
+    schema::schema_response,
+};
 use insta::{assert_snapshot, assert_yaml_snapshot, glob};
-use ndc_clickhouse::{connector::setup::ClickhouseConnectorSetup, sql::QueryBuilder};
+use ndc_clickhouse::sql::QueryBuilder;
 use ndc_sdk::models;
 use schemars::schema_for;
 use std::{collections::HashMap, error::Error, fs, path::PathBuf};
@@ -18,12 +22,13 @@ async fn read_mock_configuration(schema_dir: &str) -> ServerConfig {
         ("CLICKHOUSE_USERNAME".to_owned(), "".to_owned()),
         ("CLICKHOUSE_PASSWORD".to_owned(), "".to_owned()),
     ]);
-    let setup = ClickhouseConnectorSetup::new_from_env(env);
     let config_dir = base_path().join(schema_dir).join("_config");
-    setup
-        .read_server_config(config_dir)
-        .await
-        .expect("Should be able to read configuration")
+    read_server_config(
+        config_dir,
+        &ConfigurationEnvironment::from_simulated_environment(env),
+    )
+    .await
+    .expect("Should be able to read configuration")
 }
 
 fn pretty_print_sql(query: &str) -> String {

From 0753829e14107dcfd791a6a494a5ebaa696ca462 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Tue, 1 Oct 2024 13:33:40 -0400
Subject: [PATCH 04/11] add printSchemaAndCapabilities to connector metadata

---
 ci/templates/connector-metadata.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ci/templates/connector-metadata.yaml b/ci/templates/connector-metadata.yaml
index b895c28..d49daa5 100644
--- a/ci/templates/connector-metadata.yaml
+++ b/ci/templates/connector-metadata.yaml
@@ -16,6 +16,7 @@ supportedEnvironmentVariables:
     required: true
 commands:
   update: hasura-clickhouse update
+  printSchemaAndCapabilities: hasura-clickhouse print-schema-and-capabilities
 cliPlugin:
   name: clickhouse
   version: "${CLI_VERSION}"

From 05f75beb94adcef22dcb004a554a85afb865b4d5 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Tue, 1 Oct 2024 13:51:33 -0400
Subject: [PATCH 05/11] use thiserror for connector errors

---
 Cargo.lock                                    |  1 +
 crates/ndc-clickhouse/Cargo.toml              |  1 +
 .../src/sql/query_builder/error.rs            | 95 +++++--------------
 .../src/sql/query_builder/typecasting.rs      | 50 +++-------
 4 files changed, 37 insertions(+), 110 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index b961055..85cddc0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1160,6 +1160,7 @@ dependencies = [
  "serde_json",
  "sqlformat",
  "strum",
+ "thiserror",
  "tokio",
  "tracing",
 ]
diff --git a/crates/ndc-clickhouse/Cargo.toml b/crates/ndc-clickhouse/Cargo.toml
index 03b848d..000a526 100644
--- a/crates/ndc-clickhouse/Cargo.toml
+++ b/crates/ndc-clickhouse/Cargo.toml
@@ -20,6 +20,7 @@ serde = { version = "1.0.197", features = ["derive"] }
 serde_json = "1.0.114"
 sqlformat = "0.2.3"
 strum = { version = "0.26.3", features = ["derive"] }
+thiserror = "1.0.64"
 tokio = "1.36.0"
 tracing = "0.1.40"
 
diff --git a/crates/ndc-clickhouse/src/sql/query_builder/error.rs b/crates/ndc-clickhouse/src/sql/query_builder/error.rs
index 78783f8..e825d77 100644
--- a/crates/ndc-clickhouse/src/sql/query_builder/error.rs
+++ b/crates/ndc-clickhouse/src/sql/query_builder/error.rs
@@ -8,73 +8,92 @@ use ndc_sdk::{
         ObjectTypeName, RelationshipName,
     },
 };
-use std::fmt;
 
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, thiserror::Error)]
 pub enum QueryBuilderError {
     /// A relationship referenced in the query is missing from the collection_relationships map
+    #[error("Missing relationship: {0}")]
     MissingRelationship(RelationshipName),
     /// An argument required for a native query was not supplied
+    #[error("Argument {argument} required for native query {query} was not supplied")]
     MissingNativeQueryArgument {
         query: CollectionName,
         argument: ArgumentName,
     },
     /// A table was referenced but not found in configuration
+    #[error("Unable to find table {0} in config")]
     UnknownTable(CollectionName),
     /// An argument was supplied for a table that does not have that argument
+    #[error("Unknown argument {argument} supplied for table {table}")]
     UnknownTableArgument {
         table: CollectionName,
         argument: ArgumentName,
     },
-    /// An argument was supplied for a table that does not have that argument
+    /// An argument was supplied for a query that does not have that argument
+    #[error("Unknown argument {argument} supplied for query {query}")]
     UnknownQueryArgument {
         query: CollectionName,
         argument: ArgumentName,
     },
     /// A table in configuration referenced a table type that could not be found
+    #[error("Unable to find table type {0} in config")]
     UnknownTableType(ObjectTypeName),
     /// A column was referenced but not found in configuration
+    #[error("Unable to find column {0} for table {1} in config")]
     UnknownColumn(FieldName, ObjectTypeName),
     /// A field was referenced but not found in configuration
+    #[error("Unknown field {field_name} in type {data_type}")]
     UnknownSubField {
         field_name: FieldName,
         data_type: ClickHouseDataType,
     },
     /// Unable to serialize variables into a json string
+    #[error("Unable to serialize variables into a json string: {0}")]
     CannotSerializeVariables(String),
     /// An unknown single column aggregate function was referenced
+    #[error("Unknown single column aggregate function: {0}")]
     UnknownSingleColumnAggregateFunction(AggregateFunctionName),
     /// An unknown binary comparison operator was referenced
+    #[error("Unknown binary comparison operator: {0}")]
     UnknownBinaryComparisonOperator(ComparisonOperatorName),
     /// A feature is not supported
+    #[error("Not supported: {0}")]
     NotSupported(String),
     /// An error that should never happen, and indicates a bug if triggered
+    #[error("Unexpected: {0}")]
     Unexpected(String),
     /// There was an issue creating typecasting strings
+    #[error("Typecasting: {0}")]
     Typecasting(TypeStringError),
     /// Column type did not match type asserted by request
+    #[error("Column Type Mismatch: expected {expected}, got {got}")]
     ColumnTypeMismatch { expected: String, got: String },
     /// Attempted to cast a JSON value to a mismatching data type
+    #[error("Cannot cast value `{value}` to type `{data_type}`")]
    UnsupportedParameterCast {
        value: serde_json::Value,
        data_type: ParameterType,
    },
     /// Attempted to cast a value to a tuple with a mismatching length
+    #[error("Tuple `{data_type}` length does not match value {value}")]
    TupleLengthMismatch {
        value: serde_json::Value,
        data_type: ParameterType,
    },
     /// Attempted to cast an Array to a named tuple, which should be represented as an object
+    #[error("Expected anonymous tuple for value `{value}`, got `{data_type}`")]
    ExpectedAnonymousTuple {
        value: serde_json::Value,
        data_type: ParameterType,
    },
     /// Attempted to cast an Object to an anonymous tuple, which should be represented as an array
+    #[error("Expected named tuple for value `{value}`, got `{data_type}`")]
     ExpectedNamedTuple {
         value: serde_json::Value,
         data_type: ParameterType,
     },
     /// could not find field required by named tuple or nested in the source json object
+    #[error("Missing field `{field}` for `{data_type}` in `{value}`")]
     MissingNamedField {
         value: serde_json::Value,
         data_type: ParameterType,
@@ -82,76 +101,6 @@ pub enum QueryBuilderError {
     },
 }
 
-impl fmt::Display for QueryBuilderError {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        match self {
-            QueryBuilderError::MissingRelationship(rel) => write!(f, "Missing relationship: {rel}"),
-            QueryBuilderError::MissingNativeQueryArgument { query, argument } => write!(
-                f,
-                "Argument {argument} required for native query {query} was not supplied"
-            ),
-            QueryBuilderError::UnknownTable(t) => write!(f, "Unable to find table {t} in config"),
-            QueryBuilderError::UnknownTableArgument { table, argument } => {
-                write!(f, "Unknown argument {argument} supplied for table {table}")
-            }
-            QueryBuilderError::UnknownQueryArgument { query, argument } => {
-                write!(f, "Unknown argument {argument} supplied for query {query}")
-            }
-
-            QueryBuilderError::UnknownTableType(t) => {
-                write!(f, "Unable to find table type {t} in config")
-            }
-            QueryBuilderError::UnknownColumn(c, t) => {
-                write!(f, "Unable to find column {c} for table {t} in config")
-            }
-            QueryBuilderError::UnknownSubField {
-                field_name,
-                data_type,
-            } => {
-                write!(f, "Unknown field {field_name} in type {data_type}")
-            }
-            QueryBuilderError::CannotSerializeVariables(e) => {
-                write!(f, "Unable to serialize variables into a json string: {e}")
-            }
-            QueryBuilderError::UnknownSingleColumnAggregateFunction(agg) => {
-                write!(f, "Unknown single column aggregate function: {agg}")
-            }
-            QueryBuilderError::UnknownBinaryComparisonOperator(op) => {
-                write!(f, "Unknown binary comparison operator: {op}")
-            }
-            QueryBuilderError::NotSupported(e) => write!(f, "Not supported: {e}"),
-            QueryBuilderError::Unexpected(e) => write!(f, "Unexpected: {e}"),
-            QueryBuilderError::Typecasting(e) => write!(f, "Typecasting: {e}"),
-            QueryBuilderError::ColumnTypeMismatch { expected, got } => {
-                write!(f, "Column Type Mismatch: expected {expected}, got {got}")
-            }
-            QueryBuilderError::UnsupportedParameterCast { value, data_type } => {
-                write!(f, "Cannot cast value `{}` to type `{}`", value, data_type)
-            }
-            QueryBuilderError::TupleLengthMismatch { value, data_type } => {
-                write!(f, "Tuple `{data_type}` length does not match value {value}")
-            }
-            QueryBuilderError::ExpectedAnonymousTuple { value, data_type } => {
-                write!(
-                    f,
-                    "Expected anonymous tuple for value `{value}`, got `{data_type}`"
-                )
-            }
-            QueryBuilderError::ExpectedNamedTuple { value, data_type } => write!(
-                f,
-                "Expected named tuple for value `{value}`, got `{data_type}`"
-            ),
-            QueryBuilderError::MissingNamedField {
-                value,
-                data_type,
-                field,
-            } => write!(f, "Missing field `{field}` for `{data_type}` in `{value}`"),
-        }
-    }
-}
-
-impl std::error::Error for QueryBuilderError {}
-
 impl From<QueryBuilderError> for ErrorResponse {
     fn from(value: QueryBuilderError) -> Self {
         match value {
diff --git a/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs b/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs
index cb0c85b..87c5922 100644
--- a/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs
+++ b/crates/ndc-clickhouse/src/sql/query_builder/typecasting.rs
@@ -1,5 +1,3 @@
-use std::{collections::BTreeMap, fmt::Display, str::FromStr};
-
 use common::{
     clickhouse_parser::datatype::{ClickHouseDataType, Identifier},
     config::ServerConfig,
@@ -13,6 +11,7 @@ use ndc_sdk::models::{
     self, AggregateFunctionName, CollectionName, FieldName, NestedField, ObjectTypeName,
     RelationshipName,
 };
+use std::{collections::BTreeMap, str::FromStr};
 
 use super::QueryBuilderError;
 
@@ -435,60 +434,37 @@ fn get_return_type<'a>(
     })
 }
 
-#[derive(Debug, PartialEq)]
+#[derive(Debug, PartialEq, thiserror::Error)]
 pub enum TypeStringError {
-    UnknownTable {
-        table: CollectionName,
-    },
-    UnknownTableType {
-        table: ObjectTypeName,
-    },
+    #[error("Unknown table: {table}")]
+    UnknownTable { table: CollectionName },
+    #[error("Unknown table type: {table}")]
+    UnknownTableType { table: ObjectTypeName },
+    #[error("Unknown column: {column} in table: {table}")]
     UnknownColumn {
         table: ObjectTypeName,
         column: FieldName,
     },
+    #[error("Unknown aggregate function: {function} for column {column} of type: {data_type} in table {table}")]
     UnknownAggregateFunction {
         table: CollectionName,
         column: FieldName,
         data_type: ClickHouseDataType,
         function: AggregateFunctionName,
     },
+    #[error("Missing relationship: {0}")]
     MissingRelationship(RelationshipName),
+    #[error("Not supported: {0}")]
     NotSupported(String),
-    NestedFieldTypeMismatch {
-        expected: String,
-        got: String,
-    },
+    #[error("Nested field selector type mismatch, expected: {expected}, got {got}")]
+    NestedFieldTypeMismatch { expected: String, got: String },
+    #[error("Missing field {field_name} in object type {object_type}")]
     MissingNestedField {
         field_name: FieldName,
         object_type: ObjectTypeName,
     },
 }
 
-impl Display for TypeStringError {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            TypeStringError::UnknownTable { table } => write!(f, "Unknown table: {table}"),
-            TypeStringError::UnknownTableType { table } => write!(f, "Unknown table type: {table}"),
-            TypeStringError::UnknownColumn { table, column } => {
-                write!(f, "Unknown column: {column} in table: {table}")
-            }
-            TypeStringError::UnknownAggregateFunction {
-                table,
-                column,
-                data_type,
-                function,
-            } => write!(f, "Unknown aggregate function: {function} for column {column} of type: {data_type} in table {table}"),
-            TypeStringError::MissingRelationship(rel) => write!(f, "Missing relationship: {rel}"),
-            TypeStringError::NotSupported(feature) => write!(f, "Not supported: {feature}"),
-            TypeStringError::NestedFieldTypeMismatch { expected, got } => write!(f, "Nested field selector type mismatch, expected: {expected}, got {got}"),
-            TypeStringError::MissingNestedField { field_name, object_type } => write!(f, "Missing field {field_name} in object type {object_type}"),
-        }
-    }
-}
-
-impl std::error::Error for TypeStringError {}
-
 impl From<TypeStringError> for QueryBuilderError {
     fn from(value: TypeStringError) -> Self {
         QueryBuilderError::Typecasting(value)

From fe88a62600d8296245764252b53d929782003e83 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Wed, 2 Oct 2024 17:50:16 -0400
Subject: [PATCH 06/11] bugfix: include scalar dependencies in schema response

---
 crates/common/src/schema/type_definition.rs   |  38 ++++-
 crates/ndc-clickhouse-cli/src/testing.http    |  56 +++++++
 ...uery_builder__chinook Schema Response.snap |  73 +++++++++
 ...lder__complex_columns Schema Response.snap | 146 ++++++++++++++++++
 ..._builder__star_schema Schema Response.snap |  73 +++++++++
 5 files changed, 381 insertions(+), 5 deletions(-)
 create mode 100644 crates/ndc-clickhouse-cli/src/testing.http

diff --git a/crates/common/src/schema/type_definition.rs b/crates/common/src/schema/type_definition.rs
index b5a2f87..13835ba 100644
--- a/crates/common/src/schema/type_definition.rs
+++ b/crates/common/src/schema/type_definition.rs
@@ -34,7 +34,7 @@ impl<'a> NameSpace<'a> {
     }
 }
 
-#[derive(Debug, Clone)]
+#[derive(Debug, Clone, PartialEq)]
 pub struct ClickHouseScalar(ClickHouseDataType);
 
 impl ClickHouseScalar {
@@ -596,10 +596,38 @@ impl ClickHouseTypeDefinition {
     /// note that ScalarType definitions may be duplicated
     pub fn type_definitions(&self) -> SchemaTypeDefinitions {
         match self {
-            ClickHouseTypeDefinition::Scalar(scalar) => SchemaTypeDefinitions {
-                scalars: vec![(scalar.type_name(), scalar.type_definition())],
-                objects: vec![],
-            },
+            ClickHouseTypeDefinition::Scalar(scalar) => {
+                // add the definition for this scalar, and any dependencies
+                fn get_dependencies(
+                    aggregate_functions: Vec<(
+                        ClickHouseSingleColumnAggregateFunction,
+                        ClickHouseDataType,
+                    )>,
+                    scalars: &mut IndexMap<ScalarTypeName, ScalarType>,
+                ) {
+                    for (_, return_type) in aggregate_functions {
+                        let return_type = ClickHouseScalar(return_type);
+                        if !scalars.contains_key(&return_type.type_name()) {
+                            scalars.insert(return_type.type_name(), return_type.type_definition());
+                            get_dependencies(return_type.aggregate_functions(), scalars);
+                        }
+                    }
+                }
+
+                let mut scalars = IndexMap::new();
+
+                scalars.insert(scalar.type_name(), scalar.type_definition());
+
+                get_dependencies(scalar.aggregate_functions(), &mut scalars);
+
+                let scalars = scalars.into_iter().collect();
+
+                SchemaTypeDefinitions {
+                    scalars,
+                    objects: vec![],
+                }
+            }
+
             ClickHouseTypeDefinition::Nullable { inner } => inner.type_definitions(),
             ClickHouseTypeDefinition::Array { element_type } => element_type.type_definitions(),
             ClickHouseTypeDefinition::Object {
diff --git a/crates/ndc-clickhouse-cli/src/testing.http b/crates/ndc-clickhouse-cli/src/testing.http
new file mode 100644
index 0000000..c9300f0
--- /dev/null
+++ b/crates/ndc-clickhouse-cli/src/testing.http
@@ -0,0 +1,56 @@
+POST http://localhost:8123
+X-ClickHouse-User: default
+X-ClickHouse-Key: default
+
+SELECT t.table_name AS "table_name",
+    t.table_schema AS "table_schema",
+    t.table_catalog AS "table_catalog",
+    t.table_comment AS "table_comment",
+    if(empty(st.primary_key), null, st.primary_key) AS "primary_key",
+    toString(t.table_type) AS "table_type",
+    v.view_definition AS "view_definition",
+    cast(
+        c.columns,
+        'Array(Tuple(column_name String, data_type String, is_nullable Bool, is_in_primary_key Bool))'
+    ) AS "columns"
+FROM INFORMATION_SCHEMA.TABLES AS t
+    LEFT JOIN INFORMATION_SCHEMA.VIEWS AS v ON v.table_schema = t.table_schema
+    AND v.table_name = t.table_name
+    LEFT JOIN system.tables AS st ON st.database = t.table_schema
+    AND st.name = t.table_name
+    LEFT JOIN (
+        SELECT c.table_catalog,
+            c.table_schema,
+            c.table_name,
+            groupArray(
+                tuple(
+                    c.column_name,
+                    c.data_type,
+                    toBool(c.is_nullable),
+                    toBool(sc.is_in_primary_key)
+                )
+            ) AS "columns"
+        FROM INFORMATION_SCHEMA.COLUMNS AS c
+            LEFT JOIN system.columns AS sc ON sc.database = c.table_schema
+            AND sc.table = c.table_name
+            AND sc.name = c.column_name
+        GROUP BY c.table_catalog,
+            c.table_schema,
+            c.table_name
+    ) AS c ON t.table_catalog = c.table_catalog
+    AND t.table_schema = c.table_schema
+    AND t.table_name = c.table_name
+WHERE t.table_catalog NOT IN (
+        'system',
+        'INFORMATION_SCHEMA',
+        'information_schema'
+    ) FORMAT JSON;
+
+
+###
+POST http://localhost:8123
+X-ClickHouse-User: default
+X-ClickHouse-Key: default
+
+CREATE VIEW filtered_artists AS
+SELECT * FROM Artist WHERE ArtistId >= {ArtistId: Int32}
\ No newline at end of file
diff --git a/crates/ndc-clickhouse/tests/snapshots/query_builder__chinook Schema Response.snap b/crates/ndc-clickhouse/tests/snapshots/query_builder__chinook Schema Response.snap
index 913c802..747dbac 100644
--- a/crates/ndc-clickhouse/tests/snapshots/query_builder__chinook Schema Response.snap
+++ b/crates/ndc-clickhouse/tests/snapshots/query_builder__chinook Schema Response.snap
@@ -247,6 +247,79 @@ scalar_types:
         element_type:
           type: named
          name: Int32
+  Int64:
+    representation:
+      type: int64
+    aggregate_functions:
+      avg:
+        result_type:
+          type: named
+          name: Float64
+      max:
+        result_type:
+          type: named
+          name: Int64
+      min:
+        result_type:
+          type: named
+          name: Int64
+      stddev_pop:
+        result_type:
+          type: named
+          name: Float64
+      stddev_samp:
+        result_type:
+          type: named
+          name: Float64
+      sum:
+        result_type:
+          type: named
+          name: Int64
+      var_pop:
+        result_type:
+          type: named
+          name: Float64
+      var_samp:
+        result_type:
+          type: named
+          name: Float64
+    comparison_operators:
+      _eq:
+        type: equal
+      _gt:
+        type: custom
+        argument_type:
+          type: named
+          name: Int64
+      _gte:
+        type: custom
+        argument_type:
+          type: named
+          name: Int64
+      _in:
+        type: in
+      _lt:
+        type: custom
+        argument_type:
+          type: named
+          name: Int64
+      _lte:
+        type: custom
+        argument_type:
+          type: named
+          name: Int64
+      _neq:
+        type: custom
+        argument_type:
+          type: named
+          name: Int64
+      _nin:
+        type: custom
+        argument_type:
+          type: array
+          element_type:
+            type: named
+            name: Int64
   String:
     representation:
       type: string
diff --git a/crates/ndc-clickhouse/tests/snapshots/query_builder__complex_columns Schema Response.snap b/crates/ndc-clickhouse/tests/snapshots/query_builder__complex_columns Schema Response.snap
index 13e0e18..f4293ee 100644
--- a/crates/ndc-clickhouse/tests/snapshots/query_builder__complex_columns Schema Response.snap
+++ b/crates/ndc-clickhouse/tests/snapshots/query_builder__complex_columns Schema Response.snap
@@ -3,6 +3,79 @@ source: crates/ndc-clickhouse/tests/query_builder.rs
 expression: schema
 ---
 scalar_types:
+  Float64:
+    representation:
+      type: float64
+    aggregate_functions:
+      avg:
+        result_type:
+          type: named
+          name: Float64
+      max:
+        result_type:
+          type: named
+          name: Float64
+      min:
+        result_type:
+          type: named
+          name: Float64
+      stddev_pop:
+        result_type:
+          type: named
+          name: Float64
+      stddev_samp:
+        result_type:
+          type: named
+          name: Float64
+      sum:
+        result_type:
+          type: named
+          name: Float64
+      var_pop:
+        result_type:
+          type: named
+          name: Float64
+      var_samp:
+        result_type:
+          type: named
+          name: Float64
+    comparison_operators:
+      _eq:
+        type: equal
+      _gt:
+        type: custom
+        argument_type:
+          type: named
+          name: Float64
+      _gte:
+        type: custom
+        argument_type:
+          type: named
+          name: Float64
+      _in:
+        type: in
+      _lt:
+        type: custom
+        argument_type:
+          type: named
+          name: Float64
+      _lte:
+        type: custom
+        argument_type:
+          type: named
+          name: Float64
+      _neq:
+        type: custom
+        argument_type:
+          type: named
+          name: Float64
+      _nin:
+        type: custom
+        argument_type:
+          type: array
+          element_type:
+            type: named
+            name: Float64
   "Map(String, String)":
     aggregate_functions: {}
     comparison_operators: {}
@@ -148,6 +221,79 @@ scalar_types:
         element_type:
          type: named
          name: UInt32
+  UInt64:
+    representation:
+      type: biginteger
+    aggregate_functions:
+      avg:
+        result_type:
+          type: named
+          name: Float64
+      max:
+        result_type:
+          type: named
+          name: UInt64
+      min:
+        result_type:
+          type: named
+          name: UInt64
+      stddev_pop:
+        result_type:
+          type: named
+          name: Float64
+      stddev_samp:
+        result_type:
+          type: named
+          name: Float64
+      sum:
+        result_type:
+          type: named
+          name: UInt64
+      var_pop:
+        result_type:
+          type: named
+          name: Float64
+      var_samp:
+        result_type:
+          type: named
+          name: Float64
+    comparison_operators:
+      _eq:
+        type: equal
+      _gt:
+        type: custom
+        argument_type:
+          type: named
+          name: UInt64
+      _gte:
+        type: custom
+        argument_type:
+          type: named
+          name: UInt64
+      _in:
+        type: in
+      _lt:
+        type: custom
+        argument_type:
+          type: named
+          name: UInt64
+      _lte:
+        type: custom
+        argument_type:
+          type: named
+          name: UInt64
+      _neq:
+        type: custom
+        argument_type:
+          type: named
+          name: UInt64
+      _nin:
+        type: custom
+        argument_type:
+          type: array
+          element_type:
+            type: named
+            name: UInt64
 object_types:
   TableOne:
     fields:
diff --git a/crates/ndc-clickhouse/tests/snapshots/query_builder__star_schema Schema Response.snap b/crates/ndc-clickhouse/tests/snapshots/query_builder__star_schema Schema Response.snap
index ee05939..9a0f009 100644
--- a/crates/ndc-clickhouse/tests/snapshots/query_builder__star_schema Schema Response.snap
+++ b/crates/ndc-clickhouse/tests/snapshots/query_builder__star_schema Schema Response.snap
@@ -52,6 +52,79 @@ scalar_types:
         element_type:
          type: named
          name: Date
+  Float64:
+    representation:
+      type: float64
+    aggregate_functions:
+      avg:
+        result_type:
+          type: named
+          name: Float64
+      max:
+        result_type:
+          type: named
+          name: Float64
+      min:
+        result_type:
+          type: named
+          name: Float64
+      stddev_pop:
+        result_type:
+          type: named
+          name: Float64
+      stddev_samp:
+        result_type:
+          type: named
+          name: Float64
+      sum:
+        result_type:
+          type: named
+          name: Float64
+      var_pop:
+        result_type:
+          type: named
+          name: Float64
+      var_samp:
+        result_type:
+          type: named
+          name: Float64
+    comparison_operators:
+      _eq:
+        type: equal
+      _gt:
+        type: custom
+        argument_type:
+          type: named
+          name: Float64
+      _gte:
+        type: custom
+        argument_type:
+          type: named
+          name: Float64
+      _in:
+        type: in
+      _lt:
+        type: custom
+        argument_type:
+          type: named
+          name: Float64
+      _lte:
+        type: custom
+        argument_type:
+          type: named
+          name: Float64
+      _neq:
+        type: custom
+        argument_type:
+          type: named
+          name: Float64
+      _nin:
+        type: custom
+        argument_type:
+          type: array
+          element_type:
+            type: named
+            name: Float64
   Int64:
     representation:
       type: int64

From 6a6b7390c17abc1a1c54976b296b0baf39218fbf Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Wed, 2 Oct 2024 17:50:22 -0400
Subject: [PATCH 07/11] changelog

---
 CHANGELOG.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 2494ed7..dc5bef8 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [1.0.4]
+
+- Implement PrintSchemaAndCapabilities command
+- Bug fix: scalar aggregates may return other scalars, creating dependencies. Fix schema response not including those scalar type dependencies
+
 ## [1.0.3]
 
 - Update SDK version to enable unauthorized access to health endpoint

From 285c41ff8fc2ee06c64172c6d2b82a20c928debd Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Wed, 2 Oct 2024 17:50:35 -0400
Subject: [PATCH 08/11] bump version

---
 Cargo.lock | 6 +++---
 Cargo.toml | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 85cddc0..1e4cad1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -360,7 +360,7 @@ checksum = "97af0562545a7d7f3d9222fcf909963bec36dcb502afaacab98c6ffac8da47ce"
 
 [[package]]
 name = "common"
-version = "1.0.3"
+version = "1.0.4"
 dependencies = [
  "bytes",
  "indexmap 2.5.0",
@@ -1144,7 +1144,7 @@ dependencies = [
 
 [[package]]
 name = "ndc-clickhouse"
-version = "1.0.3"
+version = "1.0.4"
 dependencies = [
  "async-trait",
  "bytes",
@@ -1167,7 +1167,7 @@ dependencies = [
 
 [[package]]
 name = "ndc-clickhouse-cli"
-version = "1.0.3"
+version = "1.0.4"
 dependencies = [
  "clap",
  "common",
diff --git a/Cargo.toml b/Cargo.toml
index b2d2d24..4c2a7db 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,7 +6,7 @@ members = [
 ]
 resolver = "2"
 
-package.version = "1.0.3"
+package.version = "1.0.4"
 package.edition = "2021"
 
 # insta performs better in release mode

From 92534ea2a6729b2723a9db84b8bed35d969039aa Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Thu, 3 Oct 2024 23:45:39 -0400
Subject: [PATCH 09/11] remove testing file that was never meant to be there

---
 crates/ndc-clickhouse-cli/src/testing.http | 56 ----------------------
 1 file changed, 56 deletions(-)
 delete mode 100644 crates/ndc-clickhouse-cli/src/testing.http

diff --git a/crates/ndc-clickhouse-cli/src/testing.http b/crates/ndc-clickhouse-cli/src/testing.http
deleted file mode 100644
index c9300f0..0000000
--- a/crates/ndc-clickhouse-cli/src/testing.http
+++ /dev/null
@@ -1,56 +0,0 @@
-POST http://localhost:8123
-X-ClickHouse-User: default
-X-ClickHouse-Key: default
-
-SELECT t.table_name AS "table_name",
-    t.table_schema AS "table_schema",
-    t.table_catalog AS "table_catalog",
-    t.table_comment AS "table_comment",
-    if(empty(st.primary_key), null, st.primary_key) AS "primary_key",
-    toString(t.table_type) AS "table_type",
-    v.view_definition AS "view_definition",
-    cast(
-        c.columns,
-        'Array(Tuple(column_name String, data_type String, is_nullable Bool, is_in_primary_key Bool))'
-    ) AS "columns"
-FROM INFORMATION_SCHEMA.TABLES AS t
-    LEFT JOIN INFORMATION_SCHEMA.VIEWS AS v ON v.table_schema = t.table_schema
-    AND v.table_name = t.table_name
-    LEFT JOIN system.tables AS st ON st.database = t.table_schema
-    AND st.name = t.table_name
-    LEFT JOIN (
-        SELECT c.table_catalog,
-            c.table_schema,
-            c.table_name,
-            groupArray(
-                tuple(
-                    c.column_name,
-                    c.data_type,
-                    toBool(c.is_nullable),
-                    toBool(sc.is_in_primary_key)
-                )
-            ) AS "columns"
-        FROM INFORMATION_SCHEMA.COLUMNS AS c
-            LEFT JOIN system.columns AS sc ON sc.database = c.table_schema
-            AND sc.table = c.table_name
-            AND sc.name = c.column_name
-        GROUP BY c.table_catalog,
-            c.table_schema,
-            c.table_name
-    ) AS c ON t.table_catalog = c.table_catalog
-    AND t.table_schema = c.table_schema
-    AND t.table_name = c.table_name
-WHERE t.table_catalog NOT IN (
-        'system',
-        'INFORMATION_SCHEMA',
-        'information_schema'
-    ) FORMAT JSON;
-
-
-###
-POST http://localhost:8123
-X-ClickHouse-User: default
-X-ClickHouse-Key: default
-
-CREATE VIEW filtered_artists AS
-SELECT * FROM Artist WHERE ArtistId >= {ArtistId: Int32}
\ No newline at end of file

From 3feed04a322cdbed1e5f86bee6ab078b991bd6a4 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Thu, 3 Oct 2024 23:48:04 -0400
Subject: [PATCH 10/11] improve changelog

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index dc5bef8..9ad6c08 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,7 +9,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [1.0.4]
 
-- Implement PrintSchemaAndCapabilities command
+- Implement PrintSchemaAndCapabilities command. Enables the ddn CLI to perform introspection without needing to start an instance of the connector, which makes introspection faster. Note the DDN CLI does not yet implement this.
 - Bug fix: scalar aggregates may return other scalars, creating dependencies. Fix schema response not including those scalar type dependencies
 
 ## [1.0.3]

From 365bd92fd829891f2dfcaccbb0d264bfda8471e9 Mon Sep 17 00:00:00 2001
From: Benoit Ranque
Date: Sat, 5 Oct 2024 10:16:43 -0400
Subject: [PATCH 11/11] add release date to changelog entry

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9ad6c08..694c2b6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,7 +7,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
-## [1.0.4]
+## [1.0.4] - 2024-10-05
 
 - Implement PrintSchemaAndCapabilities command. Enables the ddn CLI to perform introspection without needing to start an instance of the connector, which makes introspection faster. Note the DDN CLI does not yet implement this.
 - Bug fix: scalar aggregates may return other scalars, creating dependencies. Fix schema response not including those scalar type dependencies
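
For readers wiring the new `print-schema-and-capabilities` subcommand (patches 03 and 04) into other tooling, here is a minimal sketch of invoking it and deserializing its single-line JSON output. The subcommand name follows clap's kebab-case derivation of `PrintSchemaAndCapabilities`, the binary name comes from the connector metadata, and the output shape mirrors the `SchemaAndCapabilities` struct added in patch 03; how the CLI locates the configuration directory (flags or environment) is assumed to match the existing `update` command and is omitted here.

```rust
use ndc_models::{CapabilitiesResponse, SchemaResponse};
use serde::Deserialize;
use std::process::Command;

// Deserialize-side mirror of the Serialize-only struct added to the CLI in patch 03.
#[derive(Deserialize)]
struct SchemaAndCapabilities {
    schema: SchemaResponse,
    capabilities: CapabilitiesResponse,
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Run the CLI plugin; it prints one line of JSON to stdout.
    // No ClickHouse connection is needed: the command mocks the
    // CLICKHOUSE_* variables internally before reading the configuration.
    let output = Command::new("hasura-clickhouse")
        .arg("print-schema-and-capabilities")
        .output()?;
    let parsed: SchemaAndCapabilities = serde_json::from_slice(&output.stdout)?;
    // The full schema is available without starting a connector instance.
    println!(
        "{} collections, ndc-spec version {}",
        parsed.schema.collections.len(),
        parsed.capabilities.version
    );
    Ok(())
}
```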