diff --git a/.github/workflows/lint-and-test.yml b/.github/workflows/lint-and-test.yml
index 628a8f59d..d97d4a435 100644
--- a/.github/workflows/lint-and-test.yml
+++ b/.github/workflows/lint-and-test.yml
@@ -73,6 +73,8 @@ jobs:
         run: cargo test --all-features
       - name: Dry run cargo test (proof-of-sql) (test feature only)
         run: cargo test -p proof-of-sql --no-run --no-default-features --features="test"
+      - name: Dry run cargo test (proof-of-sql) (arrow feature only)
+        run: cargo test -p proof-of-sql --no-run --no-default-features --features="arrow"
      - name: Dry run cargo test (proof-of-sql) (blitzar feature only)
         run: cargo test -p proof-of-sql --no-run --no-default-features --features="blitzar"
       - name: Dry run cargo test (proof-of-sql) (no features)
diff --git a/crates/proof-of-sql-parser/Cargo.toml b/crates/proof-of-sql-parser/Cargo.toml
index 745c2fa02..9ab7e4d12 100644
--- a/crates/proof-of-sql-parser/Cargo.toml
+++ b/crates/proof-of-sql-parser/Cargo.toml
@@ -15,7 +15,6 @@ doctest = true
 test = true
 
 [dependencies]
-arrow = { workspace = true }
 arrayvec = { workspace = true, features = ["serde"] }
 bigdecimal = { workspace = true }
 chrono = { workspace = true, features = ["serde"] }
diff --git a/crates/proof-of-sql-parser/src/posql_time/unit.rs b/crates/proof-of-sql-parser/src/posql_time/unit.rs
index 4383240a8..28b7739e1 100644
--- a/crates/proof-of-sql-parser/src/posql_time/unit.rs
+++ b/crates/proof-of-sql-parser/src/posql_time/unit.rs
@@ -1,5 +1,4 @@
 use super::PoSQLTimestampError;
-use arrow::datatypes::TimeUnit as ArrowTimeUnit;
 use core::fmt;
 use serde::{Deserialize, Serialize};
 
@@ -29,28 +28,6 @@ impl TryFrom<&str> for PoSQLTimeUnit {
     }
 }
 
-impl From<PoSQLTimeUnit> for ArrowTimeUnit {
-    fn from(unit: PoSQLTimeUnit) -> Self {
-        match unit {
-            PoSQLTimeUnit::Second => ArrowTimeUnit::Second,
-            PoSQLTimeUnit::Millisecond => ArrowTimeUnit::Millisecond,
-            PoSQLTimeUnit::Microsecond => ArrowTimeUnit::Microsecond,
-            PoSQLTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond,
-        }
-    }
-}
-
-impl From<ArrowTimeUnit> for PoSQLTimeUnit {
-    fn from(unit: ArrowTimeUnit) -> Self {
-        match unit {
-            ArrowTimeUnit::Second => PoSQLTimeUnit::Second,
-            ArrowTimeUnit::Millisecond => PoSQLTimeUnit::Millisecond,
-            ArrowTimeUnit::Microsecond => PoSQLTimeUnit::Microsecond,
-            ArrowTimeUnit::Nanosecond => PoSQLTimeUnit::Nanosecond,
-        }
-    }
-}
-
 impl fmt::Display for PoSQLTimeUnit {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         match self {
diff --git a/crates/proof-of-sql/Cargo.toml b/crates/proof-of-sql/Cargo.toml
index e6dc8b8c9..040b9f9d6 100644
--- a/crates/proof-of-sql/Cargo.toml
+++ b/crates/proof-of-sql/Cargo.toml
@@ -21,7 +21,7 @@ ark-ff = { workspace = true }
 ark-poly = { workspace = true }
 ark-serialize = { workspace = true }
 ark-std = { workspace = true }
-arrow = { workspace = true }
+arrow = { workspace = true, optional = true }
 bit-iter = { workspace = true }
 bigdecimal = { workspace = true }
 blake3 = { workspace = true }
@@ -63,8 +63,12 @@ tracing-opentelemetry = { workspace = true }
 tracing-subscriber = { workspace = true }
 flexbuffers = { workspace = true }
 
+[package.metadata.cargo-udeps.ignore]
+development = ["arrow-csv"]
+
 [features]
-default = ["blitzar"]
+default = ["arrow", "blitzar"]
+arrow = ["dep:arrow"]
 test = ["dep:rand"]
 
 [lints]
@@ -76,7 +80,7 @@ required-features = [ "blitzar", "test" ]
 
 [[example]]
 name = "posql_db"
-required-features = [ "blitzar" ]
+required-features = [ "arrow", "blitzar" ]
 
 [[bench]]
 name = "criterion_benches"
diff --git a/crates/proof-of-sql/examples/posql_db/run_example.sh b/crates/proof-of-sql/examples/posql_db/run_example.sh
index ab259fc8e..91f773fe4 100644
--- a/crates/proof-of-sql/examples/posql_db/run_example.sh
+++ b/crates/proof-of-sql/examples/posql_db/run_example.sh
@@ -1,5 +1,5 @@
 cd crates/proof-of-sql/examples/posql_db
-cargo run --example posql_db create -t sxt.table -c a,b -d BIGINT,VARCHAR
-cargo run --example posql_db append -t sxt.table -f hello_world.csv
-cargo run --example posql_db prove -q "SELECT b FROM sxt.table WHERE a = 2" -f hello.proof
-cargo run --example posql_db verify -q "SELECT b FROM sxt.table WHERE a = 2" -f hello.proof
\ No newline at end of file
+cargo run --features="arrow blitzar" --example posql_db create -t sxt.table -c a,b -d BIGINT,VARCHAR
+cargo run --features="arrow blitzar" --example posql_db append -t sxt.table -f hello_world.csv
+cargo run --features="arrow blitzar" --example posql_db prove -q "SELECT b FROM sxt.table WHERE a = 2" -f hello.proof
+cargo run --features="arrow blitzar" --example posql_db verify -q "SELECT b FROM sxt.table WHERE a = 2" -f hello.proof
\ No newline at end of file
diff --git a/crates/proof-of-sql/src/base/commitment/table_commitment.rs b/crates/proof-of-sql/src/base/commitment/table_commitment.rs
index 64e0fe8c7..837a172fd 100644
--- a/crates/proof-of-sql/src/base/commitment/table_commitment.rs
+++ b/crates/proof-of-sql/src/base/commitment/table_commitment.rs
@@ -2,13 +2,13 @@ use super::{
     committable_column::CommittableColumn, AppendColumnCommitmentsError, ColumnCommitments,
     ColumnCommitmentsMismatch, Commitment, DuplicateIdentifiers,
 };
+#[cfg(feature = "arrow")]
+use crate::base::database::{ArrayRefExt, ArrowArrayToColumnConversionError};
 use crate::base::{
-    database::{
-        ArrayRefExt, ArrowArrayToColumnConversionError, Column, ColumnField, CommitmentAccessor,
-        OwnedTable, TableRef,
-    },
+    database::{Column, ColumnField, CommitmentAccessor, OwnedTable, TableRef},
     scalar::Scalar,
 };
+#[cfg(feature = "arrow")]
 use arrow::record_batch::RecordBatch;
 use bumpalo::Bump;
 use proof_of_sql_parser::{Identifier, ParseError};
@@ -63,6 +63,7 @@ pub enum TableCommitmentArithmeticError {
 }
 
 /// Errors that can occur when trying to create or extend a [`TableCommitment`] from a record batch.
+#[cfg(feature = "arrow")]
 #[derive(Debug, Error)]
 pub enum RecordBatchToColumnsError {
     /// Error converting from arrow array
@@ -74,6 +75,7 @@ pub enum RecordBatchToColumnsError {
 }
 
 /// Errors that can occur when attempting to append a record batch to a [`TableCommitment`].
+#[cfg(feature = "arrow")]
 #[derive(Debug, Error)]
 pub enum AppendRecordBatchTableCommitmentError {
     /// During commitment operation, metadata indicates that operand tables cannot be the same.
@@ -354,6 +356,7 @@ impl<C: Commitment> TableCommitment<C> {
     /// The row offset is assumed to be the end of the [`TableCommitment`]'s current range.
     ///
     /// Will error on a variety of mismatches, or if the provided columns have mixed length.
+    #[cfg(feature = "arrow")]
     pub fn try_append_record_batch(
         &mut self,
         batch: &RecordBatch,
@@ -380,6 +383,7 @@ impl<C: Commitment> TableCommitment<C> {
         }
     }
     /// Returns a [`TableCommitment`] to the provided arrow [`RecordBatch`].
+    #[cfg(feature = "arrow")]
     pub fn try_from_record_batch(
         batch: &RecordBatch,
         setup: &C::PublicSetup<'_>,
@@ -388,6 +392,7 @@ impl<C: Commitment> TableCommitment<C> {
     }
 
     /// Returns a [`TableCommitment`] to the provided arrow [`RecordBatch`] with the given row offset.
+    #[cfg(feature = "arrow")]
     pub fn try_from_record_batch_with_offset(
         batch: &RecordBatch,
         offset: usize,
@@ -411,6 +416,7 @@ impl<C: Commitment> TableCommitment<C> {
     }
 }
 
+#[cfg(feature = "arrow")]
 fn batch_to_columns<'a, S: Scalar + 'a>(
     batch: &'a RecordBatch,
     alloc: &'a Bump,
@@ -446,7 +452,7 @@ fn num_rows_of_columns<'a>(
     Ok(num_rows)
 }
 
-#[cfg(all(test, feature = "blitzar"))]
+#[cfg(all(test, feature = "arrow", feature = "blitzar"))]
 mod tests {
     use super::*;
     use crate::{
diff --git a/crates/proof-of-sql/src/base/database/column.rs b/crates/proof-of-sql/src/base/database/column.rs
index a63781aaa..d8a1ae5d0 100644
--- a/crates/proof-of-sql/src/base/database/column.rs
+++ b/crates/proof-of-sql/src/base/database/column.rs
@@ -3,6 +3,7 @@ use crate::base::{
     math::decimal::{scale_scalar, Precision},
     scalar::Scalar,
 };
+#[cfg(feature = "arrow")]
 use arrow::datatypes::{DataType, Field, TimeUnit as ArrowTimeUnit};
 use bumpalo::Bump;
 use proof_of_sql_parser::{
@@ -350,6 +351,7 @@ impl ColumnType {
 }
 
 /// Convert ColumnType values to some arrow DataType
+#[cfg(feature = "arrow")]
 impl From<&ColumnType> for DataType {
     fn from(column_type: &ColumnType) -> Self {
         match column_type {
@@ -363,15 +365,22 @@ impl From<&ColumnType> for DataType {
             }
             ColumnType::VarChar => DataType::Utf8,
             ColumnType::Scalar => unimplemented!("Cannot convert Scalar type to arrow type"),
-            ColumnType::TimestampTZ(timeunit, timezone) => DataType::Timestamp(
-                ArrowTimeUnit::from(*timeunit),
-                Some(Arc::from(timezone.to_string())),
-            ),
+            ColumnType::TimestampTZ(timeunit, timezone) => {
+                let arrow_timezone = Some(Arc::from(timezone.to_string()));
+                let arrow_timeunit = match timeunit {
+                    PoSQLTimeUnit::Second => ArrowTimeUnit::Second,
+                    PoSQLTimeUnit::Millisecond => ArrowTimeUnit::Millisecond,
+                    PoSQLTimeUnit::Microsecond => ArrowTimeUnit::Microsecond,
+                    PoSQLTimeUnit::Nanosecond => ArrowTimeUnit::Nanosecond,
+                };
+                DataType::Timestamp(arrow_timeunit, arrow_timezone)
+            }
         }
     }
 }
 
 /// Convert arrow DataType values to some ColumnType
+#[cfg(feature = "arrow")]
 impl TryFrom<DataType> for ColumnType {
     type Error = String;
 
@@ -385,10 +394,18 @@ impl TryFrom<DataType> for ColumnType {
             DataType::Decimal256(precision, scale) if precision <= 75 => {
                 Ok(ColumnType::Decimal75(Precision::new(precision)?, scale))
             }
-            DataType::Timestamp(time_unit, timezone_option) => Ok(ColumnType::TimestampTZ(
-                PoSQLTimeUnit::from(time_unit),
-                PoSQLTimeZone::try_from(&timezone_option)?,
-            )),
+            DataType::Timestamp(time_unit, timezone_option) => {
+                let posql_time_unit = match time_unit {
+                    ArrowTimeUnit::Second => PoSQLTimeUnit::Second,
+                    ArrowTimeUnit::Millisecond => PoSQLTimeUnit::Millisecond,
+                    ArrowTimeUnit::Microsecond => PoSQLTimeUnit::Microsecond,
+                    ArrowTimeUnit::Nanosecond => PoSQLTimeUnit::Nanosecond,
+                };
+                Ok(ColumnType::TimestampTZ(
+                    posql_time_unit,
+                    PoSQLTimeZone::try_from(&timezone_option)?,
+                ))
+            }
             DataType::Utf8 => Ok(ColumnType::VarChar),
             _ => Err(format!("Unsupported arrow data type {:?}", data_type)),
         }
@@ -482,6 +499,7 @@ impl ColumnField {
 }
 
 /// Convert ColumnField values to arrow Field
+#[cfg(feature = "arrow")]
 impl From<&ColumnField> for Field {
     fn from(column_field: &ColumnField) -> Self {
         Field::new(
diff --git a/crates/proof-of-sql/src/base/database/mod.rs b/crates/proof-of-sql/src/base/database/mod.rs
index 6db25c2b4..d84c53be1 100644
--- a/crates/proof-of-sql/src/base/database/mod.rs
+++ b/crates/proof-of-sql/src/base/database/mod.rs
@@ -21,15 +21,19 @@ pub use literal_value::LiteralValue;
 mod table_ref;
 pub use table_ref::TableRef;
 
+#[cfg(feature = "arrow")]
 mod arrow_array_to_column_conversion;
+#[cfg(feature = "arrow")]
 pub use arrow_array_to_column_conversion::{ArrayRefExt, ArrowArrayToColumnConversionError};
 
+#[cfg(feature = "arrow")]
 mod record_batch_utility;
+#[cfg(feature = "arrow")]
 pub use record_batch_utility::ToArrow;
 
-#[cfg(any(test, feature = "test"))]
+#[cfg(all(test, feature = "arrow", feature = "test"))]
 mod test_accessor_utility;
-#[cfg(any(test, feature = "test"))]
+#[cfg(all(test, feature = "arrow", feature = "test"))]
 pub use test_accessor_utility::{make_random_test_accessor_data, RandomTestAccessorDescriptor};
 
 mod owned_column;
@@ -54,9 +58,11 @@ mod expression_evaluation_error;
 mod expression_evaluation_test;
 pub use expression_evaluation_error::{ExpressionEvaluationError, ExpressionEvaluationResult};
 
+#[cfg(feature = "arrow")]
 mod owned_and_arrow_conversions;
+#[cfg(feature = "arrow")]
 pub use owned_and_arrow_conversions::OwnedArrowConversionError;
-#[cfg(test)]
+#[cfg(all(test, feature = "arrow"))]
 mod owned_and_arrow_conversions_test;
 
 #[cfg(any(test, feature = "test"))]
@@ -78,6 +84,7 @@ pub use owned_table_test_accessor::OwnedTableTestAccessor;
 #[cfg(all(test, feature = "blitzar"))]
 mod owned_table_test_accessor_test;
 /// Contains traits for scalar <-> i256 conversions
+#[cfg(feature = "arrow")]
 pub mod scalar_and_i256_conversions;
 
 pub(crate) mod filter_util;
diff --git a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs
index f9e78a83e..efc76b122 100644
--- a/crates/proof-of-sql/src/base/database/test_accessor_utility.rs
+++ b/crates/proof-of-sql/src/base/database/test_accessor_utility.rs
@@ -39,6 +39,7 @@ impl Default for RandomTestAccessorDescriptor {
 }
 
 /// Generate a DataFrame with random data
+#[allow(dead_code)]
 pub fn make_random_test_accessor_data(
     rng: &mut StdRng,
     cols: &[(&str, ColumnType)],
diff --git a/crates/proof-of-sql/src/sql/ast/dense_filter_expr_test.rs b/crates/proof-of-sql/src/sql/ast/dense_filter_expr_test.rs
index 1a21582a1..b917f022e 100644
--- a/crates/proof-of-sql/src/sql/ast/dense_filter_expr_test.rs
+++ b/crates/proof-of-sql/src/sql/ast/dense_filter_expr_test.rs
@@ -16,13 +16,11 @@ use crate::{
             },
         },
     },
 };
-use arrow::datatypes::{Field, Schema};
 use blitzar::proof::InnerProductProof;
 use bumpalo::Bump;
 use curve25519_dalek::RistrettoPoint;
 use indexmap::{IndexMap, IndexSet};
 use proof_of_sql_parser::{Identifier, ResourceId};
-use std::sync::Arc;
 
 #[test]
 fn we_can_correctly_fetch_the_query_result_schema() {
@@ -60,19 +58,13 @@ fn we_can_correctly_fetch_the_query_result_schema() {
         .unwrap(),
     );
 
-    let column_fields: Vec<Field> = provable_ast
-        .get_column_result_fields()
-        .iter()
-        .map(|v| v.into())
-        .collect();
-    let schema = Arc::new(Schema::new(column_fields));
-
+    let column_fields: Vec<ColumnField> = provable_ast.get_column_result_fields();
     assert_eq!(
-        schema,
-        Arc::new(Schema::new(vec![
-            Field::new("a", (&ColumnType::BigInt).into(), false,),
-            Field::new("b", (&ColumnType::BigInt).into(), false,)
-        ]))
+        column_fields,
+        vec![
+            ColumnField::new("a".parse().unwrap(), ColumnType::BigInt),
+            ColumnField::new("b".parse().unwrap(), ColumnType::BigInt)
+        ]
     );
 }
 
diff --git a/crates/proof-of-sql/src/sql/ast/filter_expr_test.rs b/crates/proof-of-sql/src/sql/ast/filter_expr_test.rs
index cf58c4161..9004b8cc5 100644
--- a/crates/proof-of-sql/src/sql/ast/filter_expr_test.rs
+++ b/crates/proof-of-sql/src/sql/ast/filter_expr_test.rs
@@ -18,13 +18,11 @@ use crate::{
             },
         },
     },
 };
-use arrow::datatypes::{Field, Schema};
 use blitzar::proof::InnerProductProof;
 use bumpalo::Bump;
 use curve25519_dalek::RistrettoPoint;
 use indexmap::{IndexMap, IndexSet};
 use proof_of_sql_parser::{Identifier, ResourceId};
-use std::sync::Arc;
 
 #[test]
 fn we_can_correctly_fetch_the_query_result_schema() {
@@ -53,20 +51,13 @@ fn we_can_correctly_fetch_the_query_result_schema() {
     )
     .unwrap(),
     );
-
-    let column_fields: Vec<Field> = provable_ast
-        .get_column_result_fields()
-        .iter()
-        .map(|v| v.into())
-        .collect();
-    let schema = Arc::new(Schema::new(column_fields));
-
+    let column_fields: Vec<ColumnField> = provable_ast.get_column_result_fields();
     assert_eq!(
-        schema,
-        Arc::new(Schema::new(vec![
-            Field::new("a", (&ColumnType::BigInt).into(), false,),
-            Field::new("b", (&ColumnType::BigInt).into(), false,)
-        ]))
+        column_fields,
+        vec![
+            ColumnField::new("a".parse().unwrap(), ColumnType::BigInt),
+            ColumnField::new("b".parse().unwrap(), ColumnType::BigInt)
+        ]
     );
 }
 
diff --git a/crates/proof-of-sql/src/sql/ast/projection_expr_test.rs b/crates/proof-of-sql/src/sql/ast/projection_expr_test.rs
index 8715c7d09..f2ad71927 100644
--- a/crates/proof-of-sql/src/sql/ast/projection_expr_test.rs
+++ b/crates/proof-of-sql/src/sql/ast/projection_expr_test.rs
@@ -16,13 +16,11 @@ use crate::{
             },
         },
     },
 };
-use arrow::datatypes::{Field, Schema};
 use blitzar::proof::InnerProductProof;
 use bumpalo::Bump;
 use curve25519_dalek::RistrettoPoint;
 use indexmap::{IndexMap, IndexSet};
 use proof_of_sql_parser::{Identifier, ResourceId};
-use std::sync::Arc;
 
 #[test]
 fn we_can_correctly_fetch_the_query_result_schema() {
@@ -50,20 +48,13 @@ fn we_can_correctly_fetch_the_query_result_schema() {
         ],
         TableExpr { table_ref },
     );
-
-    let column_fields: Vec<Field> = provable_ast
-        .get_column_result_fields()
-        .iter()
-        .map(|v| v.into())
-        .collect();
-    let schema = Arc::new(Schema::new(column_fields));
-
+    let column_fields: Vec<ColumnField> = provable_ast.get_column_result_fields();
     assert_eq!(
-        schema,
-        Arc::new(Schema::new(vec![
-            Field::new("a", (&ColumnType::BigInt).into(), false,),
-            Field::new("b", (&ColumnType::BigInt).into(), false,)
-        ]))
+        column_fields,
+        vec![
+            ColumnField::new("a".parse().unwrap(), ColumnType::BigInt),
+            ColumnField::new("b".parse().unwrap(), ColumnType::BigInt),
+        ]
     );
 }
 
diff --git a/crates/proof-of-sql/src/sql/proof/mod.rs b/crates/proof-of-sql/src/sql/proof/mod.rs
index 2207ae34f..c934c1d88 100644
--- a/crates/proof-of-sql/src/sql/proof/mod.rs
+++ b/crates/proof-of-sql/src/sql/proof/mod.rs
@@ -25,7 +25,7 @@ pub(crate) use provable_result_column::ProvableResultColumn;
 
 mod provable_query_result;
 pub use provable_query_result::ProvableQueryResult;
-#[cfg(test)]
+#[cfg(all(test, feature = "arrow"))]
 mod provable_query_result_test;
 
 mod sumcheck_mle_evaluations;
diff --git a/crates/proof-of-sql/src/sql/proof/proof_builder_test.rs b/crates/proof-of-sql/src/sql/proof/proof_builder_test.rs
index 4d283b8d0..a8f9b0a0d 100644
--- a/crates/proof-of-sql/src/sql/proof/proof_builder_test.rs
+++ b/crates/proof-of-sql/src/sql/proof/proof_builder_test.rs
@@ -7,6 +7,7 @@ use crate::{
     },
     sql::proof::{Indexes, ResultBuilder, SumcheckSubpolynomialType},
 };
+#[cfg(feature = "arrow")]
 use arrow::{
     array::Int64Array,
     datatypes::{Field, Schema},
@@ -123,6 +124,7 @@ fn we_can_form_an_aggregated_sumcheck_polynomial() {
     assert_eq!(eval, expected_eval);
 }
 
+#[cfg(feature = "arrow")]
 #[test]
 fn we_can_form_the_provable_query_result() {
     let result_indexes = Indexes::Sparse(vec![1, 2]);
diff --git a/crates/proof-of-sql/src/sql/proof/query_result.rs b/crates/proof-of-sql/src/sql/proof/query_result.rs
index 052b9bad4..f8432cb71 100644
--- a/crates/proof-of-sql/src/sql/proof/query_result.rs
+++ b/crates/proof-of-sql/src/sql/proof/query_result.rs
@@ -3,6 +3,7 @@ use crate::base::{
     proof::ProofError,
     scalar::Scalar,
 };
+#[cfg(feature = "arrow")]
 use arrow::{error::ArrowError, record_batch::RecordBatch};
 use thiserror::Error;
 
@@ -48,12 +49,13 @@ pub struct QueryData<S: Scalar> {
 }
 
 impl<S: Scalar> QueryData<S> {
-    #[cfg(test)]
+    #[cfg(all(test, feature = "arrow"))]
     pub fn into_record_batch(self) -> RecordBatch {
         self.try_into().unwrap()
     }
 }
 
+#[cfg(feature = "arrow")]
 impl<S: Scalar> TryFrom<QueryData<S>> for RecordBatch {
     type Error = ArrowError;