
Commit

remove datafusion for faster compiles

connortsui20 committed Feb 13, 2024
1 parent 73cc3c8 commit bc78dfc

Showing 3 changed files with 38 additions and 38 deletions.
eggstrain/Cargo.toml (2 changes: 1 addition & 1 deletion)

@@ -14,7 +14,7 @@ authors = [
 anyhow = "1"
 arrow = "50"
 async-trait = "0.1"
-datafusion = "35"
+# datafusion = "35"
 serde_json = "1"
 substrait = { version = "0.24", features = ["pbjson"] }
 tokio = { version = "1", features = ["full"] }
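
Commenting the dependency out is the quickest way to drop it from the build. A feature-gated alternative, sketched below purely as an illustration (it is not what this commit does), would keep DataFusion available behind an opt-in Cargo feature while leaving the default build just as fast:

# Hypothetical alternative, not part of this commit: make DataFusion
# opt-in so that a plain `cargo build` never compiles it.
[dependencies]
datafusion = { version = "35", optional = true }

[features]
datafusion = ["dep:datafusion"]

With that layout, `cargo build` skips DataFusion entirely, and `cargo build --features datafusion` restores it without editing the manifest.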
eggstrain/src/main.rs (4 changes: 2 additions & 2 deletions)

@@ -7,6 +7,6 @@ use execution::substrait::deserialize::get_json;
 #[tokio::main]
 async fn main() {
     println!("Hello, world!");
-    // get_json("../substrait/substrait_plan_example.json");
-    get_json("../substrait/basic_query.json");
+    get_json("../substrait/substrait_plan_example.json");
+    // get_json("../substrait/basic_query.json");
 }
eggstrain/src/storage_client/mod.rs (70 changes: 35 additions & 35 deletions)

@@ -1,10 +1,10 @@
 //! Right now we have this in a submodule `storage_client.rs`, but the IO service
 //! team would probably create a crate that we could then import easily in our `Cargo.toml` file.
 
-use datafusion::execution::SendableRecordBatchStream;
+// use datafusion::execution::SendableRecordBatchStream;
 
-use datafusion::common::arrow::array::{Int32Array, RecordBatch};
-use datafusion::common::arrow::datatypes::{DataType, Field, Schema};
+// use datafusion::common::arrow::array::{Int32Array, RecordBatch};
+// use datafusion::common::arrow::datatypes::{DataType, Field, Schema};
 use std::sync::Arc;
 
 // Placeholder types to let this compile
@@ -27,35 +27,35 @@ pub enum BlobData {
     Tuple(RecordId),
 }
 
-impl StorageClient {
-    /// Have some sort of way to create a `StorageClient` on our local node.
-    pub fn new(_id: usize) -> Self {
-        Self
-    }
-
-    /// The only other function we need exposed would be a way to actually get data.
-    /// What we should get is a stream of `RecordBatch`es, which is just Apache Arrow
-    /// data in memory.
-    ///
-    /// The executor node really should not know what the underlying data is on the Blob data store.
-    /// In our case it is Parquet, but since the Execution Engine is not in charge of loading
-    /// those Parquet files, it should just receive the data as in-memory Arrow data.
-    ///
-    /// Note that we will likely re-export the `SendableRecordBatchStream` type from DataFusion
-    /// and use that as the return type instead.
-    pub async fn request_data(&self, _request: BlobData) -> SendableRecordBatchStream {
-        todo!()
-    }
-
-    pub async fn sample_request_data(_request: BlobData) -> SendableRecordBatchStream {
-        todo!("Return some sample data")
-    }
-
-    /// https://docs.rs/datafusion/latest/datafusion/common/arrow/array/struct.RecordBatch.html
-    pub async fn request_synchronous_data() -> RecordBatch {
-        let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
-        let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
-
-        RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array)]).unwrap()
-    }
-}
+// impl StorageClient {
+//     /// Have some sort of way to create a `StorageClient` on our local node.
+//     pub fn new(_id: usize) -> Self {
+//         Self
+//     }
+
+//     /// The only other function we need exposed would be a way to actually get data.
+//     /// What we should get is a stream of `RecordBatch`es, which is just Apache Arrow
+//     /// data in memory.
+//     ///
+//     /// The executor node really should not know what the underlying data is on the Blob data store.
+//     /// In our case it is Parquet, but since the Execution Engine is not in charge of loading
+//     /// those Parquet files, it should just receive the data as in-memory Arrow data.
+//     ///
+//     /// Note that we will likely re-export the `SendableRecordBatchStream` type from DataFusion
+//     /// and use that as the return type instead.
+//     pub async fn request_data(&self, _request: BlobData) -> SendableRecordBatchStream {
+//         todo!()
+//     }
+
+//     pub async fn sample_request_data(_request: BlobData) -> SendableRecordBatchStream {
+//         todo!("Return some sample data")
+//     }
+
+//     /// https://docs.rs/datafusion/latest/datafusion/common/arrow/array/struct.RecordBatch.html
+//     pub async fn request_synchronous_data() -> RecordBatch {
+//         let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+//         let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
+
+//         RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array)]).unwrap()
+//     }
+// }
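
Since `arrow = "50"` stays in `Cargo.toml`, the commented-out placeholder could later be rebuilt without DataFusion by constructing the batch through the `arrow` crate directly and defining a local stream alias. The sketch below is illustrative, not part of this commit: it assumes the `futures` crate is added as a dependency (it is not in the manifest above), and `sample_record_batch` is a hypothetical name.

use std::pin::Pin;
use std::sync::Arc;

use arrow::array::Int32Array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::error::ArrowError;
use arrow::record_batch::RecordBatch;
use futures::stream::Stream;

// Local stand-in for DataFusion's `SendableRecordBatchStream`: a pinned,
// boxed, `Send`-able stream of Arrow record batches. Unlike DataFusion's
// trait object, this alias carries no `schema()` accessor.
pub type SendableRecordBatchStream =
    Pin<Box<dyn Stream<Item = Result<RecordBatch, ArrowError>> + Send>>;

// Builds the same single-column batch as the commented-out
// `request_synchronous_data`, using plain `arrow` paths instead of the
// `datafusion::common::arrow` re-exports.
pub fn sample_record_batch() -> RecordBatch {
    let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
    let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);

    RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array)]).unwrap()
}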
