From bc78dfcd5b9323f590fbfe535e9448933758213e Mon Sep 17 00:00:00 2001
From: Connor Tsui
Date: Tue, 13 Feb 2024 13:32:17 -0500
Subject: [PATCH] remove datafusion for faster compiles

---
 eggstrain/Cargo.toml                |  2 +-
 eggstrain/src/main.rs               |  4 +-
 eggstrain/src/storage_client/mod.rs | 70 ++++++++++++++---------
 3 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/eggstrain/Cargo.toml b/eggstrain/Cargo.toml
index 380808a..cab8161 100644
--- a/eggstrain/Cargo.toml
+++ b/eggstrain/Cargo.toml
@@ -14,7 +14,7 @@ authors = [
 anyhow = "1"
 arrow = "50"
 async-trait = "0.1"
-datafusion = "35"
+# datafusion = "35"
 serde_json = "1"
 substrait = { version = "0.24", features = ["pbjson"] }
 tokio = { version = "1", features = ["full"] }
diff --git a/eggstrain/src/main.rs b/eggstrain/src/main.rs
index d26262e..91c5bd2 100644
--- a/eggstrain/src/main.rs
+++ b/eggstrain/src/main.rs
@@ -7,6 +7,6 @@ use execution::substrait::deserialize::get_json;
 #[tokio::main]
 async fn main() {
     println!("Hello, world!");
-    // get_json("../substrait/substrait_plan_example.json");
-    get_json("../substrait/basic_query.json");
+    get_json("../substrait/substrait_plan_example.json");
+    // get_json("../substrait/basic_query.json");
 }
diff --git a/eggstrain/src/storage_client/mod.rs b/eggstrain/src/storage_client/mod.rs
index 20b9a7f..bc3f04e 100644
--- a/eggstrain/src/storage_client/mod.rs
+++ b/eggstrain/src/storage_client/mod.rs
@@ -1,10 +1,10 @@
 //! Right now we have this in a submodule `storage_client.rs`, but the IO service
 //! team would probably create a crate and we could import it easily into our `Cargo.toml` file
 
-use datafusion::execution::SendableRecordBatchStream;
+// use datafusion::execution::SendableRecordBatchStream;
 
-use datafusion::common::arrow::array::{Int32Array, RecordBatch};
-use datafusion::common::arrow::datatypes::{DataType, Field, Schema};
+// use datafusion::common::arrow::array::{Int32Array, RecordBatch};
+// use datafusion::common::arrow::datatypes::{DataType, Field, Schema};
 use std::sync::Arc;
 
 // Placeholder types to let this compile
@@ -27,35 +27,35 @@ pub enum BlobData {
     Tuple(RecordId),
 }
 
-impl StorageClient {
-    /// Have some sort of way to create a `StorageClient` on our local node.
-    pub fn new(_id: usize) -> Self {
-        Self
-    }
-
-    /// The only other function we need exposed would be a way to actually get data.
-    /// What we should get is a stream of `RecordBatch`es, which is just Apache Arrow
-    /// data in memory.
-    ///
-    /// The executor node really should not know what the underlying data is on the Blob data store.
-    /// In our case it is Parquet, but since the Execution Engine is not in charge of loading
-    /// those Parquet files, it should just receive it as in-memory Arrow data.
-    ///
-    /// Note that we will likely re-export the `SendableRecordBatchStream` from DataFusion
-    /// and use that as the return type instead.
-    pub async fn request_data(&self, _request: BlobData) -> SendableRecordBatchStream {
-        todo!()
-    }
-
-    pub async fn sample_request_data(_request: BlobData) -> SendableRecordBatchStream {
-        todo!("Return some sample data")
-    }
-
-    /// https://docs.rs/datafusion/latest/datafusion/common/arrow/array/struct.RecordBatch.html
-    pub async fn request_synchronous_data() -> RecordBatch {
-        let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
-        let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
-
-        RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array)]).unwrap()
-    }
-}
+// impl StorageClient {
+//     /// Have some sort of way to create a `StorageClient` on our local node.
+//     pub fn new(_id: usize) -> Self {
+//         Self
+//     }
+
+//     /// The only other function we need exposed would be a way to actually get data.
+//     /// What we should get is a stream of `RecordBatch`es, which is just Apache Arrow
+//     /// data in memory.
+//     ///
+//     /// The executor node really should not know what the underlying data is on the Blob data store.
+//     /// In our case it is Parquet, but since the Execution Engine is not in charge of loading
+//     /// those Parquet files, it should just receive it as in-memory Arrow data.
+//     ///
+//     /// Note that we will likely re-export the `SendableRecordBatchStream` from DataFusion
+//     /// and use that as the return type instead.
+//     pub async fn request_data(&self, _request: BlobData) -> SendableRecordBatchStream {
+//         todo!()
+//     }
+
+//     pub async fn sample_request_data(_request: BlobData) -> SendableRecordBatchStream {
+//         todo!("Return some sample data")
+//     }
+
+//     /// https://docs.rs/datafusion/latest/datafusion/common/arrow/array/struct.RecordBatch.html
+//     pub async fn request_synchronous_data() -> RecordBatch {
+//         let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);
+//         let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
+
+//         RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array)]).unwrap()
+//     }
+// }
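
Editor's note: since the commit comments out the `datafusion::common::arrow` re-exports, a natural follow-up is to build the same sample batch against the `arrow = "50"` crate that stays in Cargo.toml. The sketch below is an illustrative assumption, not part of the commit; only the `StorageClient` name and the `request_synchronous_data` body are taken from the code above.

use std::sync::Arc;

use arrow::array::Int32Array;
use arrow::datatypes::{DataType, Field, Schema};
use arrow::record_batch::RecordBatch;

// Hypothetical stand-in for the placeholder client in storage_client/mod.rs.
pub struct StorageClient;

impl StorageClient {
    /// Same single-column sample batch as the commented-out
    /// `request_synchronous_data`, using the `arrow` crate's own paths
    /// instead of DataFusion's re-exports.
    pub async fn request_synchronous_data() -> RecordBatch {
        // One non-nullable Int32 column named "id".
        let schema = Schema::new(vec![Field::new("id", DataType::Int32, false)]);
        let id_array = Int32Array::from(vec![1, 2, 3, 4, 5]);

        // try_new only fails if the columns don't match the schema.
        RecordBatch::try_new(Arc::new(schema), vec![Arc::new(id_array)]).unwrap()
    }
}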