-
Notifications
You must be signed in to change notification settings - Fork 19
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Implement equivalent transform func for arrow1 (with tests)
- Loading branch information
1 parent
7a0f1df
commit 9c43488
Showing
7 changed files
with
118 additions
and
32 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,38 @@ | ||
use crate::arrow1::error::Result; | ||
use crate::common::stream::WrappedWritableStream; | ||
use async_compat::CompatExt; | ||
use futures::StreamExt; | ||
use parquet::arrow::async_writer::AsyncArrowWriter; | ||
use wasm_bindgen_futures::spawn_local; | ||
|
||
pub fn transform_parquet_stream( | ||
batches: impl futures::Stream<Item = arrow_wasm::arrow1::RecordBatch> + 'static, | ||
writer_properties: crate::arrow1::writer_properties::WriterProperties, | ||
) -> Result<wasm_streams::readable::sys::ReadableStream> { | ||
let options = Some(writer_properties.into()); | ||
// let encoding = writer_properties.get_encoding(); | ||
|
||
let (writable_stream, output_stream) = { | ||
let raw_stream = wasm_streams::transform::sys::TransformStream::new(); | ||
let raw_writable = raw_stream.writable(); | ||
let inner_writer = wasm_streams::WritableStream::from_raw(raw_writable).into_async_write(); | ||
let writable_stream = WrappedWritableStream { | ||
stream: inner_writer, | ||
}; | ||
(writable_stream, raw_stream.readable()) | ||
}; | ||
spawn_local::<_>(async move { | ||
let mut adapted_stream = batches.peekable(); | ||
let mut pinned_stream = std::pin::pin!(adapted_stream); | ||
let first_batch = pinned_stream.as_mut().peek().await.unwrap(); | ||
let schema = first_batch.schema().into_inner(); | ||
// Need to create an encoding for each column | ||
let mut writer = | ||
AsyncArrowWriter::try_new(writable_stream.compat(), schema, 1024, options).unwrap(); | ||
while let Some(batch) = pinned_stream.next().await { | ||
let _ = writer.write(&batch.into()).await; | ||
} | ||
let _ = writer.close().await; | ||
}); | ||
Ok(output_stream) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,3 +3,6 @@ pub mod writer_properties; | |
|
||
#[cfg(feature = "async")] | ||
pub mod fetch; | ||
|
||
#[cfg(feature = "async")] | ||
pub mod stream; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
use futures::AsyncWrite; | ||
|
||
pub struct WrappedWritableStream<'writer> { | ||
pub stream: wasm_streams::writable::IntoAsyncWrite<'writer>, | ||
} | ||
|
||
impl<'writer> AsyncWrite for WrappedWritableStream<'writer> { | ||
fn poll_write( | ||
self: std::pin::Pin<&mut Self>, | ||
cx: &mut std::task::Context<'_>, | ||
buf: &[u8], | ||
) -> std::task::Poll<std::io::Result<usize>> { | ||
AsyncWrite::poll_write(std::pin::Pin::new(&mut self.get_mut().stream), cx, buf) | ||
} | ||
|
||
fn poll_flush( | ||
self: std::pin::Pin<&mut Self>, | ||
cx: &mut std::task::Context<'_>, | ||
) -> std::task::Poll<std::io::Result<()>> { | ||
AsyncWrite::poll_flush(std::pin::Pin::new(&mut self.get_mut().stream), cx) | ||
} | ||
|
||
fn poll_close( | ||
self: std::pin::Pin<&mut Self>, | ||
cx: &mut std::task::Context<'_>, | ||
) -> std::task::Poll<std::io::Result<()>> { | ||
AsyncWrite::poll_close(std::pin::Pin::new(&mut self.get_mut().stream), cx) | ||
} | ||
} | ||
|
||
unsafe impl<'writer> Send for WrappedWritableStream<'writer> {} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters