-
Notifications
You must be signed in to change notification settings - Fork 36
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Export ArrowArrayStream from polars data frame (#5)
* update polars version
* getting there!
* best attempt
* try will's solution
* apply Will's suggestion
* update wrapper
* closer
* fix cargo lock, document
* use s3_register() + Suggests for nanoarrow and arrow instead of data frame method
* update renv lock
* update nanoarrow in renv.lock
* try refresh
* minor

Co-authored-by: sorhawell <[email protected]>
- Loading branch information
1 parent
ffe7772
commit 1daf373
Showing 7 changed files with 268 additions and 7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
|
||
# Method for nanoarrow::as_nanoarrow_array_stream() on a polars DataFrame.
#
# x:      the DataFrame to export.
# ...:    not used by this method.
# schema: must be NULL; requesting a specific schema is not supported yet.
#
# Returns the nanoarrow array stream that was handed to the exporter.
as_nanoarrow_array_stream.DataFrame <- function(x, ..., schema = NULL) {
  # Only the default (NULL) schema is accepted for now
  stopifnot(is.null(schema))

  # Allocate an empty stream, then pass its address to the polars exporter
  # (presumably the exporter populates the stream through that address)
  out <- nanoarrow::nanoarrow_allocate_array_stream()
  out_addr <- nanoarrow::nanoarrow_pointer_addr_chr(out)
  .pr$DataFrame$export_stream(x, out_addr)

  out
}
|
||
# Method for nanoarrow::infer_nanoarrow_schema() on a polars DataFrame.
# The schema is read from an array stream exported from `x`; `...` is unused.
infer_nanoarrow_schema.DataFrame <- function(x, ...) {
  exported <- as_nanoarrow_array_stream.DataFrame(x)
  exported$get_schema()
}
|
||
# Method for arrow::as_record_batch_reader() on a polars DataFrame.
# `x` is first exported as a nanoarrow array stream (with `schema` passed
# along to that export), and arrow then wraps the stream in a reader.
# `...` is unused.
as_record_batch_reader.DataFrame <- function(x, ..., schema = NULL) {
  stream <- as_nanoarrow_array_stream.DataFrame(x, schema = schema)
  arrow::as_record_batch_reader(stream)
}
|
||
# Method for arrow::as_arrow_table() on a polars DataFrame.
#
# x:      the DataFrame to convert.
# ...:    not used by this method.
# schema: forwarded to as_record_batch_reader.DataFrame() so that a
#         requested schema is validated there instead of being silently
#         swallowed by `...` and ignored.
#
# Returns the result of reading the whole record batch reader into a table.
as_arrow_table.DataFrame <- function(x, ..., schema = NULL) {
  reader <- as_record_batch_reader.DataFrame(x, schema = schema)
  reader$read_table()
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
|
||
test_that("as_nanoarrow_array_stream() works for DataFrame", {
  skip_if_not_installed("nanoarrow")

  # Round trip: polars DataFrame -> nanoarrow stream -> base data.frame
  expected <- data.frame(a = 1L, b = "two")
  polars_df <- pl$DataFrame(a = 1L, b = "two")

  stream <- nanoarrow::as_nanoarrow_array_stream(polars_df)
  expect_s3_class(stream, "nanoarrow_array_stream")
  expect_identical(as.data.frame(stream), expected)
})
|
||
test_that("infer_nanoarrow_schema() works for DataFrame", {
  skip_if_not_installed("nanoarrow")

  polars_df <- pl$DataFrame(a = 1L, b = "two")

  # The inferred schema should match the one reported by an exported stream;
  # the two are compared through their formatted representation
  from_stream <- nanoarrow::as_nanoarrow_array_stream(polars_df)$get_schema()
  from_infer <- nanoarrow::infer_nanoarrow_schema(polars_df)
  expect_identical(format(from_stream), format(from_infer))
})
|
||
test_that("as_record_batch_reader() works for DataFrame", {
  skip_if_not_installed("arrow")

  polars_df <- pl$DataFrame(a = 1L, b = "two")
  reader <- arrow::as_record_batch_reader(polars_df)
  expect_s3_class(reader, "RecordBatchReader")

  # two as.data.frame()s because arrow sometimes returns a tibble here
  round_trip <- as.data.frame(as.data.frame(reader))
  expect_identical(round_trip, data.frame(a = 1L, b = "two"))
})
|
||
test_that("as_arrow_table() works for DataFrame", {
  skip_if_not_installed("arrow")

  polars_df <- pl$DataFrame(a = 1L, b = "two")
  arrow_tbl <- arrow::as_arrow_table(polars_df)
  expect_s3_class(arrow_tbl, "Table")

  # two as.data.frame()s because arrow sometimes returns a tibble here
  round_trip <- as.data.frame(as.data.frame(arrow_tbl))
  expect_identical(round_trip, data.frame(a = 1L, b = "two"))
})