From ad9c8a8c8bc3f1d63cfebb61e21f1c967ae7685e Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 2 Apr 2024 13:18:02 +0200 Subject: [PATCH 1/5] init --- R/extendr-wrappers.R | 2 ++ R/series__series.R | 8 +++++--- src/rust/src/series.rs | 6 ++++++ 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/R/extendr-wrappers.R b/R/extendr-wrappers.R index 90723eab9..718e1e4fd 100644 --- a/R/extendr-wrappers.R +++ b/R/extendr-wrappers.R @@ -1324,6 +1324,8 @@ RPolarsSeries$set_sorted_mut <- function(descending) invisible(.Call(wrap__RPola RPolarsSeries$struct_fields <- function() .Call(wrap__RPolarsSeries__struct_fields, self) +RPolarsSeries$struct_unnest <- function() .Call(wrap__RPolarsSeries__struct_unnest, self) + RPolarsSeries$from_arrow_array_stream_str <- function(name, robj_str) .Call(wrap__RPolarsSeries__from_arrow_array_stream_str, name, robj_str) RPolarsSeries$from_arrow_array_robj <- function(name, array) .Call(wrap__RPolarsSeries__from_arrow_array_robj, name, array) diff --git a/R/series__series.R b/R/series__series.R index 5c3e615c6..9bc1adbee 100644 --- a/R/series__series.R +++ b/R/series__series.R @@ -270,7 +270,11 @@ Series_struct = method_as_active_binding( self, expr_struct_make_sub_ns, fields = method_as_active_binding(function() { unwrap(.pr$Series$struct_fields(pl_series), "in $struct$fields:") - }) + }), + unnest = function() { + .pr$Series$struct_unnest(pl_series) |> + unwrap("in $struct$unnest():") + } ) } ) @@ -1101,8 +1105,6 @@ Series_item = function(index = NULL) { #' #' s$clear(n = 5) Series_clear = function(n = 0) { - # TODO: check whether n < 0 should be removed when resolved upstream - # https://github.com/pola-rs/polars/issues/15421 if (length(n) > 1 || !is.numeric(n) || n < 0) { Err_plain("`n` must be an integer greater or equal to 0.") |> unwrap("in $clear():") diff --git a/src/rust/src/series.rs b/src/rust/src/series.rs index 212690912..b6aed6729 100644 --- 
a/src/rust/src/series.rs +++ b/src/rust/src/series.rs @@ -505,6 +505,12 @@ impl RPolarsSeries { Ok(ca.fields().iter().map(|s| s.name()).collect()) } + fn struct_unnest(&self) -> RResult { + let ca = self.0.struct_().map_err(polars_to_rpolars_err)?; + let out: pl::DataFrame = ca.clone().into(); + Ok(out.into()) + } + pub fn from_arrow_array_stream_str(name: Robj, robj_str: Robj) -> RResult { let name = robj_to!(str, name)?; let s = crate::arrow_interop::to_rust::arrow_stream_to_series_internal(robj_str)? From 8a10f525511d873c6617faceab601bfb624956f7 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 2 Apr 2024 13:30:55 +0200 Subject: [PATCH 2/5] tests, docs --- NEWS.md | 1 + R/series__series.R | 8 ++++++++ man/Series_struct_unnest.Rd | 21 +++++++++++++++++++++ tests/testthat/_snaps/after-wrappers.md | 9 +++++---- tests/testthat/test-series.R | 14 ++++++++++++++ 5 files changed, 49 insertions(+), 4 deletions(-) create mode 100644 man/Series_struct_unnest.Rd diff --git a/NEWS.md b/NEWS.md index ac08d3b75..2e607dd3b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -114,6 +114,7 @@ - New methods `$select_seq()` and `$with_columns_seq()` for `DataFrame` and `LazyFrame` (#1003). - New method `$clear()` for `DataFrame`, `LazyFrame`, and `Series` (#1004). +- New method `$struct$unnest()` for `Series` (#1010). 
### Bug fixes diff --git a/R/series__series.R b/R/series__series.R index 9bc1adbee..f0525019b 100644 --- a/R/series__series.R +++ b/R/series__series.R @@ -271,6 +271,14 @@ Series_struct = method_as_active_binding( fields = method_as_active_binding(function() { unwrap(.pr$Series$struct_fields(pl_series), "in $struct$fields:") }), + #' Convert this struct Series to a DataFrame with a separate column for + #' each field + #' + #' @name Series_struct_unnest + #' @return A DataFrame + #' @examples + #' s = pl$Series(values = c(1, 2), dtype = pl$Struct(foo = pl$Float64)) + #' s$struct$unnest() unnest = function() { .pr$Series$struct_unnest(pl_series) |> unwrap("in $struct$unnest():") diff --git a/man/Series_struct_unnest.Rd b/man/Series_struct_unnest.Rd new file mode 100644 index 000000000..7b4cb4c3b --- /dev/null +++ b/man/Series_struct_unnest.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/series__series.R +\name{Series_struct_unnest} +\alias{Series_struct_unnest} +\alias{Series_struct} +\title{Convert this struct Series to a DataFrame with a separate column for +each field} +\usage{ +Series_struct() +} +\value{ +A DataFrame +} +\description{ +Convert this struct Series to a DataFrame with a separate column for +each field +} +\examples{ +s = pl$Series(values = c(1, 2), dtype = pl$Struct(foo = pl$Float64)) +s$struct$unnest() +} diff --git a/tests/testthat/_snaps/after-wrappers.md b/tests/testthat/_snaps/after-wrappers.md index 4a8581ab8..3e0a1bdc1 100644 --- a/tests/testthat/_snaps/after-wrappers.md +++ b/tests/testthat/_snaps/after-wrappers.md @@ -717,10 +717,11 @@ [37] "rep" "set_sorted_mut" [39] "shape" "sleep" [41] "sort_mut" "std" - [43] "struct_fields" "sub" - [45] "sum" "to_fmt_char" - [47] "to_frame" "to_r" - [49] "value_counts" "var" + [43] "struct_fields" "struct_unnest" + [45] "sub" "sum" + [47] "to_fmt_char" "to_frame" + [49] "to_r" "value_counts" + [51] "var" # public and private methods of each class 
RThreadHandle diff --git a/tests/testthat/test-series.R b/tests/testthat/test-series.R index b7a887db4..225251410 100644 --- a/tests/testthat/test-series.R +++ b/tests/testthat/test-series.R @@ -679,3 +679,17 @@ test_that("$clear() works", { "greater or equal to 0" ) }) + +test_that("$struct$unnest() works", { + s = pl$Series(values = c(1, 2), dtype = pl$Struct(foo = pl$Float64)) + expect_identical( + s$struct$unnest()$to_list(), + list(foo = c(1, 2)) + ) + # empty Series + s = pl$Series(dtype = pl$Struct(foo = pl$Float64)) + expect_identical( + s$struct$unnest()$to_list(), + list(foo = numeric(0)) + ) +}) From 581bc795d25abe62a9f3ee84be99fb01c5feb2d2 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 2 Apr 2024 13:34:34 +0200 Subject: [PATCH 3/5] one additional test --- tests/testthat/test-series.R | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/testthat/test-series.R b/tests/testthat/test-series.R index 225251410..3f5035114 100644 --- a/tests/testthat/test-series.R +++ b/tests/testthat/test-series.R @@ -686,10 +686,20 @@ test_that("$struct$unnest() works", { s$struct$unnest()$to_list(), list(foo = c(1, 2)) ) + # empty Series s = pl$Series(dtype = pl$Struct(foo = pl$Float64)) expect_identical( s$struct$unnest()$to_list(), list(foo = numeric(0)) ) + + # Series "name" param is not used, only pl$Struct() name is used + s = pl$Series(name = "bar", dtype = pl$Struct(pl$Float64)) + expected = list(numeric(0)) + names(expected) = "" + expect_identical( + s$struct$unnest()$to_list(), + expected + ) }) From f7709fc026f61310b11db466c2f8de1e3d029472 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Tue, 2 Apr 2024 13:36:33 +0200 Subject: [PATCH 4/5] update test --- tests/testthat/test-series.R | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/testthat/test-series.R b/tests/testthat/test-series.R index 3f5035114..b9044122f
100644 --- a/tests/testthat/test-series.R +++ b/tests/testthat/test-series.R @@ -695,11 +695,9 @@ test_that("$struct$unnest() works", { ) # Series "name" param is not used, only pl$Struct() name is used - s = pl$Series(name = "bar", dtype = pl$Struct(pl$Float64)) - expected = list(numeric(0)) - names(expected) = "" + s = pl$Series(name = "bar", dtype = pl$Struct(foo = pl$Float64)) expect_identical( s$struct$unnest()$to_list(), - expected + list(foo = numeric(0)) ) }) From dae6a58669384cc1e9e53efb28482c1c5395d570 Mon Sep 17 00:00:00 2001 From: Etienne Bacher <52219252+etiennebacher@users.noreply.github.com> Date: Wed, 10 Apr 2024 13:54:01 +0200 Subject: [PATCH 5/5] fix usage section --- R/series__series.R | 2 ++ man/Series_struct_unnest.Rd | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/R/series__series.R b/R/series__series.R index 55cab0008..a41f23687 100644 --- a/R/series__series.R +++ b/R/series__series.R @@ -274,6 +274,8 @@ Series_struct = method_as_active_binding( #' Convert this struct Series to a DataFrame with a separate column for #' each field #' + #' @usage Series_struct_unnest() + #' #' @name Series_struct_unnest #' @return A DataFrame #' @examples diff --git a/man/Series_struct_unnest.Rd b/man/Series_struct_unnest.Rd index 7b4cb4c3b..d836c20f6 100644 --- a/man/Series_struct_unnest.Rd +++ b/man/Series_struct_unnest.Rd @@ -6,7 +6,7 @@ \title{Convert this struct Series to a DataFrame with a separate column for each field} \usage{ -Series_struct() +Series_struct_unnest() } \value{ A DataFrame