Skip to content

Commit

Permalink
Add $rolling_*_by() expressions (#1115)
Browse files Browse the repository at this point in the history
Co-authored-by: eitsupi <[email protected]>
  • Loading branch information
etiennebacher and eitsupi authored Jun 3, 2024
1 parent 825373d commit 3e3eece
Show file tree
Hide file tree
Showing 20 changed files with 1,317 additions and 348 deletions.
7 changes: 5 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
- All `$rolling_*()` functions lose the arguments `by`, `closed` and
`warn_if_unsorted`. Rolling computations based on `by` must be made via the
corresponding `rolling_*_by()`, e.g. `rolling_mean_by()` instead of
`rolling_mean(by =)`.
`rolling_mean(by =)` (#1115).
- `pl$scan_parquet()` and `pl$read_parquet()` gain an argument `glob` which
defaults to `TRUE`. Set it to `FALSE` to avoid considering `*` as a globbing
pattern.
Expand Down Expand Up @@ -50,7 +50,10 @@
(#1112).
- In `$dt$combine()`, the arguments `tm` and `tu` are renamed `time` and
`time_unit` (#1116).
- The default value of the `rechunk` argument of `pl$concat()` is changed from `TRUE` to `FALSE` (#1125).
- The default value of the `rechunk` argument of `pl$concat()` is changed from
`TRUE` to `FALSE` (#1125).
- In all `$rolling_*()` functions, the arguments `center` and `ddof` must be
named (#1115).

### New features

Expand Down
248 changes: 241 additions & 7 deletions R/expr__expr.R
Original file line number Diff line number Diff line change
Expand Up @@ -2299,6 +2299,7 @@ prepare_rolling_window_args = function(
#' - 1i (1 index count)
#' If the dynamic string language is used, the `by` and `closed` arguments must
#' also be set.
#' @inherit Expr_rolling params return
#' @param weights An optional slice with the same length as the window that will
#' be multiplied elementwise with the values in the window.
#' @param min_periods The number of values in the window that should be non-null
Expand All @@ -2309,14 +2310,14 @@ prepare_rolling_window_args = function(
#' If you want to compute multiple aggregation statistics over the same dynamic
#' window, consider using `$rolling()`: this method can cache the window size
#' computation.
#' @return Expr
#' @examples
#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$
#' with_columns(roll_min = pl$col("a")$rolling_min(window_size = 2))
Expr_rolling_min = function(
window_size,
weights = NULL,
min_periods = NULL,
...,
center = FALSE) {
wargs = prepare_rolling_window_args(window_size, min_periods)
.pr$Expr$rolling_min(
Expand All @@ -2326,6 +2327,35 @@ Expr_rolling_min = function(
unwrap("in $rolling_min():")
}

#' Compute a rolling minimum based on another column
#'
#' @inherit Expr_rolling_min params return details
#' @inheritParams Expr_rolling
#' @param by This column must be of dtype [`Date`][pl_date] or
#' [`Datetime`][DataType_Datetime].
#'
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' df_temporal$with_columns(
#' rolling_row_min = pl$col("index")$rolling_min_by("date", window_size = "3h")
#' )
Expr_rolling_min_by = function(
    by,
    window_size,
    ...,
    min_periods = 1,
    closed = "right") {
  # `...` is not forwarded: it only forces `min_periods` and `closed` to be
  # passed by name.
  res = .pr$Expr$rolling_min_by(self, by, window_size, min_periods, closed)
  unwrap(res, "in $rolling_min_by():")
}

#' Rolling maximum
#'
#' Compute the rolling (= moving) max over the values in this array. A window of
Expand All @@ -2340,6 +2370,7 @@ Expr_rolling_max = function(
window_size,
weights = NULL,
min_periods = NULL,
...,
center = FALSE) {
wargs = prepare_rolling_window_args(window_size, min_periods)
.pr$Expr$rolling_max(
Expand All @@ -2349,6 +2380,31 @@ Expr_rolling_max = function(
unwrap("in $rolling_max()")
}

#' Compute a rolling maximum based on another column
#'
#' @inherit Expr_rolling_min_by params return details
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' df_temporal$with_columns(
#' rolling_row_max = pl$col("index")$rolling_max_by("date", window_size = "3h")
#' )
Expr_rolling_max_by = function(
    by,
    window_size,
    ...,
    min_periods = 1,
    closed = "right") {
  # `...` is not forwarded: it only forces `min_periods` and `closed` to be
  # passed by name.
  res = .pr$Expr$rolling_max_by(self, by, window_size, min_periods, closed)
  unwrap(res, "in $rolling_max_by():")
}

#' Rolling mean
#'
#' Compute the rolling (= moving) mean over the values in this array. A window of
Expand All @@ -2363,6 +2419,7 @@ Expr_rolling_mean = function(
window_size,
weights = NULL,
min_periods = NULL,
...,
center = FALSE) {
wargs = prepare_rolling_window_args(window_size, min_periods)
.pr$Expr$rolling_mean(
Expand All @@ -2372,6 +2429,32 @@ Expr_rolling_mean = function(
unwrap("in $rolling_mean():")
}

#' Compute a rolling mean based on another column
#'
#' @inherit Expr_rolling_min_by params return details
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' df_temporal$with_columns(
#' rolling_row_mean = pl$col("index")$rolling_mean_by("date", window_size = "3h")
#' )
Expr_rolling_mean_by = function(
    by,
    window_size,
    ...,
    min_periods = 1,
    closed = "right") {
  # `...` is not forwarded: it only forces `min_periods` and `closed` to be
  # passed by name.
  res = .pr$Expr$rolling_mean_by(self, by, window_size, min_periods, closed)
  unwrap(res, "in $rolling_mean_by():")
}

#' Rolling sum
#'
#' Compute the rolling (= moving) sum over the values in this array. A window of
Expand All @@ -2395,6 +2478,31 @@ Expr_rolling_sum = function(
unwrap("in $rolling_sum():")
}

#' Compute a rolling sum based on another column
#'
#' @inherit Expr_rolling_min_by params return details
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' df_temporal$with_columns(
#' rolling_row_sum = pl$col("index")$rolling_sum_by("date", window_size = "3h")
#' )
Expr_rolling_sum_by = function(
    by,
    window_size,
    ...,
    min_periods = 1,
    closed = "right") {
  # `...` is not forwarded: it only forces `min_periods` and `closed` to be
  # passed by name.
  res = .pr$Expr$rolling_sum_by(self, by, window_size, min_periods, closed)
  unwrap(res, "in $rolling_sum_by():")
}


#' Rolling standard deviation
#'
Expand All @@ -2404,22 +2512,57 @@ Expr_rolling_sum = function(
#' by the `weights` vector.
#'
#' @inherit Expr_rolling_min params details return
#' @inheritParams pl_std
#' @examples
#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$
#' with_columns(roll_std = pl$col("a")$rolling_std(window_size = 2))
Expr_rolling_std = function(
window_size,
weights = NULL,
min_periods = NULL,
center = FALSE) {
...,
center = FALSE,
ddof = 1) {
wargs = prepare_rolling_window_args(window_size, min_periods)
.pr$Expr$rolling_std(
self, wargs$window_size, weights,
wargs$min_periods, center
self, wargs$window_size, weights, wargs$min_periods, center, ddof
) |>
unwrap("in $rolling_std(): ")
}

#' Compute a rolling standard deviation based on another column
#'
#' @inherit Expr_rolling_min_by params return details
#' @inheritParams Expr_rolling_std
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' # Compute the rolling std with the temporal windows closed on the right (default)
#' df_temporal$with_columns(
#' rolling_row_std = pl$col("index")$rolling_std_by("date", window_size = "2h")
#' )
#'
#' # Compute the rolling std with the closure of windows on both sides
#' df_temporal$with_columns(
#' rolling_row_std = pl$col("index")$rolling_std_by("date", window_size = "2h", closed = "both")
#' )
Expr_rolling_std_by = function(
    by,
    window_size,
    ...,
    min_periods = 1,
    closed = "right",
    ddof = 1) {
  # `...` is not forwarded: it only forces `min_periods`, `closed` and `ddof`
  # to be passed by name.
  res = .pr$Expr$rolling_std_by(
    self, by, window_size, min_periods, closed, ddof
  )
  unwrap(res, "in $rolling_std_by():")
}

#' Rolling variance
#'
#' Compute the rolling (= moving) variance over the values in this array. A
Expand All @@ -2428,22 +2571,57 @@ Expr_rolling_std = function(
#' `weights` vector.
#'
#' @inherit Expr_rolling_min params details return
#' @inheritParams pl_std
#' @examples
#' pl$DataFrame(a = c(1, 3, 2, 4, 5, 6))$
#' with_columns(roll_var = pl$col("a")$rolling_var(window_size = 2))
Expr_rolling_var = function(
window_size,
weights = NULL,
min_periods = NULL,
center = FALSE) {
...,
center = FALSE,
ddof = 1) {
wargs = prepare_rolling_window_args(window_size, min_periods)
.pr$Expr$rolling_var(
self, wargs$window_size, weights,
wargs$min_periods, center
self, wargs$window_size, weights, wargs$min_periods, center, ddof
) |>
unwrap("in $rolling_var():")
}

#' Compute a rolling variance based on another column
#'
#' @inherit Expr_rolling_min_by params return details
#' @inheritParams Expr_rolling_var
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' # Compute the rolling var with the temporal windows closed on the right (default)
#' df_temporal$with_columns(
#' rolling_row_var = pl$col("index")$rolling_var_by("date", window_size = "2h")
#' )
#'
#' # Compute the rolling var with the closure of windows on both sides
#' df_temporal$with_columns(
#' rolling_row_var = pl$col("index")$rolling_var_by("date", window_size = "2h", closed = "both")
#' )
Expr_rolling_var_by = function(
    by,
    window_size,
    ...,
    min_periods = 1,
    closed = "right",
    ddof = 1) {
  # `...` is not forwarded: it only forces `min_periods`, `closed` and `ddof`
  # to be passed by name.
  res = .pr$Expr$rolling_var_by(
    self, by, window_size, min_periods, closed, ddof
  )
  unwrap(res, "in $rolling_var_by():")
}

#' Rolling median
#'
#' Compute the rolling (= moving) median over the values in this array. A window
Expand All @@ -2467,6 +2645,31 @@ Expr_rolling_median = function(
) |> unwrap("in $rolling_median():")
}

#' Compute a rolling median based on another column
#'
#' @inherit Expr_rolling_min_by params return details
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' df_temporal$with_columns(
#' rolling_row_median = pl$col("index")$rolling_median_by("date", window_size = "3h")
#' )
Expr_rolling_median_by = function(
    by,
    window_size,
    ...,
    min_periods = 1,
    closed = "right") {
  # `...` is not forwarded: it only forces `min_periods` and `closed` to be
  # passed by name.
  res = .pr$Expr$rolling_median_by(self, by, window_size, min_periods, closed)
  unwrap(res, "in $rolling_median_by():")
}

#' Rolling quantile
#'
#' Compute the rolling (= moving) quantile over the values in this array. A
Expand All @@ -2487,6 +2690,7 @@ Expr_rolling_quantile = function(
window_size,
weights = NULL,
min_periods = NULL,
...,
center = FALSE) {
wargs = prepare_rolling_window_args(window_size, min_periods)
.pr$Expr$rolling_quantile(
Expand All @@ -2496,6 +2700,36 @@ Expr_rolling_quantile = function(
unwrap("in $rolling_quantile():")
}

#' Compute a rolling quantile based on another column
#'
#' @inherit Expr_rolling_min_by params return details
#' @inheritParams Expr_quantile
#' @examples
#' df_temporal = pl$DataFrame(
#' date = pl$datetime_range(as.Date("2001-1-1"), as.Date("2001-1-2"), "1h")
#' )$with_row_index("index")
#'
#' df_temporal
#'
#' df_temporal$with_columns(
#' rolling_row_quantile = pl$col("index")$rolling_quantile_by(
#' "date",
#' window_size = "2h", quantile = 0.3
#' )
#' )
Expr_rolling_quantile_by = function(
    by,
    window_size,
    ...,
    quantile,
    interpolation = "nearest",
    min_periods = 1,
    closed = "right") {
  # `...` is not forwarded: it only forces every argument after `window_size`
  # (including the required `quantile`) to be passed by name.
  # Note that the internal call takes `quantile` and `interpolation` before
  # `window_size`, unlike the order of this function's signature.
  res = .pr$Expr$rolling_quantile_by(
    self, by, quantile, interpolation, window_size, min_periods, closed
  )
  unwrap(res, "in $rolling_quantile_by():")
}

#' Rolling skew
#'
Expand Down
Loading

0 comments on commit 3e3eece

Please sign in to comment.