From a0e35dce020232c6add07abc7b40137502e3b489 Mon Sep 17 00:00:00 2001
From: Will Beasley
Date: Sat, 28 Oct 2023 11:49:59 -0500
Subject: [PATCH] starting `row_sum()` ref #126

---
 NAMESPACE      |   1 +
 R/row.R        | 105 +++++++++++++++++++++++++++++++++++++++++++++++++
 man/row_sum.Rd |  60 ++++++++++++++++++++++++++++
 3 files changed, 166 insertions(+)
 create mode 100644 R/row.R
 create mode 100644 man/row_sum.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 6a9c34f..0a85b23 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -37,6 +37,7 @@ export(readr_spec_aligned)
 export(replace_nas_with_explicit)
 export(replace_with_nas)
 export(retrieve_key_value)
+export(row_sum)
 export(snake_case)
 export(trim_character)
 export(trim_date)
diff --git a/R/row.R b/R/row.R
new file mode 100644
index 0000000..9357e27
--- /dev/null
+++ b/R/row.R
@@ -0,0 +1,105 @@
+#' @name row_sum
+#' @title Find the sum of selected columns within a row
+#'
+#' @description Sums across columns within a row,
+#' while accounting for nonmissingness.
+#' Specify the desired columns by passing their explicit column names or
+#' by passing a regular expression that matches the column names.
+#'
+#' @param d The data.frame containing the values to sum. Required.
+#' @param columns_to_average A character vector containing the column
+#' names to sum.
+#' If empty, `pattern` is used to select columns. Optional.
+#' @param pattern A regular expression pattern passed to [base::grep()]
+#' (with `perl = TRUE`). Required only if `columns_to_average` is empty.
+#' @param new_column_name The name of the new column that represents the sum
+#' of the specified columns. Required; defaults to `"row_sum"`.
+#' @param threshold_proportion Designates the minimum proportion of columns
+#' that have a nonmissing value (within each row) in order to return a sum.
+#' Required; defaults to 0.75.
+#' @param verbose a logical value to designate if extra information is
+#' displayed in the console,
+#' such as which columns are matched by `pattern`.
+#'
+#' @return The data.frame `d`, with the additional column containing the row sum.
+#'
+#' @details
+#' If the specified columns are all logicals or integers,
+#' the new column will be an [integer].
+#' Otherwise the new column will be a [double].
+#'
+#' @author Will Beasley
+#' @examples
+#' library(OuhscMunge) #Load the package into the current R session.
+#'
+#' d <- data.frame(a = c(1L, 2L, NA), b = c(3L, NA, NA))
+#' row_sum(d, columns_to_average = c("a", "b"), threshold_proportion = .5)
+#'
+#' @export
+row_sum <- function(
+  d,
+  columns_to_average = character(0),
+  pattern,
+  new_column_name = "row_sum",
+  threshold_proportion = .75,
+  verbose = FALSE
+) {
+  stopifnot(
+    is.data.frame(d),
+    is.character(new_column_name),
+    length(new_column_name) == 1L
+  )
+
+  if (length(columns_to_average) == 0L) {
+    if (missing(pattern)) {
+      stop(
+        "Either `columns_to_average` or `pattern` must be specified.",
+        call. = FALSE
+      )
+    }
+
+    columns_to_average <-
+      d |>
+      colnames() |>
+      grep(
+        x = _,
+        pattern = pattern,
+        value = TRUE,
+        perl = TRUE
+      )
+
+    if (verbose) {
+      message(
+        "The following columns will be summed:\n- ",
+        paste(columns_to_average, collapse = "\n- ")
+      )
+    }
+  }
+
+  # Work on a standalone copy of the selected columns, so no temporary
+  # helper columns are ever written to (or dropped from) the caller's `d`.
+  values <- d[columns_to_average]
+
+  # The result is an integer only if every summed column is logical/integer.
+  cast_to_integer <-
+    values |>
+    vapply(\(x) is.logical(x) || is.integer(x), logical(1)) |>
+    all()
+
+  # `unname()` drops the row names that `rowSums()` would otherwise carry.
+  row_total <- unname(rowSums(values, na.rm = TRUE))
+  nonmissing_count <- unname(rowSums(!is.na(values)))
+  nonmissing_proportion <- nonmissing_count / length(columns_to_average)
+
+  # Rows with too few nonmissing values get NA instead of a partial sum.
+  # (`%in% TRUE` treats an NA comparison as "threshold not met".)
+  meets_threshold <- (threshold_proportion <= nonmissing_proportion) %in% TRUE
+  row_total[!meets_threshold] <- NA_real_
+
+  # Name the column from the *value* of `new_column_name`, so a variable
+  # holding the name works the same as a string literal.
+  d[[new_column_name]] <-
+    if (cast_to_integer) as.integer(row_total) else row_total
+
+  d
+}
diff --git a/man/row_sum.Rd b/man/row_sum.Rd
new file mode 100644
index 0000000..061712e
--- /dev/null
+++ b/man/row_sum.Rd
@@ -0,0 +1,60 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/row.R
+\name{row_sum}
+\alias{row_sum}
+\title{Find the sum of selected columns within a row}
+\usage{
+row_sum(
+  d,
+  columns_to_average = character(0),
+  pattern,
+  new_column_name = "row_sum",
+  threshold_proportion = 0.75,
+  verbose = FALSE
+)
+}
+\arguments{
+\item{d}{The data.frame containing the values to sum. Required.}
+
+\item{columns_to_average}{A character vector containing the column
+names to sum.
+If empty, \code{pattern} is used to select columns. Optional.}
+
+\item{pattern}{A regular expression pattern passed to \code{\link[base:grep]{base::grep()}}
+(with \code{perl = TRUE}). Required only if \code{columns_to_average} is empty.}
+
+\item{new_column_name}{The name of the new column that represents the sum
+of the specified columns. Required; defaults to \code{"row_sum"}.}
+
+\item{threshold_proportion}{Designates the minimum proportion of columns
+that have a nonmissing value (within each row) in order to return a sum.
+Required; defaults to 0.75.}
+
+\item{verbose}{a logical value to designate if extra information is
+displayed in the console,
+such as which columns are matched by \code{pattern}.}
+}
+\value{
+The data.frame \code{d}, with the additional column containing the row sum.
+}
+\description{
+Sums across columns within a row,
+while accounting for nonmissingness.
+Specify the desired columns by passing their explicit column names or
+by passing a regular expression that matches the column names.
+}
+\details{
+If the specified columns are all logicals or integers,
+the new column will be an \link{integer}.
+Otherwise the new column will be a \link{double}.
+}
+\examples{
+library(OuhscMunge) #Load the package into the current R session.
+
+d <- data.frame(a = c(1L, 2L, NA), b = c(3L, NA, NA))
+row_sum(d, columns_to_average = c("a", "b"), threshold_proportion = .5)
+
+}
+\author{
+Will Beasley
+}