Skip to content

Commit

Permalink
starting row_sum()
Browse files Browse the repository at this point in the history
ref #126
  • Loading branch information
wibeasley committed Oct 28, 2023
1 parent 9fbfca2 commit a0e35dc
Show file tree
Hide file tree
Showing 3 changed files with 172 additions and 0 deletions.
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ export(readr_spec_aligned)
export(replace_nas_with_explicit)
export(replace_with_nas)
export(retrieve_key_value)
export(row_sum)
export(snake_case)
export(trim_character)
export(trim_date)
Expand Down
114 changes: 114 additions & 0 deletions R/row.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
#' @name row_sum
#' @title Find the sum of selected columns within a row
#'
#' @description Sums across columns within a row,
#' while accounting for nonmissingness.
#' Specify the desired columns by passing their explicit column names or
#' by passing a regular expression to matches the column names.
#'
#' @param d The data.frame containing the values to sum. Required.
#' @param columns_to_average A character vector containing the columns
#' names to sum.
#' If empty, `pattern` is used to select columns. Optional.
#' @param pattern A regular expression pattern passed to [base::grep()]
#' (with `perl = TRUE`). Optional
#' @param new_column_name The name of the new column that represents the sum
#' of the specified columns. Required.
#' @param threshold_proportion Designates the minimum proportion of columns
#' that have a nonmissing values (within each row) in order to return a sum.
#' Required; defaults to to 0.75.
#' @param vebose a logical value to designate if extra information is
#' displayed in the console,
#' such as which columns are matched by `pattern`.
#'
#' @return The data.frame `d`, with the additional column containing the row sum.
#'
#' @details
#' If the specified columns are all logicals or integers,
#' the new column will be an [integer].
#' Otherwise the new column will be a [double].
#'
#' @note
#' @author Will Beasley
#' @examples
#' library(OuhscMunge) #Load the package into the current R session.

#'
#' @export
row_sum <- function(
d,
columns_to_average = character(0),
pattern,
new_column_name = "row_sum",
threshold_proportion = .75,
verbose = FALSE
) {

if (length(columns_to_average) == 0L) {
columns_to_average <-
d |>
colnames() |>
grep(
x = _,
pattern = pattern,
value = TRUE,
perl = TRUE
)

if (verbose) {
message(
"The following columns will be summed:\n- ",
paste(columns_to_average, collapse = "\n- ")
)
}
}

cast_to_integer <-
d |>
dplyr::select(!!columns_to_average) |>
purrr::every(
\(x) {
is.logical(x) | is.integer(x)
}
)

rs <- nonmissing_count <- NULL
d <-
d |>
dplyr::mutate(
rs = # Finding the sum (used by m4)
rowSums(
dplyr::across(!!columns_to_average),
na.rm = TRUE
),
# rs = dplyr::if_else(cast_to_integer, as.integer(rs), rs),
nonmissing_count =
rowSums(
dplyr::across(
!!columns_to_average,
.fns = \(x) { !is.na(x) }
)
),
nonmissing_proportion = nonmissing_count / length(columns_to_average),
{{new_column_name}} :=
dplyr::if_else(
threshold_proportion <= nonmissing_proportion,
rs,
# rs / nonmissing_count,
NA_real_
)
) |>
dplyr::select(
-rs,
-nonmissing_count,
-nonmissing_proportion,
)
# Alternatively, return just the new columns
# dplyr::pull({{new_column_name}})

if (cast_to_integer) {
d[[new_column_name]] <- as.integer(d[[new_column_name]])
}

d
}
57 changes: 57 additions & 0 deletions man/row_sum.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit a0e35dc

Please sign in to comment.