diff --git a/DESCRIPTION b/DESCRIPTION index 0ffee05..ab08b56 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -2,7 +2,7 @@ Package: OuhscMunge Title: Data Manipulation Operations Description: Data manipulation operations frequently used in OUHSC BBMC projects. -Version: 1.0.0.9000 +Version: 1.0.1.9000 Authors@R: person("Will", "Beasley", email="wibeasley@hotmail.com", role=c("aut", "cre"), comment = c(ORCID = "0000-0002-5613-5006")) URL: https://github.com/OuhscBbmc/OuhscMunge, http://ouhsc.edu/bbmc/ @@ -36,4 +36,4 @@ Suggests: Encoding: UTF-8 Language: en-US Roxygen: list(markdown = TRUE) -RoxygenNote: 7.3.0 +RoxygenNote: 7.3.2 diff --git a/NAMESPACE b/NAMESPACE index 1d7661f..03dd1f8 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -40,6 +40,7 @@ export(readr_spec_aligned) export(replace_nas_with_explicit) export(replace_with_nas) export(retrieve_key_value) +export(row_mean) export(row_sum) export(snake_case) export(trim_character) diff --git a/R/row.R b/R/row.R index 18de968..6732761 100644 --- a/R/row.R +++ b/R/row.R @@ -1,4 +1,4 @@ -#' @name row_sum +#' @name row #' @title Find the sum of selected columns within a row #' #' @description Sums across columns within a row, @@ -7,8 +7,8 @@ #' by passing a regular expression to matches the column names. #' #' @param d The data.frame containing the values to sum. Required. -#' @param columns_to_average A character vector containing the columns -#' names to sum. +#' @param columns_to_process A character vector containing the columns +#' names to process (_e.g._, to average or to sum). #' If empty, `pattern` is used to select columns. Optional. #' @param pattern A regular expression pattern passed to [base::grep()] #' (with `perl = TRUE`). Optional @@ -42,17 +42,24 @@ #' @examples #' mtcars |> #' OuhscMunge::row_sum( -#' columns_to_average = c("cyl", "disp", "vs", "carb"), +#' columns_to_process = c("cyl", "disp", "vs", "carb"), #' new_column_name = "engine_sum" #' ) #' #' mtcars |> #' OuhscMunge::row_sum( -#' columns_to_average = c("cyl", "disp", "vs", "carb"), +#' columns_to_process = c("cyl", "disp", "vs", "carb"), #' new_column_name = "engine_sum", #' nonmissing_count_name = "engine_nonmissing_count" #' ) #' +#' mtcars |> +#' OuhscMunge::row_mean( +#' columns_to_process = c("cyl", "disp", "vs", "carb"), +#' new_column_name = "engine_mean", +#' nonmissing_count_name = "engine_nonmissing_count" +#' ) +#' #' if (require(tidyr)) #' tidyr::billboard |> #' OuhscMunge::row_sum( @@ -79,10 +86,11 @@ #' week_sum, #' ) +#' @rdname row #' @export row_sum <- function( d, - columns_to_average = character(0), + columns_to_process = character(0), pattern = "", new_column_name = "row_sum", threshold_proportion = .75, @@ -90,15 +98,15 @@ row_sum <- function( verbose = FALSE ) { checkmate::assert_data_frame(d) - checkmate::assert_character(columns_to_average , any.missing = FALSE) + checkmate::assert_character(columns_to_process , any.missing = FALSE) checkmate::assert_character(pattern , len = 1) checkmate::assert_character(new_column_name , len = 1) checkmate::assert_double( threshold_proportion, len = 1) checkmate::assert_character(nonmissing_count_name, len = 1, min.chars = 1, any.missing = TRUE) checkmate::assert_logical( verbose , len = 1) - if (length(columns_to_average) == 0L) { - columns_to_average <- + if (length(columns_to_process) == 0L) { + columns_to_process <- d |> colnames() |> grep( @@ -110,15 +118,15 @@ row_sum <- function( if (verbose) { message( - "The following columns will be summed:\n- ", - paste(columns_to_average, collapse = "\n- ") + "The following columns will be processed:\n- ", + paste(columns_to_process, collapse = "\n- ") ) } } cast_to_integer <- d |> - dplyr::select(!!columns_to_average) |> + dplyr::select(!!columns_to_process) |> purrr::every( \(x) { is.logical(x) | is.integer(x) @@ -131,19 +139,19 @@ row_sum <- function( dplyr::mutate( .rs = rowSums( - dplyr::across(!!columns_to_average), + dplyr::across(!!columns_to_process), na.rm = TRUE ), .nonmissing_count = rowSums( dplyr::across( - !!columns_to_average, + !!columns_to_process, .fns = \(x) { !is.na(x) } ) ), - .nonmissing_proportion = .nonmissing_count / length(columns_to_average), + .nonmissing_proportion = .nonmissing_count / length(columns_to_process), {{new_column_name}} := dplyr::if_else( threshold_proportion <= .nonmissing_proportion, @@ -177,3 +185,90 @@ row_sum <- function( d } + +#' @rdname row +#' @export +row_mean <- function( + d, + columns_to_process = character(0), + pattern = "", + new_column_name = "row_mean", + threshold_proportion = .75, + nonmissing_count_name = NA_character_, + verbose = FALSE +) { + checkmate::assert_data_frame(d) + checkmate::assert_character(columns_to_process , any.missing = FALSE) + checkmate::assert_character(pattern , len = 1) + checkmate::assert_character(new_column_name , len = 1) + checkmate::assert_double( threshold_proportion, len = 1) + checkmate::assert_character(nonmissing_count_name, len = 1, min.chars = 1, any.missing = TRUE) + checkmate::assert_logical( verbose , len = 1) + + if (length(columns_to_process) == 0L) { + columns_to_process <- + d |> + colnames() |> + grep( + x = _, + pattern = pattern, + value = TRUE, + perl = TRUE + ) + + if (verbose) { + message( + "The following columns will be processed:\n- ", + paste(columns_to_process, collapse = "\n- ") + ) + } + } + + .rm <- .nonmissing_count <- .nonmissing_proportion <- NULL + d <- + d |> + dplyr::mutate( + .rm = + rowMeans( + dplyr::across(!!columns_to_process), + na.rm = TRUE + ), + .nonmissing_count = + rowSums( + dplyr::across( + !!columns_to_process, + .fns = \(x) { + !is.na(x) + } + ) + ), + .nonmissing_proportion = .nonmissing_count / length(columns_to_process), + {{new_column_name}} := + dplyr::if_else( + threshold_proportion <= .nonmissing_proportion, + .rm, + # .rs / .nonmissing_count, + NA_real_ + ) + ) + + if (!is.na(nonmissing_count_name)) { + d <- + d |> + dplyr::mutate( + {{nonmissing_count_name}} := .nonmissing_count, + ) + } + + d <- + d |> + dplyr::select( + -.rm, + -.nonmissing_count, + -.nonmissing_proportion, + ) + # Alternatively, return just the new columns + # dplyr::pull({{new_column_name}}) + + d +} diff --git a/README.md b/README.md index d4d91e3..ff94758 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ We encourage input and collaboration from the overall community. If you're fami * *Oklahoma Shared Clinical and Translational Resources*, sponsored by [NIH NIGMS; U54 GM104938](https://grantome.com/grant/NIH/U54-GM104938). Judith A. James, PI, OUHSC; 2013-2018. * *Oklahoma Shared Clinical and Translational Resources*, sponsored by [NIH U54GM104938](https://taggs.hhs.gov/Detail/AwardDetail?arg_AwardNum=U54GM104938&arg_ProgOfficeCode=127); 2020-2021. -* *OUHSC CCAN Independent Evaluation of the State of Oklahoma Competitive Maternal, Infant, and Early Childhood Home Visiting ([MIECHV](https://mchb.hrsa.gov/programs-impact/programs/home-visiting)) Project.*: Evaluates MIECHV expansion and enhancement of Evidence-based Home Visitation programs in four Oklahoma counties. [HRSA/ACF D89MC23154](https://perf-data.hrsa.gov/mchb/DGISReports/Abstract/AbstractDetails.aspx?Source=TVIS&GrantNo=D89MC23154&FY=2012). +* *OUHSC CCAN Independent Evaluation of the State of Oklahoma Competitive Maternal, Infant, and Early Childhood Home Visiting ([MIECHV](https://mchb.hrsa.gov/programs-impact/programs/home-visiting/maternal-infant-early-childhood-home-visiting-miechv-program)) Project.*: Evaluates MIECHV expansion and enhancement of Evidence-based Home Visitation programs in four Oklahoma counties. [HRSA/ACF D89MC23154](https://perf-data.hrsa.gov/mchb/DGISReports/Abstract/AbstractDetails.aspx?Source=TVIS&GrantNo=D89MC23154&FY=2012). (So far) the primary developers of OuhscMunge are the external evaluators for [Oklahoma's MIECHV](https://www.ok.gov/health/Child_and_Family_Health/Family_Support_and_Prevention_Service/MIECHV_Program_-_Federal_Home_Visiting_Grant/MIECHV_Program_Resources/index.html) program. @@ -45,5 +45,5 @@ Dev Branch: | [GitHub](https://github.com/OuhscBbmc/OuhscMunge) | [Travis-CI](https://app.travis-ci.com/OuhscBbmc/OuhscMunge/builds) | [CodeCov](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/) | | :----- | :---------------------------: | :-------: | -| [Main](https://github.com/OuhscBbmc/OuhscMunge/tree/main) | [![R-CMD-check](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml/badge.svg)](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml) | [![codecov](https://codecov.io/gh/OuhscBbmc/OuhscMunge/branch/main/graph/badge.svg?token=O1mkr31GRw)](https://codecov.io/gh/OuhscBbmc/OuhscMunge) | -| [Dev](https://github.com/OuhscBbmc/OuhscMunge/tree/dev) | [![R-CMD-check](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml/badge.svg?branch=dev)](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml) | [![codecov](https://codecov.io/gh/OuhscBbmc/OuhscMunge/branch/dev/graph/badge.svg?token=O1mkr31GRw)](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/tree/dev) | +| [Main](https://github.com/OuhscBbmc/OuhscMunge/tree/main) | [![R-CMD-check](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml/badge.svg)](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml) | [![codecov](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/branch/main/graph/badge.svg?token=O1mkr31GRw)](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge) | +| [Dev](https://github.com/OuhscBbmc/OuhscMunge/tree/dev) | [![R-CMD-check](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml/badge.svg?branch=dev)](https://github.com/OuhscBbmc/OuhscMunge/actions/workflows/check-release.yaml) | [![codecov](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/branch/dev/graph/badge.svg?token=O1mkr31GRw)](https://app.codecov.io/gh/OuhscBbmc/OuhscMunge/tree/dev) | diff --git a/man/OuhscMunge.Rd b/man/OuhscMunge.Rd index a52e99b..4877f4c 100644 --- a/man/OuhscMunge.Rd +++ b/man/OuhscMunge.Rd @@ -7,7 +7,7 @@ Thanks to Funders, including \href{https://perf-data.hrsa.gov/mchb/DGISReports/Abstract/AbstractDetails.aspx?Source=TVIS&GrantNo=D89MC23154&FY=2012}{HRSA/ACF D89MC23154} \emph{OUHSC CCAN Independent Evaluation of the State of Oklahoma Competitive Maternal, Infant, and Early Childhood Home Visiting -(\href{http://mchb.hrsa.gov/programs/homevisiting/}{MIECHV}) Project.}, which +(\href{https://mchb.hrsa.gov/programs-impact/programs/home-visiting/maternal-infant-early-childhood-home-visiting-miechv-program}{MIECHV}) Project.}, which evaluates MIECHV expansion and enhancement of Evidence-based Home Visitation programs in four Oklahoma counties. } \note{ diff --git a/man/hash_and_salt_sha_256.Rd b/man/hash_and_salt_sha_256.Rd index ff2a19f..fec3bed 100644 --- a/man/hash_and_salt_sha_256.Rd +++ b/man/hash_and_salt_sha_256.Rd @@ -34,7 +34,7 @@ This approach protects the actual value of \code{x}, while still allowing a down to determine which cells were derived from the same \code{x}. For example, suppose a patient's -\href{https://ushik.ahrq.gov/ViewItemDetails?system=ps&itemKey=88720000}{mrn} +\href{https://www.ahrq.gov:443/data/ushik.html}{mrn} of '111' is hashed, and the output is 'abc'. (To view it's real value, execute \code{OuhscMunge::hash_and_salt_sha_256(111)}.) When given the value 'abc', is it computational infeasible to determine the input diff --git a/man/row_sum.Rd b/man/row.Rd similarity index 79% rename from man/row_sum.Rd rename to man/row.Rd index b9fbeb0..c1c063a 100644 --- a/man/row_sum.Rd +++ b/man/row.Rd @@ -1,24 +1,36 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/row.R -\name{row_sum} +\name{row} +\alias{row} \alias{row_sum} +\alias{row_mean} \title{Find the sum of selected columns within a row} \usage{ row_sum( d, - columns_to_average = character(0), + columns_to_process = character(0), pattern = "", new_column_name = "row_sum", threshold_proportion = 0.75, nonmissing_count_name = NA_character_, verbose = FALSE ) + +row_mean( + d, + columns_to_process = character(0), + pattern = "", + new_column_name = "row_mean", + threshold_proportion = 0.75, + nonmissing_count_name = NA_character_, + verbose = FALSE +) } \arguments{ \item{d}{The data.frame containing the values to sum. Required.} -\item{columns_to_average}{A character vector containing the columns -names to sum. +\item{columns_to_process}{A character vector containing the columns +names to process (\emph{e.g.}, to average or to sum). If empty, \code{pattern} is used to select columns. Optional.} \item{pattern}{A regular expression pattern passed to \code{\link[base:grep]{base::grep()}} @@ -62,17 +74,24 @@ Otherwise the new column will be a \link{double}. \examples{ mtcars |> OuhscMunge::row_sum( - columns_to_average = c("cyl", "disp", "vs", "carb"), + columns_to_process = c("cyl", "disp", "vs", "carb"), new_column_name = "engine_sum" ) mtcars |> OuhscMunge::row_sum( - columns_to_average = c("cyl", "disp", "vs", "carb"), + columns_to_process = c("cyl", "disp", "vs", "carb"), new_column_name = "engine_sum", nonmissing_count_name = "engine_nonmissing_count" ) +mtcars |> + OuhscMunge::row_mean( + columns_to_process = c("cyl", "disp", "vs", "carb"), + new_column_name = "engine_mean", + nonmissing_count_name = "engine_nonmissing_count" + ) + if (require(tidyr)) tidyr::billboard |> OuhscMunge::row_sum( diff --git a/man/snake_case.Rd b/man/snake_case.Rd index 74cd1ae..c120cb7 100644 --- a/man/snake_case.Rd +++ b/man/snake_case.Rd @@ -14,8 +14,8 @@ A vector of converted names. } \description{ This function attempts to convert variables to snake_case, even if it's already in snake_case. -The important regex lines were posted by Stack Overflow user \href{http://stackoverflow.com/users/129879/epost}{epost} -in \href{http://stackoverflow.com/a/1176023/1082435}{"Elegant Python function to convert CamelCase to snake_case?"}. +The important regex lines were posted by Stack Overflow user \href{https://stackoverflow.com/users/129879/epost}{epost} +in \href{https://stackoverflow.com/a/1176023/1082435}{"Elegant Python function to convert CamelCase to snake_case?"}. } \note{ This series of regexes has an advantages over the current diff --git a/tests/testthat/test-row-mean.R b/tests/testthat/test-row-mean.R new file mode 100644 index 0000000..e40f8a8 --- /dev/null +++ b/tests/testthat/test-row-mean.R @@ -0,0 +1,316 @@ +library(testthat) + +test_that("mtcars -engine_mean", { + expected <- structure( + list(mpg = c(21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, + 24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4, + 30.4, 33.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8, + 19.7, 15, 21.4), cyl = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, + 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4), + disp = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8, + 167.6, 167.6, 275.8, 275.8, 275.8, 472, 460, 440, 78.7, 75.7, + 71.1, 120.1, 318, 304, 350, 400, 79, 120.3, 95.1, 351, 145, + 301, 121), hp = c(110, 110, 93, 110, 175, 105, 245, 62, 95, + 123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 150, + 150, 245, 175, 66, 91, 113, 264, 175, 335, 109), drat = c(3.9, + 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92, + 3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76, + 3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11 + ), wt = c(2.62, 2.875, 2.32, 3.215, 3.44, 3.46, 3.57, 3.19, + 3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2, + 1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 3.845, 1.935, 2.14, + 1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46, 17.02, 18.61, + 19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3, 18.9, 17.4, 17.6, + 18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9, 20.01, 16.87, + 17.3, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6 + ), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1), am = c(1, + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), gear = c(4, 4, 4, 3, + 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3, + 3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4, 4, 1, 1, 2, 1, 4, + 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1, + 2, 2, 4, 6, 8, 2), engine_mean = c(42.5, 42.5, 28.5, 66.5, + 92.5, 58.25, 93, 38.425, 36.95, 44.65, 44.65, 71.7, 71.7, + 71.7, 121, 118, 113, 21.175, 20.675, 19.275, 31.525, 82, + 78.5, 90.5, 102.5, 21.25, 31.575, 25.525, 90.75, 39.25, 79.25, + 32)), row.names = c("Mazda RX4", "Mazda RX4 Wag", "Datsun 710", + "Hornet 4 Drive", "Hornet Sportabout", "Valiant", "Duster 360", + "Merc 240D", "Merc 230", "Merc 280", "Merc 280C", "Merc 450SE", + "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood", "Lincoln Continental", + "Chrysler Imperial", "Fiat 128", "Honda Civic", "Toyota Corolla", + "Toyota Corona", "Dodge Challenger", "AMC Javelin", "Camaro Z28", + "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2", "Lotus Europa", + "Ford Pantera L", "Ferrari Dino", "Maserati Bora", "Volvo 142E" + ), class = "data.frame" + ) + + actual <- + mtcars |> + row_mean( + columns_to_process = c("cyl", "disp", "vs", "carb"), + new_column_name = "engine_mean" + ) + + # expected <- dput(actual) + testthat::expect_equal(actual, expected) +}) + +test_that("mtcars -engine_mean & nonmissing count", { + expected <- + structure( + list(mpg = c(21, 21, 22.8, 21.4, 18.7, 18.1, 14.3, + 24.4, 22.8, 19.2, 17.8, 16.4, 17.3, 15.2, 10.4, 10.4, 14.7, 32.4, + 30.4, 33.9, 21.5, 15.5, 15.2, 13.3, 19.2, 27.3, 26, 30.4, 15.8, + 19.7, 15, 21.4), cyl = c(6, 6, 4, 6, 8, 6, 8, 4, 4, 6, 6, 8, + 8, 8, 8, 8, 8, 4, 4, 4, 4, 8, 8, 8, 8, 4, 4, 4, 8, 6, 8, 4), + disp = c(160, 160, 108, 258, 360, 225, 360, 146.7, 140.8, + 167.6, 167.6, 275.8, 275.8, 275.8, 472, 460, 440, 78.7, 75.7, + 71.1, 120.1, 318, 304, 350, 400, 79, 120.3, 95.1, 351, 145, + 301, 121), hp = c(110, 110, 93, 110, 175, 105, 245, 62, 95, + 123, 123, 180, 180, 180, 205, 215, 230, 66, 52, 65, 97, 150, + 150, 245, 175, 66, 91, 113, 264, 175, 335, 109), drat = c(3.9, + 3.9, 3.85, 3.08, 3.15, 2.76, 3.21, 3.69, 3.92, 3.92, 3.92, + 3.07, 3.07, 3.07, 2.93, 3, 3.23, 4.08, 4.93, 4.22, 3.7, 2.76, + 3.15, 3.73, 3.08, 4.08, 4.43, 3.77, 4.22, 3.62, 3.54, 4.11 + ), wt = c(2.62, 2.875, 2.32, 3.215, 3.44, 3.46, 3.57, 3.19, + 3.15, 3.44, 3.44, 4.07, 3.73, 3.78, 5.25, 5.424, 5.345, 2.2, + 1.615, 1.835, 2.465, 3.52, 3.435, 3.84, 3.845, 1.935, 2.14, + 1.513, 3.17, 2.77, 3.57, 2.78), qsec = c(16.46, 17.02, 18.61, + 19.44, 17.02, 20.22, 15.84, 20, 22.9, 18.3, 18.9, 17.4, 17.6, + 18, 17.98, 17.82, 17.42, 19.47, 18.52, 19.9, 20.01, 16.87, + 17.3, 15.41, 17.05, 18.9, 16.7, 16.9, 14.5, 15.5, 14.6, 18.6 + ), vs = c(0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, + 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1), am = c(1, + 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, + 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1), gear = c(4, 4, 4, 3, + 3, 3, 3, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 4, 4, 4, 3, 3, 3, + 3, 3, 4, 5, 5, 5, 5, 5, 4), carb = c(4, 4, 1, 1, 2, 1, 4, + 2, 2, 4, 4, 3, 3, 3, 4, 4, 4, 1, 2, 1, 1, 2, 2, 4, 2, 1, + 2, 2, 4, 6, 8, 2), engine_mean = c(42.5, 42.5, 28.5, 66.5, + 92.5, 58.25, 93, 38.425, 36.95, 44.65, 44.65, 71.7, 71.7, + 71.7, 121, 118, 113, 21.175, 20.675, 19.275, 31.525, 82, + 78.5, 90.5, 102.5, 21.25, 31.575, 25.525, 90.75, 39.25, 79.25, + 32), engine_nonmissing_count = c(4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4)), row.names = c("Mazda RX4", + "Mazda RX4 Wag", "Datsun 710", "Hornet 4 Drive", "Hornet Sportabout", + "Valiant", "Duster 360", "Merc 240D", "Merc 230", "Merc 280", + "Merc 280C", "Merc 450SE", "Merc 450SL", "Merc 450SLC", "Cadillac Fleetwood", + "Lincoln Continental", "Chrysler Imperial", "Fiat 128", "Honda Civic", + "Toyota Corolla", "Toyota Corona", "Dodge Challenger", "AMC Javelin", + "Camaro Z28", "Pontiac Firebird", "Fiat X1-9", "Porsche 914-2", + "Lotus Europa", "Ford Pantera L", "Ferrari Dino", "Maserati Bora", + "Volvo 142E"), class = "data.frame" + ) + + actual <- + mtcars |> + row_mean( + columns_to_process = c("cyl", "disp", "vs", "carb"), + new_column_name = "engine_mean", + nonmissing_count_name = "engine_nonmissing_count" + ) + + # expected <- dput(actual) + testthat::expect_equal(actual, expected) +}) + +test_that("billboard -all weeks", { + expected <- + c( + NA, NA, 26.4716981132075, 67.1, 56.2222222222222, 37.65, NA, + 52.05, 16.65625, 67.75, 76.2727272727273, 19.952380952381, 24.1363636363636, + 19.2916666666667, 51.25, NA, 80.2758620689655, NA, 82.05, 23.46875, + 47.85, 32.35, 30.6451612903226, 28.05, 23.75, 60.2, 32.35, 62.5, + 33.9523809523809, 72.4, 87.5555555555556, NA, 85.5333333333333, + 62.9411764705882, 65, 21.5862068965517, 74, 85.3333333333333, + 33.304347826087, 72.5833333333333, 51.6, 21, 57.15, NA, NA, 45.8, + 76.8421052631579, NA, 84.375, 86, 90.4, NA, 37, 66.2666666666667, + NA, 73.8181818181818, 59.45, 69, 84.25, NA, 65.7368421052632, + 47.3, 36.859649122807, 33.8297872340426, 88.6923076923077, NA, + 47.4117647058824, 41.7619047619048, 74.6, 82, 75.6666666666667, + 48.4, 68.55, NA, 14.8214285714286, 22.9375, 20.875, 77.4285714285714, + NA, 24.8571428571429, 75.3, 48.45, 50.6, 47.1, 43.25, 43.25, + 89.3846153846154, 26.8928571428571, 71.1428571428571, NA, 30.7, + 14.3333333333333, 68.0666666666667, 32.8421052631579, 69.8, NA, + NA, 54.7, NA, 60.875, 31.1904761904762, 62.7647058823529, 86.0833333333333, + 34.15, 39.6666666666667, NA, NA, NA, 69.15, 64.5263157894737, + 69.2666666666667, 86.5, 50, 21.6666666666667, 58.8, 49.9090909090909, + NA, 97.1666666666667, 71.3, 74.95, 96.625, 23.0188679245283, + 69.5, 49.4285714285714, NA, 78.8461538461538, 72.2631578947368, + 56, 17.8571428571429, 76.1111111111111, 49.1, 55.6666666666667, + 65.6111111111111, 15.85, 61.1176470588235, 63.4117647058824, + 31.15, 58.1, 67.5882352941177, 63.4, 34.9, 31.375, 19.4166666666667, + 69.375, 31.3, 81.5555555555556, 87.8666666666667, 23.5238095238095, + 60, 21.2045454545455, 77.7647058823529, 75.2, 45.65, 28.4375, + NA, 37.1666666666667, 69.6, 45.45, 67.8333333333333, NA, 39, + 80.4444444444444, 76.5, NA, NA, NA, NA, 65.75, NA, 85.375, 45.75, + 86.3636363636364, 86.6666666666667, 77.1, NA, 57.9230769230769, + 86.2727272727273, 92.7222222222222, 78.9411764705882, 26.7272727272727, + 60.3, 49.9, 70.2941176470588, 84.2142857142857, NA, 60.1052631578947, + 38.8636363636364, 62.3333333333333, 79.6666666666667, 92.65, + 43, 13.4583333333333, NA, 53.6111111111111, 37.1111111111111, + 55.9, NA, 63.15, 70.8461538461538, 71.95, 56.05, 47.3333333333333, + 44.2, 65, 88, 67.8461538461538, 43.2, 86.1, 84.4, NA, 54.5, 92.1111111111111, + 64.1739130434783, 25.0909090909091, 50.65, 19.6333333333333, + 71.4705882352941, 33.05, 14.2608695652174, 21.68, 19.6153846153846, + 73.25, 30.8823529411765, 28.1904761904762, 26.4814814814815, + NA, 62.3846153846154, 94.4444444444444, 79.1111111111111, NA, + 54.9, 65.25, NA, 21.7777777777778, 18.625, 75.625, NA, 80.15, + 47.25, NA, NA, 67.5714285714286, 59, 61.8, 77.6842105263158, + 36.1818181818182, 40.8, 82.95, 35.6, 63.9, 19.9230769230769, + 52.85, 10.5, 46.55, 17.3636363636364, NA, 63.05, 68.2222222222222, + NA, 81.0666666666667, 44.875, NA, 62.05, 24.3076923076923, 19.1785714285714, + 82.6, 28.7692307692308, NA, 49.5, 22.125, 41.5833333333333, 54.5, + 48.1818181818182, 36, NA, 74, 38, 86.7692307692308, 73.8235294117647, + 60.7, 44.7, 58.45, NA, NA, NA, 79.8333333333333, 25.8636363636364, + 66.2, 36.4, 85.2727272727273, 66.95, 36.9259259259259, 90.0909090909091, + NA, 47.8181818181818, NA, 72, NA, 66.2631578947368, 57.15, 21.4390243902439, + 37, 53.5833333333333, 68.5555555555556, 90.1818181818182, 63.5, + NA, 43.35, 75.4210526315789, 59.8333333333333, 85.5333333333333, + 77.3, 83.125, NA, 88.8571428571429, NA, 18.6410256410256 + ) + + actual <- + tidyr::billboard |> + row_mean( + pattern = "^wk\\d{1,2}$", + new_column_name = "week_mean", + threshold_proportion = .1, + verbose = TRUE + ) |> + dplyr::pull(week_mean) + + # expected <- dput(actual) + testthat::expect_equal(actual, expected) +}) + +test_that("billboard -all weeks nonmissing count", { + expected <- + c( + 7, 3, 53, 20, 18, 20, 5, 20, 32, 20, 11, 21, 22, 24, 20, 5, + 29, 3, 20, 32, 20, 20, 31, 20, 24, 15, 20, 20, 21, 15, 9, 3, + 15, 17, 20, 29, 15, 9, 23, 12, 20, 37, 20, 3, 3, 20, 19, 6, 8, + 11, 10, 7, 20, 15, 7, 11, 20, 17, 12, 6, 19, 20, 57, 47, 13, + 5, 17, 21, 20, 11, 18, 20, 20, 3, 28, 32, 32, 14, 6, 28, 10, + 20, 15, 20, 20, 20, 13, 28, 14, 2, 20, 21, 15, 19, 10, 4, 1, + 20, 5, 16, 21, 17, 12, 20, 21, 1, 7, 1, 20, 19, 15, 12, 20, 27, + 20, 11, 7, 12, 20, 20, 8, 53, 14, 14, 4, 13, 19, 11, 28, 9, 20, + 12, 18, 20, 17, 17, 20, 20, 17, 15, 20, 24, 24, 8, 20, 9, 15, + 21, 19, 44, 17, 15, 20, 32, 6, 24, 15, 20, 12, 5, 20, 9, 10, + 5, 4, 2, 3, 20, 5, 8, 20, 11, 9, 10, 7, 13, 11, 18, 17, 55, 20, + 20, 17, 14, 7, 19, 22, 12, 18, 20, 9, 24, 5, 18, 18, 20, 1, 20, + 13, 20, 20, 21, 20, 14, 8, 13, 20, 20, 10, 6, 20, 9, 23, 22, + 20, 30, 17, 20, 23, 25, 26, 16, 34, 21, 27, 5, 13, 9, 9, 4, 20, + 20, 6, 27, 32, 8, 4, 20, 20, 5, 5, 14, 20, 20, 19, 22, 20, 20, + 25, 20, 26, 20, 26, 20, 33, 2, 20, 9, 5, 15, 16, 6, 20, 26, 28, + 20, 26, 4, 26, 24, 24, 20, 11, 20, 3, 12, 26, 13, 17, 20, 20, + 20, 7, 6, 5, 12, 22, 20, 20, 11, 20, 27, 11, 4, 22, 2, 16, 7, + 19, 20, 41, 21, 12, 9, 11, 20, 6, 20, 19, 18, 15, 10, 8, 6, 14, + 2, 39 + ) + + actual <- + tidyr::billboard |> + row_mean( + pattern = "^wk\\d{1,2}$", + new_column_name = "week_mean", + threshold_proportion = .1, + nonmissing_count_name = "nonmissing_count", + verbose = FALSE + ) |> + dplyr::pull(nonmissing_count) + + # expected <- dput(actual) + testthat::expect_equal(actual, expected) +}) + +test_that("billboard -subset", { + expected <- + c( + 85.4285714285714, NA, 63, 66.7777777777778, 35.4444444444444, + 22.4444444444444, NA, 46.8888888888889, 28.7777777777778, 67.3333333333333, + 73.7777777777778, 27.5555555555556, 26.7777777777778, 23, 46.2222222222222, + NA, 96.5, NA, 77.4444444444444, 37.2222222222222, 42.3333333333333, + 33.5555555555556, 48.4444444444444, 17.3333333333333, 31.1111111111111, + 43.3333333333333, 38, 71.5555555555556, 49.2222222222222, 63.4444444444444, + 87.5555555555556, NA, 81, 52.4444444444444, 59.6666666666667, + 34.7777777777778, 73.2222222222222, 83.5, 57.1111111111111, 65, + 46.6666666666667, 30.1111111111111, 45.2222222222222, NA, NA, + 56, 82.8888888888889, NA, 84.375, 85.7777777777778, 90.2222222222222, + 62, 44.2222222222222, 56.7777777777778, 90.5714285714286, 70, + 56.2222222222222, 67.5555555555556, 83.7777777777778, NA, 52.5555555555556, + 52, 64.1111111111111, 74.2222222222222, 89.8888888888889, NA, + 36.7777777777778, 52.2222222222222, 80, 78.6666666666667, 64.7777777777778, + 48.6666666666667, 82.7777777777778, NA, 30.4444444444444, 53.1111111111111, + 36.1111111111111, 68.4444444444444, NA, 45.6666666666667, 73.5555555555556, + 56.4444444444444, 26, 55.8888888888889, 40.6666666666667, 47.5555555555556, + 85.7777777777778, 55.2222222222222, 64.2222222222222, NA, 19.3333333333333, + 12.4444444444444, 58.2222222222222, 19, 68, NA, NA, 55.6666666666667, + NA, 54.3333333333333, 40.3333333333333, 60, 85.5555555555556, + 49.2222222222222, 55.1111111111111, NA, 93.5714285714286, NA, + 68.2222222222222, 66.8888888888889, 65.2222222222222, 87.4444444444444, + 41.7777777777778, 31.8888888888889, 62.6666666666667, 42.2222222222222, + 51.7142857142857, 97.8888888888889, 79.5555555555556, 67.5555555555556, + 96.7142857142857, 47, 68.4444444444444, 40.2222222222222, NA, + 72, 62.3333333333333, 51.2222222222222, 28, 76.1111111111111, + 45.7777777777778, 50.2222222222222, 63.5555555555556, 24.3333333333333, + 58.7777777777778, 66.7777777777778, 39.7777777777778, 56.5555555555556, + 64.3333333333333, 54.1111111111111, 23, 45.8888888888889, 30, + 69.375, 34.8888888888889, 81.5555555555556, 87.7777777777778, + 27.4444444444444, 55.5555555555556, 51.2222222222222, 68.6666666666667, + 62.7777777777778, 51.5555555555556, 64, NA, 49.6666666666667, + 67.1111111111111, 51.3333333333333, 64.6666666666667, NA, 36, + 80.4444444444444, 73.8888888888889, NA, NA, NA, NA, 56.7777777777778, + NA, 85.375, 25.8888888888889, 85.3333333333333, 84, 74.8888888888889, + 98.1428571428571, 49.7777777777778, 84, 93.8888888888889, 70.2222222222222, + 42.1111111111111, 62.8888888888889, 56.5555555555556, 61.3333333333333, + 87.6666666666667, 86.8571428571429, 56.1111111111111, 52.6666666666667, + 51.1111111111111, 79.4444444444444, 93.2222222222222, 43, 11.3333333333333, + NA, 49, 20, 51.8888888888889, NA, 57.8888888888889, 69, 69.4444444444444, + 65.6666666666667, 60.3333333333333, 47, 63.5555555555556, 88, + 66.1111111111111, 50.1111111111111, 81.7777777777778, 83.2222222222222, + NA, 51.6666666666667, 92.1111111111111, 45.1111111111111, 41.5555555555556, + 59.6666666666667, 40, 61.5555555555556, 44.8888888888889, 14.5555555555556, + 35.3333333333333, 18.8888888888889, 65.3333333333333, 60.1111111111111, + 41.2222222222222, 44.7777777777778, NA, 48.8888888888889, 94.4444444444444, + 79.1111111111111, NA, 48.5555555555556, 66.8888888888889, NA, + 36.7777777777778, 12.6666666666667, 75.625, NA, 83.2222222222222, + 23.8888888888889, NA, NA, 66, 67.4444444444444, 70.6666666666667, + 77, 53.1111111111111, 47.8888888888889, 83.2222222222222, 61.7777777777778, + 56.6666666666667, 22.8888888888889, 47.1111111111111, 4.88888888888889, + 39.7777777777778, 27, NA, 59.3333333333333, 68.2222222222222, + NA, 77.6666666666667, 33.2222222222222, NA, 60, 42.7777777777778, + 32.8888888888889, 70.8888888888889, 38.1111111111111, NA, 72.5555555555556, + 28.2222222222222, 57.2222222222222, 37.8888888888889, 38.2222222222222, + 23.1111111111111, NA, 67.1111111111111, 58.6666666666667, 83.5555555555556, + 67.8888888888889, 54.8888888888889, 49.7777777777778, 42.6666666666667, + 63.2857142857143, NA, NA, 74.7777777777778, 28.1111111111111, + 57.1111111111111, 39.1111111111111, 83.2222222222222, 55.3333333333333, + 49.6666666666667, 89.1111111111111, NA, 66.6666666666667, NA, + 67.7777777777778, 69.4285714285714, 57.4444444444444, 57.8888888888889, + 42.4444444444444, 39.7777777777778, 50.7777777777778, 68.5555555555556, + 89.1111111111111, 53.1111111111111, NA, 58.3333333333333, 65.7777777777778, + 47.6666666666667, 87.2222222222222, 75, 83.125, NA, 84.8888888888889, + NA, 26.6666666666667 + ) + + expected_message <- "The following columns will be processed:\n- wk1\n- wk2\n- wk3\n- wk4\n- wk5\n- wk6\n- wk7\n- wk8\n- wk9" + + expect_message( + regexp = expected_message,{ + actual <- + tidyr::billboard |> + row_mean( + pattern = "^wk\\d$", + new_column_name = "week_mean", + nonmissing_count_name = "nonmissing_count", + verbose = TRUE + ) |> + dplyr::pull(week_mean) + }) + + # expected <- dput(actual) + testthat::expect_equal(actual, expected) +}) diff --git a/tests/testthat/test-row.R b/tests/testthat/test-row-sum.R similarity index 97% rename from tests/testthat/test-row.R rename to tests/testthat/test-row-sum.R index dd2926a..ba88252 100644 --- a/tests/testthat/test-row.R +++ b/tests/testthat/test-row-sum.R @@ -47,7 +47,7 @@ test_that("mtcars -engine_sum", { actual <- mtcars |> row_sum( - columns_to_average = c("cyl", "disp", "vs", "carb"), + columns_to_process = c("cyl", "disp", "vs", "carb"), new_column_name = "engine_sum" ) @@ -105,7 +105,7 @@ test_that("mtcars -engine_sum & nonmissing count", { actual <- mtcars |> row_sum( - columns_to_average = c("cyl", "disp", "vs", "carb"), + columns_to_process = c("cyl", "disp", "vs", "carb"), new_column_name = "engine_sum", nonmissing_count_name = "engine_nonmissing_count" ) @@ -229,7 +229,7 @@ test_that("billboard -subset", { NA, 764, NA, 240 ) - expected_message <- "The following columns will be summed:\n- wk1\n- wk2\n- wk3\n- wk4\n- wk5\n- wk6\n- wk7\n- wk8\n- wk9" + expected_message <- "The following columns will be processed:\n- wk1\n- wk2\n- wk3\n- wk4\n- wk5\n- wk6\n- wk7\n- wk8\n- wk9" expect_message( regexp = expected_message,{