From 374aeec764c10ada3cc7612283cee0aa8041a6ae Mon Sep 17 00:00:00 2001
From: "Dr. Hannah De los Santos" <HDELOSSANTOS@MITRE.ORG>
Date: Wed, 13 Sep 2023 08:56:13 -0400
Subject: [PATCH] Clean up trues, update infants naming

---
 R/growth.R        | 46 +++++++++++++++++++++++-----------------------
 R/infants_clean.R | 24 ++++++++++++------------
 2 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/R/growth.R b/R/growth.R
index a004f19..8bef7ed 100644
--- a/R/growth.R
+++ b/R/growth.R
@@ -84,7 +84,7 @@
 #' @param adult_columns_filename Name of file to save original adult data, with additional output columns to
 #' as CSV. Defaults to "", for which this data will not be saved. Useful
 #' for post-analysis. For more information on this output, please see README.
-#' @param infants TRUE/FALSE. Run the beta-release of the infants algorithm (expands pediatric algorithm to clean 0 - 2). Defaults to FALSE.
+#' @param prelim_infants TRUE/FALSE. Run the in-development release of the infants algorithm (expands pediatric algorithm to improve performance for children 0 – 2 years). Not recommended for use in research. For more information regarding the logic of the algorithm, see the vignette 'Preliminary Infants Algorithm.' Defaults to FALSE.
 #'
 #' @return Vector of exclusion codes for each of the input measurements.
 #'
@@ -157,7 +157,7 @@ cleangrowth <- function(subjid,
                         adult_cutpoint = 20,
                         weight_cap = Inf,
                         adult_columns_filename = "",
-                        infants = FALSE) {
+                        prelim_infants = FALSE) {
   # avoid "no visible binding" warnings
   N <- age_years <- batch <- exclude <- index <- line <- NULL
   newbatch <- sd.median <- sd.orig <- tanner.months <- tbc.sd <- NULL
@@ -168,7 +168,7 @@ cleangrowth <- function(subjid,
   sd.orig_uncorr <- agemonths <- intwt <- fengadays <- pmagedays <- cagedays <-
     unmod_zscore <- fen_wt_m <- fen_wt_l <- fen_wt_s <- cwho_cv <- ccdc_cv <-
     sd.c_cdc <- sd.c_who <- sd.c <- sd.corr <- seq_win <- sd.corr_abssumdiff <-
-    sd.orig_abssumdiff <- ..orig_colnames <- ctbc.sd <- sum_sde <- no_sde <-
+    sd.orig_abssumdiff <- orig_colnames <- ctbc.sd <- sum_sde <- no_sde <-
     sum_val <- no_dup_val <- no_outliers <- no_bigdiff <- nottoofar <- nnte <-
     nnte_full <- NULL
 
@@ -227,7 +227,7 @@ cleangrowth <- function(subjid,
 
   # constants for pediatric
   # enumerate the different exclusion levels
-  if (infants){
+  if (prelim_infants){
     # different for infants
     exclude.levels.peds <- c(
       'Include',
@@ -372,7 +372,7 @@ cleangrowth <- function(subjid,
       if (is.na(num.batches)) {
         num.batches <- getDoParWorkers()
       }
-      if (infants){
+      if (prelim_infants){
         # variables needed for parallel workers
         var_for_par <- c("temporary_extraneous", "valid", "swap_parameters",
                          "na_as_false", "ewma", "read_anthro", "as_matrix_delta",
@@ -426,7 +426,7 @@ cleangrowth <- function(subjid,
     tanner.fields <- colnames(tanner.ht.vel)
     tanner.fields <- tanner.fields[!tanner.fields %in% c('sex', 'tanner.months')]
 
-    if (!infants){
+    if (!prelim_infants){
       who_max_ht_vel_path <- ifelse(
         ref.data.path == "",
         system.file(file.path("extdata", "who_ht_maxvel_3sd.csv.gz"), package = "growthcleanr"),
@@ -510,15 +510,15 @@ cleangrowth <- function(subjid,
     data.all[param == 'LENGTHCM', param := 'HEIGHTCM']
 
     # calculate z/sd scores
-    if(infants){
+    if(prelim_infants){
       if (!quietly)
         cat(sprintf("[%s] Calculating z-scores...\n", Sys.time()))
       # removing z calculations, as they are not used
       # for infants, use z and who
       measurement.to.z <- read_anthro(ref.data.path, cdc.only = TRUE,
-                                      infants = T)
+                                      prelim_infants = TRUE)
       measurement.to.z_who <- read_anthro(ref.data.path, cdc.only = FALSE,
-                                          infants = T)
+                                          prelim_infants = TRUE)
 
       # calculate "standard deviation" scores
       if (!quietly)
@@ -589,7 +589,7 @@ cleangrowth <- function(subjid,
       # merge with fenton curves
       data.all <- merge(
         data.all, fentlms_foraga, by = c("sex", "intwt"),
-        all.x = T)
+        all.x = TRUE)
 
       data.all[fengadays < 259, pmagedays := agedays + fengadays]
       data.all[fengadays < 259, cagedays := pmagedays - 280]
@@ -599,7 +599,7 @@ cleangrowth <- function(subjid,
       # merge with fenton curves
       data.all <- merge(
         data.all, fentlms_forz, by = c("sex", "fengadays"),
-        all.x = T)
+        all.x = TRUE)
 
       # add unmodified zscore using weight in unrounded grams
       data.all[, unmod_zscore :=
@@ -617,11 +617,11 @@ cleangrowth <- function(subjid,
       data.all <- merge(data.all, growthfile_who,
                         by.x = c("sex", "cagedays"),
                         by.y = c("sex", "agedays"),
-                        all.x = T)
+                        all.x = TRUE)
       data.all <- merge(data.all, growthfile_cdc,
                         by.x = c("sex", "cagedays"),
                         by.y = c("sex", "agedays"),
-                        all.x = T)
+                        all.x = TRUE)
 
       # adjust WHO and CDC heights based on age
       data.all[, cwho_cv := v]
@@ -759,7 +759,7 @@ cleangrowth <- function(subjid,
     if (!is.data.table(sd.recenter)) {
       # INFANTS CHANGES:
       # use recentering file derived from work, independent of sex
-      if (infants){
+      if (prelim_infants){
         infants_reference_medians_path <- ifelse(
           ref.data.path == "",
           system.file(file.path("extdata",
@@ -838,7 +838,7 @@ cleangrowth <- function(subjid,
 
     setkey(data.all, subjid, param, agedays)
     data.all[, tbc.sd := sd.orig - sd.median]
-    if (infants){
+    if (prelim_infants){
       # separate out corrected and noncorrected values
       data.all[, ctbc.sd := sd.corr - sd.median]
     }
@@ -871,7 +871,7 @@ cleangrowth <- function(subjid,
     # safety check: treat observations where tbc.sd cannot be calculated as missing
     data.all[is.na(tbc.sd), exclude := 'Missing']
 
-    if (infants){
+    if (prelim_infants){
       # 4: identify subset that don't need to be cleaned (nnte) ----
 
       # identify those meeting all subjects meeting these criteria as "no need
@@ -894,7 +894,7 @@ cleangrowth <- function(subjid,
       data.all[, no_outliers := no_outliers == 1]
       # all max - min tbd.sc < 2.5
       data.all[, no_bigdiff :=
-                 rep((abs(max(tbc.sd, na.rm = T) - min(tbc.sd, na.rm = T)) < 2.5),
+                 rep((abs(max(tbc.sd, na.rm = TRUE) - min(tbc.sd, na.rm = TRUE)) < 2.5),
                      .N),
                by = c("subjid", "param")]
       # the previous value can't be too far from the current value
@@ -930,7 +930,7 @@ cleangrowth <- function(subjid,
         num.batches
       ))
     if (num.batches == 1) {
-      if (!infants){
+      if (!prelim_infants){
         ret.df <- cleanbatch(data.all,
                              log.path = log.path,
                              quietly = quietly,
@@ -975,7 +975,7 @@ cleangrowth <- function(subjid,
         ifelse(!dir.exists(log.path), dir.create(log.path, recursive = TRUE), FALSE)
       }
 
-      if (!infants){
+      if (!prelim_infants){
         ret.df <- ddply(
           data.all,
           .(batch),
@@ -1152,7 +1152,7 @@ cleangrowth <- function(subjid,
 #'
 #' @param path Path to supplied reference anthro data. Defaults to package anthro tables.
 #' @param cdc.only Whether or not only CDC data should be used. Defaults to false.
-#' @param infants TRUE/FALSE. Run the beta-release of the infants algorithm (expands pediatric algorithm to clean 0 - 2). Defaults to FALSE.
+#' @param prelim_infants TRUE/FALSE. Run the in-development release of the infants algorithm (expands pediatric algorithm to improve performance for children 0 – 2 years). Not recommended for use in research. For more information regarding the logic of the algorithm, see the vignette 'Preliminary Infants Algorithm.' Defaults to FALSE.
 #'
 #' @return Function for calculating BMI based on measurement, age in days, sex, and measurement value.
 #' @export
@@ -1165,13 +1165,13 @@ cleangrowth <- function(subjid,
 #' # Return calculating function while specifying a path and using only CDC data
 #' afunc <- read_anthro(path = system.file("extdata", package = "growthcleanr"),
 #'                      cdc.only = TRUE)
-read_anthro <- function(path = "", cdc.only = FALSE, infants = FALSE) {
+read_anthro <- function(path = "", cdc.only = FALSE, prelim_infants = FALSE) {
   # avoid "no visible bindings" warning
   src <- param <- sex <- age <- ret <- m <- NULL
   csdneg <- csdpos <- s <- NULL
 
   # set correct path based on input reference table path (if any)
-  if (!infants){
+  if (!prelim_infants){
     weianthro_path <- ifelse(
       path == "",
       system.file(file.path("extdata", "weianthro.txt.gz"), package = "growthcleanr"),
@@ -1207,7 +1207,7 @@ read_anthro <- function(path = "", cdc.only = FALSE, infants = FALSE) {
   }
   growth_cdc_ext <- read.csv(gzfile(growth_cdc_ext_path))
 
-  l <- if (!infants){
+  l <- if (!prelim_infants){
     list(
       with(
         read.table(gzfile(weianthro_path), header = TRUE),
diff --git a/R/infants_clean.R b/R/infants_clean.R
index a1269ac..f83e48b 100644
--- a/R/infants_clean.R
+++ b/R/infants_clean.R
@@ -365,7 +365,7 @@ cleanbatch_infants <- function(data.df,
       upd.df <- copy(df)
       upd.df <- calc_oob_evil_twins(upd.df)
       # count the amount of oobs for each subject/param and distribute it out
-      upd.df[, `:=` (sum_oob = sum(oob, na.rm = T)), by =.(subjid, param)]
+      upd.df[, `:=` (sum_oob = sum(oob, na.rm = TRUE)), by =.(subjid, param)]
 
       any_oob <- any(upd.df$sum_oob >= 2)
       # while there are multiple oob, we want to remove
@@ -373,7 +373,7 @@ cleanbatch_infants <- function(data.df,
 
         # 9D
         # now calculate the maximum difference from the median tbc.sd
-        upd.df[, `:=` (sd_med = median(tbc.sd, na.rm = T)), by =.(subjid, param)]
+        upd.df[, `:=` (sd_med = median(tbc.sd, na.rm = TRUE)), by =.(subjid, param)]
         upd.df[, `:=` (med_diff = abs(tbc.sd - sd_med)), by =.(subjid, param)]
         upd.df[, `:=` (max_diff = med_diff  == max(med_diff)), by =.(subjid, param)]
         # for ones with no tbc.sd, mark as false
@@ -386,7 +386,7 @@ cleanbatch_infants <- function(data.df,
         #9E
         # reupdate valid (to recalculate OOB -- others are not included)
         upd.df <- calc_oob_evil_twins(df[valid(df),])
-        upd.df[, `:=` (sum_oob = sum(oob, na.rm = T)), by =.(subjid, param)]
+        upd.df[, `:=` (sum_oob = sum(oob, na.rm = TRUE)), by =.(subjid, param)]
 
         any_oob <- any(upd.df$sum_oob >= 2)
 
@@ -445,7 +445,7 @@ cleanbatch_infants <- function(data.df,
           "before" = abs(df_sub$agedays - c(NA, df_sub$agedays[1:(nrow(df_sub)-1)])),
           "after" = abs(df_sub$agedays - c(df_sub$agedays[2:(nrow(df_sub))], NA))
         )
-        maxdiff <- sapply(1:nrow(tmp), function(x){max(tmp[x,], na.rm = T)})
+        maxdiff <- sapply(1:nrow(tmp), function(x){max(tmp[x,], na.rm = TRUE)})
         exp_vals <- rep(-1.5, nrow(tmp))
         exp_vals[maxdiff > 365.25] <- -2.5
         exp_vals[maxdiff > 730.5] <- -3.5
@@ -652,7 +652,7 @@ cleanbatch_infants <- function(data.df,
         idx_roll <- c(embed(1:nrow(subj_df),4)[adjacent,, drop = FALSE])
         idx_adj <- which(subj_df$agedays %in% subj_df$agedays[idx_roll])
       } else {
-        adjacent <- F
+        adjacent <- FALSE
         idx_adj <- idx_roll <- c()
       }
 
@@ -680,7 +680,7 @@ cleanbatch_infants <- function(data.df,
             # only exclude sdes that are adjacent
             subj_df$agedays %in% subj_df$agedays[idx_roll]
           } else {
-            rep(F, nrow(subj_df))
+            rep(FALSE, nrow(subj_df))
           }
       # re-include similar groups
       criteria[subj_df$index %in% similar_ids] <- FALSE
@@ -709,7 +709,7 @@ cleanbatch_infants <- function(data.df,
 
         # check for SDEs by EWMA -- alternate calculate excluding other SDEs
         all_sdes <- duplicated(subj_df$agedays) |
-          duplicated(subj_df$agedays, fromLast = T)
+          duplicated(subj_df$agedays, fromLast = TRUE)
 
         # first, calculate which exponent we want to put through (pass a different
         # on for each exp)
@@ -717,7 +717,7 @@ cleanbatch_infants <- function(data.df,
           "before" = abs(subj_df$agedays - c(NA, subj_df$agedays[1:(nrow(subj_df)-1)])),
           "after" = abs(subj_df$agedays - c(subj_df$agedays[2:(nrow(subj_df))], NA))
         )
-        maxdiff <- sapply(1:nrow(tmp), function(x){max(tmp[x,], na.rm = T)})
+        maxdiff <- sapply(1:nrow(tmp), function(x){max(tmp[x,], na.rm = TRUE)})
         exp_vals <- rep(-1.5, nrow(tmp))
         exp_vals[maxdiff > 365.25] <- -2.5
         exp_vals[maxdiff > 730.5] <- -3.5
@@ -792,7 +792,7 @@ cleanbatch_infants <- function(data.df,
       if (any(subj_df$extraneous)){
         # check for SDEs
         all_sdes <- duplicated(subj_df$agedays) |
-          duplicated(subj_df$agedays, fromLast = T)
+          duplicated(subj_df$agedays, fromLast = TRUE)
 
         rem_ids <- c()
         rem_ids_extreme <- c()
@@ -930,7 +930,7 @@ cleanbatch_infants <- function(data.df,
           "before" = abs(df_sub$agedays - c(NA, df_sub$agedays[1:(nrow(df_sub)-1)])),
           "after" = abs(df_sub$agedays - c(df_sub$agedays[2:(nrow(df_sub))], NA))
         )
-        maxdiff <- sapply(1:nrow(tmp), function(x){max(tmp[x,], na.rm = T)})
+        maxdiff <- sapply(1:nrow(tmp), function(x){max(tmp[x,], na.rm = TRUE)})
         exp_vals <- rep(-1.5, nrow(tmp))
         exp_vals[maxdiff > 365.25] <- -2.5
         exp_vals[maxdiff > 730.5] <- -3.5
@@ -1191,7 +1191,7 @@ cleanbatch_infants <- function(data.df,
             "before" = abs(df_sub$agedays - c(NA, df_sub$agedays[1:(nrow(df_sub)-1)])),
             "after" = abs(df_sub$agedays - c(df_sub$agedays[2:(nrow(df_sub))], NA))
           )
-          maxdiff <- sapply(1:nrow(tmp), function(x){max(tmp[x,], na.rm = T)})
+          maxdiff <- sapply(1:nrow(tmp), function(x){max(tmp[x,], na.rm = TRUE)})
           exp_vals <- rep(-1.5, nrow(tmp))
           exp_vals[maxdiff > 365.25] <- -2.5
           exp_vals[maxdiff > 730.5] <- -3.5
@@ -1556,7 +1556,7 @@ cleanbatch_infants <- function(data.df,
         "before" = abs(df$agedays - c(NA, df$agedays[1:(nrow(df)-1)])),
         "after" = abs(df$agedays - c(df$agedays[2:(nrow(df))], NA))
       )
-      maxdiff_e <- sapply(1:nrow(tmp), function(x){max(tmp[x,], na.rm = T)})
+      maxdiff_e <- sapply(1:nrow(tmp), function(x){max(tmp[x,], na.rm = TRUE)})
       exp_vals <- rep(-1.5, nrow(tmp))
       exp_vals[maxdiff_e > 365.25] <- -2.5
       exp_vals[maxdiff_e > 730.5] <- -3.5