From 2474fafe370105712312842717d2a5febee9b88b Mon Sep 17 00:00:00 2001 From: Melkiades Date: Thu, 21 Nov 2024 13:18:34 +0100 Subject: [PATCH 01/18] feat: adding stat_names and support for row/col splits and summarize_row_groups --- R/00tabletrees.R | 3 +- R/colby_constructors.R | 9 +- R/tree_accessors.R | 28 ++++ R/tt_afun_utils.R | 29 +++- R/tt_as_df.R | 115 ++++++++++--- R/utils.R | 6 +- tests/testthat/test-result_data_frame.R | 207 ++++++++++++++++++++++++ vignettes/ard_how_to.Rmd | 111 ++++++++++--- 8 files changed, 447 insertions(+), 61 deletions(-) diff --git a/R/00tabletrees.R b/R/00tabletrees.R index 26eac6a83..72cd85724 100644 --- a/R/00tabletrees.R +++ b/R/00tabletrees.R @@ -1933,7 +1933,7 @@ RefFootnote <- function(note, index = NA_integer_, symbol = NA_character_) { ## indent_mod: indent modifier to be used for parent row CellValue <- function(val, format = NULL, colspan = 1L, label = NULL, indent_mod = NULL, footnotes = NULL, - align = NULL, format_na_str = NULL) { + align = NULL, format_na_str = NULL, stat_names = NULL) { if (is.null(colspan)) { colspan <- 1L } @@ -1957,6 +1957,7 @@ CellValue <- function(val, format = NULL, colspan = 1L, label = NULL, indent_mod = indent_mod, footnotes = footnotes, align = align, format_na_str = format_na_str, + stat_names = stat_names, class = "CellValue" ) ret diff --git a/R/colby_constructors.R b/R/colby_constructors.R index d23d243a1..bed7cded6 100644 --- a/R/colby_constructors.R +++ b/R/colby_constructors.R @@ -1488,7 +1488,8 @@ setMethod( } ret <- rcell(cnt, format = format, - label = label + label = label, + stat_names = "n" ) ret } @@ -1511,11 +1512,11 @@ setMethod( cnt <- sum(!is.na(df)) } ## the formatter does the *100 so we don't here. - ## TODO name elements of this so that ARD generation has access to them - ## ret <- rcell(c(n = cnt, pct = cnt / .N_col), + ## Elements are named with stat_names so that ARD generation has access to them ret <- rcell(c(cnt, cnt / .N_col), format = format, - label = label + label = label, + stat_names = c("n", "p") ) ret } diff --git a/R/tree_accessors.R b/R/tree_accessors.R index 462feb3d9..24f9c33f1 100644 --- a/R/tree_accessors.R +++ b/R/tree_accessors.R @@ -4322,3 +4322,31 @@ setMethod( obj } ) + +# stat_names for ARD ----------------------------------------------------------- +# +#' @rdname int_methods +#' @export +setGeneric("obj_stat_names", function(obj) standardGeneric("obj_stat_names")) +# +#' @rdname int_methods +#' @export +setGeneric("obj_stat_names<-", function(obj, value) standardGeneric("obj_stat_names<-")) + +#' @rdname int_methods +#' @export +setMethod("obj_stat_names<-", "CellValue", function(obj, value) { + attr(obj, "stat_names") <- value + obj +}) + +#' @rdname int_methods +#' @export +setMethod("obj_stat_names", "CellValue", function(obj) attr(obj, "stat_names")) + +#' @rdname int_methods +#' @export +setMethod( + "obj_stat_names", "RowsVerticalSection", + function(obj) lapply(obj, obj_stat_names) +) \ No newline at end of file diff --git a/R/tt_afun_utils.R b/R/tt_afun_utils.R index 5ebd80992..6594ec25e 100644 --- a/R/tt_afun_utils.R +++ b/R/tt_afun_utils.R @@ -10,10 +10,16 @@ #' @param label (`string` or `NULL`)\cr label. If non-`NULL`, it will be looked at when determining row labels. #' @param colspan (`integer(1)`)\cr column span value. #' @param footnotes (`list` or `NULL`)\cr referential footnote messages for the cell. +#' @param stat_names (`character`)\cr names for the statistics in the cell. It can be a vector of strings. #' #' @inherit CellValue return #' #' @note Currently column spanning is only supported for defining header structure. +#' +#' @examples +#' rcell(1, format = "xx.x") +#' rcell(c(1, 2), format = c("xx - xx")) +#' rcell(c(1, 2), stat_names = c("Rand1", "Rand2")) #' #' @rdname rcell #' @export @@ -24,7 +30,9 @@ rcell <- function(x, indent_mod = NULL, footnotes = NULL, align = NULL, - format_na_str = NULL) { + format_na_str = NULL, + stat_names = NULL) { + checkmate::assert_character(stat_names, null.ok = TRUE) if (!is.null(align)) { check_aligns(align) } @@ -47,6 +55,9 @@ rcell <- function(x, if (!is.null(format_na_str)) { obj_na_str(x) <- format_na_str } + if (!is.null(stat_names)) { + obj_stat_names(x) <- stat_names + } ret <- x } else { if (is.null(label)) { @@ -66,7 +77,8 @@ rcell <- function(x, label = label, indent_mod = indent_mod, footnotes = footnotes, - format_na_str = format_na_str + format_na_str = format_na_str, + stat_names = stat_names ) # RefFootnote(footnote)) } if (!is.null(align)) { @@ -113,6 +125,8 @@ non_ref_rcell <- function(x, is_ref, format = NULL, colspan = 1L, #' @param .aligns (`character` or `NULL`)\cr alignments for the cells. Standard for `NULL` is `"center"`. #' See [formatters::list_valid_aligns()] for currently supported alignments. #' @param .format_na_strs (`character` or `NULL`)\cr NA strings for the cells. +#' @param .stat_names (`character` or `NULL`)\cr names for the statistics in the cells. +#' It can be a vector of values. #' #' @note In post-processing, referential footnotes can also be added using row and column #' paths with [`fnotes_at_path<-`]. @@ -126,6 +140,8 @@ non_ref_rcell <- function(x, is_ref, format = NULL, colspan = 1L, #' in_rows(1, 2, 3, .names = c("a", "b", "c")) #' in_rows(1, 2, 3, .labels = c("a", "b", "c")) #' in_rows(1, 2, 3, .names = c("a", "b", "c"), .labels = c("AAA", "BBB", "CCC")) +#' in_rows(.list = list(a = c(NA, NA)), .formats = "xx - xx", .format_na_strs = list(c("asda", "lkjklj"))) +#' in_rows(.list = list(a = c(NA, NA)), .format_na_strs = c("asda", "lkjklj")) #' #' in_rows(.list = list(a = 1, b = 2, c = 3)) #' in_rows(1, 2, .list = list(3), .names = c("a", "b", "c")) @@ -150,7 +166,8 @@ in_rows <- function(..., .list = NULL, .names = NULL, .cell_footnotes = list(NULL), .row_footnotes = list(NULL), .aligns = NULL, - .format_na_strs = NULL) { + .format_na_strs = NULL, + .stat_names = list(NULL)) { if (is.function(.formats)) { .formats <- list(.formats) } @@ -172,11 +189,12 @@ in_rows <- function(..., .list = NULL, .names = NULL, length(.formats) > 0 || length(.names) > 0 || length(.indent_mods) > 0 || - length(.format_na_strs) > 0 + length(.format_na_strs) > 0 || + length(.stat_names) > 0 ) { stop( "in_rows got 0 rows but length >0 of at least one of ", - ".labels, .formats, .names, .indent_mods, .format_na_strs. ", + ".labels, .formats, .names, .indent_mods, .format_na_strs, .stat_names. ", "Does your analysis/summary function handle the 0 row ", "df/length 0 x case?" ) @@ -213,6 +231,7 @@ in_rows <- function(..., .list = NULL, .names = NULL, footnotes = .cell_footnotes %||% list(NULL), align = .aligns, format_na_str = .format_na_strs %||% list(NULL), + stat_names = .stat_names %||% list(NULL), SIMPLIFY = FALSE ) } diff --git a/R/tt_as_df.R b/R/tt_as_df.R index 409ad660a..a1de7a04a 100644 --- a/R/tt_as_df.R +++ b/R/tt_as_df.R @@ -63,22 +63,10 @@ as_result_df <- function(tt, spec = NULL, } if (is.null(spec)) { - raw_cvals <- cell_values(tt) - ## if the table has one row and multiple columns, sometimes the cell values returns a list of the cell values - ## rather than a list of length 1 representing the single row. This is bad but may not be changeable - ## at this point. - if (nrow(tt) == 1 && length(raw_cvals) > 1) { - raw_cvals <- list(raw_cvals) - } - - # Flatten the list of lists (rows) of cell values into a data frame - cellvals <- as.data.frame(do.call(rbind, raw_cvals)) - row.names(cellvals) <- NULL - - if (nrow(tt) == 1 && ncol(tt) == 1) { - colnames(cellvals) <- names(raw_cvals) - } - + # raw values + rawvals <- cell_values(tt) + cellvals <- .make_df_from_raw_data(rawvals, nr = nrow(tt), nc = ncol(tt)) + if (data_format %in% c("strings", "numeric")) { # we keep previous calculations to check the format of the data mf_tt <- matrix_form(tt) @@ -104,9 +92,6 @@ as_result_df <- function(tt, spec = NULL, } rdf <- make_row_df(tt) - cinfo_df <- col_info(tt) - ci_coltree <- coltree(cinfo_df) - column_split_names <- .get_column_split_name(ci_coltree) # used only in make_ard df <- rdf[, c("name", "label", "abs_rownumber", "path", "reprint_inds", "node_class")] # Removing initial root elements from path (out of the loop -> right maxlen) @@ -141,6 +126,12 @@ as_result_df <- function(tt, spec = NULL, cellvals ) } + + # Fix for content rows analysis variable label + if (any(ret$node_class == "ContentRow")) { + where_to <- ret$node_class == "ContentRow" + ret$avar_name[where_to] <- ret$spl_value_1[where_to] + } # If we want to expand colnames if (expand_colnames) { @@ -176,6 +167,10 @@ as_result_df <- function(tt, spec = NULL, # ARD part for one stat per row if (make_ard) { + cinfo_df <- col_info(tt) + ci_coltree <- coltree(cinfo_df) + column_split_names <- .get_column_split_name(ci_coltree) # used only in make_ard + # Unnecessary columns ret_tmp <- ret[, !colnames(ret) %in% c("row_num", "is_group_summary", "node_class")] @@ -184,18 +179,57 @@ as_result_df <- function(tt, spec = NULL, # Core row names col_label_rows <- grepl("", ret_tmp$avar_name)) core_row_names <- ret_tmp[!col_label_rows, -only_col_indexes] + colnames_to_rename <- colnames(core_row_names) %in% c("avar_name", "row_name", "label_name") + # instead of avar_name row_name label_name ("variable_label" is not present in ARDs) + colnames(core_row_names)[colnames_to_rename] <- c("variable", "variable_level", "variable_label") + + # Adding stats_names if present + raw_stat_names <- .get_stat_names_from_table(tt, add.labrows = keep_label_rows) + cell_stat_names <- .make_df_from_raw_data(raw_stat_names, nr = nrow(tt), nc = ncol(tt)) # Moving colnames to rows (flattening) ret_w_cols <- NULL + # Looping on statistical columns for (col_i in only_col_indexes) { - tmp_ret_by_col_i <- cbind( - group1 = column_split_names[[ret_tmp[, col_i][[1]]]], - group1_level = ret_tmp[, col_i][[1]], - # instead of avar_name row_name label_name ("variable_label" is not present in ARDs) - setNames(core_row_names, c("variable", "variable_level", "variable_label")), # missing stat_name xxx - stat = I(setNames(ret_tmp[!col_label_rows, col_i], NULL)) + # Making row splits into row specifications (group1 group1_level) + current_col_split_level <- unlist(ret_tmp[seq_len(number_of_col_splits), col_i], use.names = FALSE) + flattened_cols_names <- c(column_split_names[[1]][[1]], current_col_split_level) + names(flattened_cols_names) <- c( + paste0("group", seq_along(column_split_names[[1]][[1]])), + paste0("group", seq_along(current_col_split_level), "_level") + ) + + tmp_core_ret_by_col_i <- cbind( + t(data.frame(flattened_cols_names)), + core_row_names, + row.names = NULL ) + + # retrieving stat names and stats + stat_name <- setNames(cell_stat_names[, col_i - min(only_col_indexes) + 1, drop = TRUE], NULL) + stat <- setNames(ret_tmp[!col_label_rows, col_i, drop = TRUE], NULL) + necessary_stat_lengths <- sapply(stat, length) + + # Truncating or adding NA if stat names has more or less elements than stats + stat_name <- lapply(seq_along(stat_name), function(sn_i) { + stat_name[[sn_i]][seq_len(necessary_stat_lengths[sn_i])] + }) + + # unnesting stat_name and stat + tmp_ret_by_col_i <- NULL + for (row_i in seq_along(stat)){ + tmp_ret_by_col_i <- rbind( + tmp_ret_by_col_i, + cbind( + tmp_core_ret_by_col_i[row_i,], + stat_name = stat_name[[row_i]], + stat = stat[[row_i]], + row.names = NULL + ) + ) + } ret_w_cols <- rbind(ret_w_cols, tmp_ret_by_col_i) } @@ -220,12 +254,41 @@ as_result_df <- function(tt, spec = NULL, out } +# Helper function used to structure the raw values into a dataframe +.make_df_from_raw_data <- function(raw_vals, nr, nc) { + ## if the table has one row and multiple columns, sometimes the cell values returns a list of the cell values + ## rather than a list of length 1 representing the single row. This is bad but may not be changeable + ## at this point. + if (nr == 1 && length(raw_vals) > 1) { + raw_vals <- list(raw_vals) + } + + # Flatten the list of lists (rows) of cell values into a data frame + cellvals <- as.data.frame(do.call(rbind, raw_vals)) + row.names(cellvals) <- NULL + + if (nr == 1 && nc == 1) { + colnames(cellvals) <- names(raw_vals) + } + + cellvals +} + +# Amazing helper function to get the statistic names from row cells! +.get_stat_names_from_table <- function(tt, add.labrows = FALSE) { + # omit_labrows # omit label rows + rows <- collect_leaves(tt, incl.cont = TRUE, add.labrows = add.labrows) + lapply(rows, function(ri) { + lapply(row_cells(ri), obj_stat_names) + }) +} + # Helper function to get column split names .get_column_split_name <- function(ci_coltree) { # ci stands for column information if (is(ci_coltree, "LayoutAxisTree")) { kids <- tree_children(ci_coltree) - return(unlist(lapply(kids, .get_column_split_name))) + return(lapply(kids, .get_column_split_name)) } sapply(pos_splits(tree_pos(ci_coltree)), spl_payload) } diff --git a/R/utils.R b/R/utils.R index 84441850b..933e160c7 100644 --- a/R/utils.R +++ b/R/utils.R @@ -31,21 +31,21 @@ setGeneric("simple_analysis", function(x, ...) standardGeneric("simple_analysis" #' @exportMethod simple_analysis setMethod( "simple_analysis", "numeric", - function(x, ...) in_rows("Mean" = rcell(mean(x, ...), format = "xx.xx")) + function(x, ...) in_rows("Mean" = rcell(mean(x, ...), stat_names = "mean", format = "xx.xx")) ) #' @rdname rtinner #' @exportMethod simple_analysis setMethod( "simple_analysis", "logical", - function(x, ...) in_rows("Count" = rcell(sum(x, ...), format = "xx")) + function(x, ...) in_rows("Count" = rcell(sum(x, ...), stat_names = "n", format = "xx")) ) #' @rdname rtinner #' @exportMethod simple_analysis setMethod( "simple_analysis", "factor", - function(x, ...) in_rows(.list = as.list(table(x))) + function(x, ...) in_rows(.list = as.list(table(x)), .stat_names = "n") ) #' @rdname rtinner diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index 0e34647fb..f7622d1cd 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -238,3 +238,210 @@ test_that("as_result_df works fine with empty tables and no character(0) is allo "a" ) }) + +test_that("make_ard produces realisting ARD output with as_result_df", { + # Testing fundamental getters/setters + rc <- rcell(c(1, 2), stat_names = c("Rand1", "Rand2")) + expect_equal(obj_stat_names(rc), c("Rand1", "Rand2")) + + rc_row <- in_rows( + .list = list(a = c(NA, 1), b = c(1, NA)), + .formats = c("xx - xx", "xx.x - xx.x"), + .format_na_strs = list(c("asda", "lkjklj")), + .stat_names = list(c("A", "B"), c("B", "C")) # if c("A", "B") one each row, if single list duplicated + ) %>% print() + + expect_equal( + list("a" = c("A", "B"), "b" = c("B", "C")), # now it is named + lapply(rc_row, obj_stat_names) + ) + + # Lets make a custom function and check ARDs + mean_sd_custom <- function(x) { + mean <- mean(x, na.rm = FALSE) + sd <- sd(x, na.rm = FALSE) + + rcell(c(mean, sd), label = "Mean (SD)", format = "xx.x (xx.x)", + stat_names = c("Mean", "SD")) + } + counts_percentage_custom <- function(x) { + # browser() + cnts <- table(x) + out <- lapply(cnts, function(x) { + perc <- x / sum(cnts) + rcell(c(x, perc), format = "xx. (xx.%)") + }) + in_rows(.list = as.list(out), .labels = names(cnts), + .stat_names = list(c("Count", "Percentage"))) + } + + lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx") %>% + split_cols_by("ARM", split_fun = keep_split_levels(c("A: Drug X", "B: Placebo"))) %>% + analyze(vars = "AGE", afun = mean_sd_custom) %>% + analyze(vars = "SEX", afun = counts_percentage_custom) + + tbl <- build_table(lyt, ex_adsl) + ard_out <- as_result_df(tbl, make_ard = TRUE) + + # Numeric output + expect_equal( + ard_out[2, , drop = TRUE], + list( + group1 = "ARM", + group1_level = "A: Drug X", + variable = "AGE", + variable_level = "Mean (SD)", + variable_label = "Mean (SD)", + stat_name = "SD", + stat = 6.553326 + ), + tolerance = 10e-6 + ) + + # Percentage output + expect_equal( + ard_out[14, , drop = TRUE], + list( + group1 = "ARM", + group1_level = "B: Placebo", + variable = "SEX", + variable_level = "F", + variable_label = "F", + stat_name = "Percentage", + stat = 0.5746269 + ), + tolerance = 10e-6 + ) + + # Default values + lyt <- basic_table() %>% + split_cols_by("ARM") %>% + analyze(c("AGE", "SEX")) + + tbl <- build_table(lyt, ex_adsl) + ard_out <- as_result_df(tbl, make_ard = TRUE) + + expect_equal(unique(ard_out$stat_name), c("mean", "n")) +}) + +test_that("make_ard works with multiple row levels", { + lyt <- basic_table() %>% + split_rows_by("STRATA1") %>% + split_rows_by("STRATA2") %>% + split_cols_by("ARM") %>% + analyze(c("AGE", "SEX")) + + tbl <- build_table(lyt, ex_adsl) + ard_out <- as_result_df(tbl, make_ard = TRUE) + + expect_equal(unique(ard_out$stat_name), c("mean", "n")) + expect_contains(colnames(ard_out), c("spl_var_2", "spl_value_2")) + + # Count output + expect_equal( + ard_out[90, , drop = TRUE], + list( + group1 = "ARM", + group1_level = "C: Combination", + spl_var_1 = "STRATA1", + spl_value_1 = "C", + spl_var_2 = "STRATA2", + spl_value_2 = "S2", + variable = "SEX", + variable_level = "UNDIFFERENTIATED", + variable_label = "UNDIFFERENTIATED", + stat_name = "n", + stat = 0 + ), + tolerance = 10e-6 + ) +}) + +test_that("make_ard works with multiple column levels", { + lyt <- basic_table() %>% + split_rows_by("STRATA1") %>% + split_cols_by("ARM") %>% + split_cols_by("STRATA2") %>% + analyze(c("AGE", "SEX")) + + tbl <- build_table(lyt, ex_adsl) + ard_out <- as_result_df(tbl, make_ard = TRUE) + + expect_equal(unique(ard_out$stat_name), c("mean", "n")) + expect_contains(colnames(ard_out), c("spl_var_1", "spl_value_1")) + expect_contains(colnames(ard_out), c("group2", "group2_level")) + + # Count output + expect_equal( + ard_out[16, , drop = TRUE], + list( + group1 = "ARM", + group2 = "STRATA2", + group1_level = "A: Drug X", + group2_level = "S2", + spl_var_1 = "STRATA1", + spl_value_1 = "A", + variable = "AGE", + variable_level = "Mean", + variable_label = "Mean", + stat_name = "mean", + stat = 34.4 + ), + tolerance = 10e-6 + ) +}) + +test_that("make_ard works with summarize_row_groups", { + lyt <- basic_table() %>% + split_rows_by("STRATA2") %>% + summarize_row_groups() %>% + split_cols_by("ARM") %>% + split_cols_by("STRATA1") %>% + analyze(c("AGE", "SEX")) + + tbl <- build_table(lyt, ex_adsl) + ard_out <- as_result_df(tbl, make_ard = TRUE) + + expect_contains(unique(ard_out$stat_name), c("mean", "n", "p")) + expect_contains(colnames(ard_out), c("spl_var_1", "spl_value_1")) + expect_contains(colnames(ard_out), c("group2", "group2_level")) + + # label row output + expect_equal( + ard_out[1, , drop = TRUE], + list( + group1 = "ARM", + group2 = "STRATA1", + group1_level = "A: Drug X", + group2_level = "A", + spl_var_1 = "STRATA2", + spl_value_1 = "S1", + variable = "S1", + variable_level = "S1", + variable_label = "S1", + stat_name = "n", + stat = 18 + ), + tolerance = 10e-6 + ) + + # label row output + expect_equal( + ard_out[86, , drop = TRUE], + list( + group1 = "ARM", + group2 = "STRATA1", + group1_level = "C: Combination", + group2_level = "A", + spl_var_1 = "STRATA2", + spl_value_1 = "S1", + variable = "S1", + variable_level = "S1", + variable_label = "S1", + stat_name = "p", + stat = 0.35 + ), + tolerance = 10e-6 + ) +}) + diff --git a/vignettes/ard_how_to.Rmd b/vignettes/ard_how_to.Rmd index 22bf7d118..187b27d1b 100644 --- a/vignettes/ard_how_to.Rmd +++ b/vignettes/ard_how_to.Rmd @@ -46,28 +46,11 @@ First of all we need a table to retrieve all the necessary information. Borrowin ```{r} library(rtables) ADSL <- ex_adsl # Example ADSL dataset -mean_sd_custom <- function(x) { - mean <- mean(x, na.rm = FALSE) - sd <- sd(x, na.rm = FALSE) - - rcell(c(mean, sd), label = "Mean (SD)", format = "xx.x (xx.x)") -} -counts_percentage_custom <- function(x) { - # browser() - cnts <- table(x) - out <- lapply(cnts, function(x) { - perc <- x / sum(cnts) - rcell(c(x, perc), format = "xx. (xx.%)") - }) - in_rows(.list = as.list(out), .labels = names(cnts)) -} -lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx") %>% - # split_rows_by("STRATA1", split_fun = keep_split_levels(c("A"))) %>% - # split_cols_by("STRATA2") %>% - split_cols_by("ARM", split_fun = keep_split_levels(c("A: Drug X", "B: Placebo"))) %>% - analyze(vars = "AGE", afun = mean_sd_custom) %>% - analyze(vars = "SEX", afun = counts_percentage_custom) +# Very simple table +lyt <- basic_table() %>% + split_cols_by("ARM") %>% + analyze(c("AGE", "SEX")) tbl <- build_table(lyt, ADSL) tbl @@ -75,7 +58,7 @@ tbl ## Convert the table to a result data frame -The `as_result_df` function is the one that converts a table to a result data frame. The result data frame is a data frame that contains the result of the summary table and is ready to be used for quality control purposes. This may differ for different standard and lets see how to produce different outputs. Final goal is having clearly one result for row. Lets play with different options. +The `as_result_df` function is the one that converts a table to a result data frame. The result data frame is a data frame that contains the result of the summary table and is ready to be used for quality control purposes. This may differ for different standard and lets see how to produce different outputs. The following default ouputs are available for visual appreciation, or to transform into a table again with `df_to_tt()`. ```{r} as_result_df(tbl) @@ -90,3 +73,87 @@ Now lets get the final `ARD` output. This is the one that is ready to be used fo ```{r} as_result_df(tbl, make_ard = TRUE) ``` + +## Customizing the output + +`as_result_df` and `ARD` outputs follow the content of the table, so it is possible to modify those to get some custom outputs. For example, we can add some user-defined statistics with their own statistical names: + +```{r} +# rcell and in_rows are the core of any analysis function +rc <- rcell(c(1, 2), stat_names = c("Rand1", "Rand2")) +print(obj_stat_names(rc)) # c("Rand1", "Rand2") + +rc_row <- in_rows( + .list = list(a = c(NA, 1), b = c(1, NA)), + .formats = c("xx - xx", "xx.x - xx.x"), + .format_na_strs = list(c("asda", "lkjklj")), + .stat_names = list(c("A", "B"), c("B", "C")) +) + +# Only a getter for this object +print(obj_stat_names(rc_row)) # list(a = c("A", "B"), b = c("B", "C")) + +# if c("A", "B") one each row, if single list duplicated +rc_row <- in_rows( + .list = list(a = c(NA, 1), b = c(1, NA)), + .formats = c("xx - xx", "xx.x - xx.x"), + .format_na_strs = list(c("asda", "lkjklj")), + .stat_names = c("A", "B") +) +print(obj_stat_names(rc_row)) # c("A", "B") # one for each row +print(lapply(rc_row, obj_stat_names)) # identical to above + row names + +rc_row <- in_rows( + .list = list(a = c(NA, 1), b = c(1, NA)), + .formats = c("xx - xx", "xx.x - xx.x"), + .format_na_strs = list(c("asda", "lkjklj")), + .stat_names = list(c("A", "B")) # It is duplicated, check it your self! +) +``` + +Lets put it into practice: + +```{r} +mean_sd_custom <- function(x) { + mean <- mean(x, na.rm = FALSE) + sd <- sd(x, na.rm = FALSE) + + rcell(c(mean, sd), label = "Mean (SD)", format = "xx.x (xx.x)", + stat_names = c("Mean", "SD")) + } + counts_percentage_custom <- function(x) { + # browser() + cnts <- table(x) + out <- lapply(cnts, function(x) { + perc <- x / sum(cnts) + rcell(c(x, perc), format = "xx. (xx.%)") + }) + in_rows(.list = as.list(out), .labels = names(cnts), + .stat_names = list(c("Count", "Percentage"))) + } + + lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx") %>% + split_cols_by("ARM", split_fun = keep_split_levels(c("A: Drug X", "B: Placebo"))) %>% + analyze(vars = "AGE", afun = mean_sd_custom) %>% + analyze(vars = "SEX", afun = counts_percentage_custom) + + tbl <- build_table(lyt, ex_adsl) + + as_result_df(tbl, make_ard = TRUE) +``` + +# More complex outputs + +Lets add hierarchical row and column splits. +```{r} +lyt <- basic_table() %>% + split_rows_by("STRATA2") %>% + summarize_row_groups() %>% + split_cols_by("ARM") %>% + split_cols_by("STRATA1") %>% + analyze(c("AGE", "SEX")) + +tbl <- build_table(lyt, ex_adsl) + +as_result_df(tbl, make_ard = TRUE) +``` From a24897deee9bb03849eccab42ff2e0ef8798d9bc Mon Sep 17 00:00:00 2001 From: Melkiades Date: Thu, 21 Nov 2024 15:44:58 +0100 Subject: [PATCH 02/18] fix: style --- R/tree_accessors.R | 6 +- R/tt_afun_utils.R | 2 +- R/tt_as_df.R | 30 +++++----- tests/testthat/test-result_data_frame.R | 75 +++++++++++++------------ vignettes/ard_how_to.Rmd | 68 +++++++++++----------- 5 files changed, 94 insertions(+), 87 deletions(-) diff --git a/R/tree_accessors.R b/R/tree_accessors.R index 24f9c33f1..9b3b3b573 100644 --- a/R/tree_accessors.R +++ b/R/tree_accessors.R @@ -4324,11 +4324,11 @@ setMethod( ) # stat_names for ARD ----------------------------------------------------------- -# +# #' @rdname int_methods #' @export setGeneric("obj_stat_names", function(obj) standardGeneric("obj_stat_names")) -# +# #' @rdname int_methods #' @export setGeneric("obj_stat_names<-", function(obj, value) standardGeneric("obj_stat_names<-")) @@ -4349,4 +4349,4 @@ setMethod("obj_stat_names", "CellValue", function(obj) attr(obj, "stat_names")) setMethod( "obj_stat_names", "RowsVerticalSection", function(obj) lapply(obj, obj_stat_names) -) \ No newline at end of file +) diff --git a/R/tt_afun_utils.R b/R/tt_afun_utils.R index 6594ec25e..d38907c74 100644 --- a/R/tt_afun_utils.R +++ b/R/tt_afun_utils.R @@ -15,7 +15,7 @@ #' @inherit CellValue return #' #' @note Currently column spanning is only supported for defining header structure. -#' +#' #' @examples #' rcell(1, format = "xx.x") #' rcell(c(1, 2), format = c("xx - xx")) diff --git a/R/tt_as_df.R b/R/tt_as_df.R index a1de7a04a..7ab26d8af 100644 --- a/R/tt_as_df.R +++ b/R/tt_as_df.R @@ -66,7 +66,7 @@ as_result_df <- function(tt, spec = NULL, # raw values rawvals <- cell_values(tt) cellvals <- .make_df_from_raw_data(rawvals, nr = nrow(tt), nc = ncol(tt)) - + if (data_format %in% c("strings", "numeric")) { # we keep previous calculations to check the format of the data mf_tt <- matrix_form(tt) @@ -126,7 +126,7 @@ as_result_df <- function(tt, spec = NULL, cellvals ) } - + # Fix for content rows analysis variable label if (any(ret$node_class == "ContentRow")) { where_to <- ret$node_class == "ContentRow" @@ -170,7 +170,7 @@ as_result_df <- function(tt, spec = NULL, cinfo_df <- col_info(tt) ci_coltree <- coltree(cinfo_df) column_split_names <- .get_column_split_name(ci_coltree) # used only in make_ard - + # Unnecessary columns ret_tmp <- ret[, !colnames(ret) %in% c("row_num", "is_group_summary", "node_class")] @@ -184,7 +184,7 @@ as_result_df <- function(tt, spec = NULL, colnames_to_rename <- colnames(core_row_names) %in% c("avar_name", "row_name", "label_name") # instead of avar_name row_name label_name ("variable_label" is not present in ARDs) colnames(core_row_names)[colnames_to_rename] <- c("variable", "variable_level", "variable_label") - + # Adding stats_names if present raw_stat_names <- .get_stat_names_from_table(tt, add.labrows = keep_label_rows) cell_stat_names <- .make_df_from_raw_data(raw_stat_names, nr = nrow(tt), nc = ncol(tt)) @@ -200,36 +200,36 @@ as_result_df <- function(tt, spec = NULL, paste0("group", seq_along(column_split_names[[1]][[1]])), paste0("group", seq_along(current_col_split_level), "_level") ) - + tmp_core_ret_by_col_i <- cbind( t(data.frame(flattened_cols_names)), core_row_names, row.names = NULL ) - + # retrieving stat names and stats stat_name <- setNames(cell_stat_names[, col_i - min(only_col_indexes) + 1, drop = TRUE], NULL) stat <- setNames(ret_tmp[!col_label_rows, col_i, drop = TRUE], NULL) necessary_stat_lengths <- sapply(stat, length) - + # Truncating or adding NA if stat names has more or less elements than stats stat_name <- lapply(seq_along(stat_name), function(sn_i) { stat_name[[sn_i]][seq_len(necessary_stat_lengths[sn_i])] }) - + # unnesting stat_name and stat tmp_ret_by_col_i <- NULL - for (row_i in seq_along(stat)){ + for (row_i in seq_along(stat)) { tmp_ret_by_col_i <- rbind( tmp_ret_by_col_i, cbind( - tmp_core_ret_by_col_i[row_i,], - stat_name = stat_name[[row_i]], + tmp_core_ret_by_col_i[row_i, ], + stat_name = stat_name[[row_i]], stat = stat[[row_i]], row.names = NULL ) ) - } + } ret_w_cols <- rbind(ret_w_cols, tmp_ret_by_col_i) } @@ -262,15 +262,15 @@ as_result_df <- function(tt, spec = NULL, if (nr == 1 && length(raw_vals) > 1) { raw_vals <- list(raw_vals) } - + # Flatten the list of lists (rows) of cell values into a data frame cellvals <- as.data.frame(do.call(rbind, raw_vals)) row.names(cellvals) <- NULL - + if (nr == 1 && nc == 1) { colnames(cellvals) <- names(raw_vals) } - + cellvals } diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index f7622d1cd..e3f8051b5 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -243,26 +243,28 @@ test_that("make_ard produces realisting ARD output with as_result_df", { # Testing fundamental getters/setters rc <- rcell(c(1, 2), stat_names = c("Rand1", "Rand2")) expect_equal(obj_stat_names(rc), c("Rand1", "Rand2")) - + rc_row <- in_rows( - .list = list(a = c(NA, 1), b = c(1, NA)), + .list = list(a = c(NA, 1), b = c(1, NA)), .formats = c("xx - xx", "xx.x - xx.x"), .format_na_strs = list(c("asda", "lkjklj")), .stat_names = list(c("A", "B"), c("B", "C")) # if c("A", "B") one each row, if single list duplicated ) %>% print() - + expect_equal( list("a" = c("A", "B"), "b" = c("B", "C")), # now it is named lapply(rc_row, obj_stat_names) ) - + # Lets make a custom function and check ARDs mean_sd_custom <- function(x) { mean <- mean(x, na.rm = FALSE) sd <- sd(x, na.rm = FALSE) - - rcell(c(mean, sd), label = "Mean (SD)", format = "xx.x (xx.x)", - stat_names = c("Mean", "SD")) + + rcell(c(mean, sd), + label = "Mean (SD)", format = "xx.x (xx.x)", + stat_names = c("Mean", "SD") + ) } counts_percentage_custom <- function(x) { # browser() @@ -271,18 +273,20 @@ test_that("make_ard produces realisting ARD output with as_result_df", { perc <- x / sum(cnts) rcell(c(x, perc), format = "xx. (xx.%)") }) - in_rows(.list = as.list(out), .labels = names(cnts), - .stat_names = list(c("Count", "Percentage"))) + in_rows( + .list = as.list(out), .labels = names(cnts), + .stat_names = list(c("Count", "Percentage")) + ) } - + lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx") %>% split_cols_by("ARM", split_fun = keep_split_levels(c("A: Drug X", "B: Placebo"))) %>% analyze(vars = "AGE", afun = mean_sd_custom) %>% analyze(vars = "SEX", afun = counts_percentage_custom) - + tbl <- build_table(lyt, ex_adsl) ard_out <- as_result_df(tbl, make_ard = TRUE) - + # Numeric output expect_equal( ard_out[2, , drop = TRUE], @@ -294,10 +298,10 @@ test_that("make_ard produces realisting ARD output with as_result_df", { variable_label = "Mean (SD)", stat_name = "SD", stat = 6.553326 - ), + ), tolerance = 10e-6 ) - + # Percentage output expect_equal( ard_out[14, , drop = TRUE], @@ -309,18 +313,18 @@ test_that("make_ard produces realisting ARD output with as_result_df", { variable_label = "F", stat_name = "Percentage", stat = 0.5746269 - ), + ), tolerance = 10e-6 ) - + # Default values lyt <- basic_table() %>% split_cols_by("ARM") %>% analyze(c("AGE", "SEX")) - + tbl <- build_table(lyt, ex_adsl) ard_out <- as_result_df(tbl, make_ard = TRUE) - + expect_equal(unique(ard_out$stat_name), c("mean", "n")) }) @@ -330,13 +334,13 @@ test_that("make_ard works with multiple row levels", { split_rows_by("STRATA2") %>% split_cols_by("ARM") %>% analyze(c("AGE", "SEX")) - + tbl <- build_table(lyt, ex_adsl) ard_out <- as_result_df(tbl, make_ard = TRUE) - + expect_equal(unique(ard_out$stat_name), c("mean", "n")) expect_contains(colnames(ard_out), c("spl_var_2", "spl_value_2")) - + # Count output expect_equal( ard_out[90, , drop = TRUE], @@ -352,7 +356,7 @@ test_that("make_ard works with multiple row levels", { variable_label = "UNDIFFERENTIATED", stat_name = "n", stat = 0 - ), + ), tolerance = 10e-6 ) }) @@ -363,14 +367,14 @@ test_that("make_ard works with multiple column levels", { split_cols_by("ARM") %>% split_cols_by("STRATA2") %>% analyze(c("AGE", "SEX")) - + tbl <- build_table(lyt, ex_adsl) ard_out <- as_result_df(tbl, make_ard = TRUE) - + expect_equal(unique(ard_out$stat_name), c("mean", "n")) expect_contains(colnames(ard_out), c("spl_var_1", "spl_value_1")) expect_contains(colnames(ard_out), c("group2", "group2_level")) - + # Count output expect_equal( ard_out[16, , drop = TRUE], @@ -386,26 +390,26 @@ test_that("make_ard works with multiple column levels", { variable_label = "Mean", stat_name = "mean", stat = 34.4 - ), + ), tolerance = 10e-6 ) }) test_that("make_ard works with summarize_row_groups", { lyt <- basic_table() %>% - split_rows_by("STRATA2") %>% - summarize_row_groups() %>% + split_rows_by("STRATA2") %>% + summarize_row_groups() %>% split_cols_by("ARM") %>% - split_cols_by("STRATA1") %>% + split_cols_by("STRATA1") %>% analyze(c("AGE", "SEX")) - + tbl <- build_table(lyt, ex_adsl) ard_out <- as_result_df(tbl, make_ard = TRUE) - + expect_contains(unique(ard_out$stat_name), c("mean", "n", "p")) expect_contains(colnames(ard_out), c("spl_var_1", "spl_value_1")) expect_contains(colnames(ard_out), c("group2", "group2_level")) - + # label row output expect_equal( ard_out[1, , drop = TRUE], @@ -421,10 +425,10 @@ test_that("make_ard works with summarize_row_groups", { variable_label = "S1", stat_name = "n", stat = 18 - ), + ), tolerance = 10e-6 ) - + # label row output expect_equal( ard_out[86, , drop = TRUE], @@ -440,8 +444,7 @@ test_that("make_ard works with summarize_row_groups", { variable_label = "S1", stat_name = "p", stat = 0.35 - ), + ), tolerance = 10e-6 ) }) - diff --git a/vignettes/ard_how_to.Rmd b/vignettes/ard_how_to.Rmd index 187b27d1b..d79d6ac71 100644 --- a/vignettes/ard_how_to.Rmd +++ b/vignettes/ard_how_to.Rmd @@ -84,10 +84,10 @@ rc <- rcell(c(1, 2), stat_names = c("Rand1", "Rand2")) print(obj_stat_names(rc)) # c("Rand1", "Rand2") rc_row <- in_rows( - .list = list(a = c(NA, 1), b = c(1, NA)), + .list = list(a = c(NA, 1), b = c(1, NA)), .formats = c("xx - xx", "xx.x - xx.x"), .format_na_strs = list(c("asda", "lkjklj")), - .stat_names = list(c("A", "B"), c("B", "C")) + .stat_names = list(c("A", "B"), c("B", "C")) ) # Only a getter for this object @@ -95,7 +95,7 @@ print(obj_stat_names(rc_row)) # list(a = c("A", "B"), b = c("B", "C")) # if c("A", "B") one each row, if single list duplicated rc_row <- in_rows( - .list = list(a = c(NA, 1), b = c(1, NA)), + .list = list(a = c(NA, 1), b = c(1, NA)), .formats = c("xx - xx", "xx.x - xx.x"), .format_na_strs = list(c("asda", "lkjklj")), .stat_names = c("A", "B") @@ -104,7 +104,7 @@ print(obj_stat_names(rc_row)) # c("A", "B") # one for each row print(lapply(rc_row, obj_stat_names)) # identical to above + row names rc_row <- in_rows( - .list = list(a = c(NA, 1), b = c(1, NA)), + .list = list(a = c(NA, 1), b = c(1, NA)), .formats = c("xx - xx", "xx.x - xx.x"), .format_na_strs = list(c("asda", "lkjklj")), .stat_names = list(c("A", "B")) # It is duplicated, check it your self! @@ -115,31 +115,35 @@ Lets put it into practice: ```{r} mean_sd_custom <- function(x) { - mean <- mean(x, na.rm = FALSE) - sd <- sd(x, na.rm = FALSE) - - rcell(c(mean, sd), label = "Mean (SD)", format = "xx.x (xx.x)", - stat_names = c("Mean", "SD")) - } - counts_percentage_custom <- function(x) { - # browser() - cnts <- table(x) - out <- lapply(cnts, function(x) { - perc <- x / sum(cnts) - rcell(c(x, perc), format = "xx. (xx.%)") - }) - in_rows(.list = as.list(out), .labels = names(cnts), - .stat_names = list(c("Count", "Percentage"))) - } - - lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx") %>% - split_cols_by("ARM", split_fun = keep_split_levels(c("A: Drug X", "B: Placebo"))) %>% - analyze(vars = "AGE", afun = mean_sd_custom) %>% - analyze(vars = "SEX", afun = counts_percentage_custom) - - tbl <- build_table(lyt, ex_adsl) - - as_result_df(tbl, make_ard = TRUE) + mean <- mean(x, na.rm = FALSE) + sd <- sd(x, na.rm = FALSE) + + rcell(c(mean, sd), + label = "Mean (SD)", format = "xx.x (xx.x)", + stat_names = c("Mean", "SD") + ) +} +counts_percentage_custom <- function(x) { + # browser() + cnts <- table(x) + out <- lapply(cnts, function(x) { + perc <- x / sum(cnts) + rcell(c(x, perc), format = "xx. (xx.%)") + }) + in_rows( + .list = as.list(out), .labels = names(cnts), + .stat_names = list(c("Count", "Percentage")) + ) +} + +lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx") %>% + split_cols_by("ARM", split_fun = keep_split_levels(c("A: Drug X", "B: Placebo"))) %>% + analyze(vars = "AGE", afun = mean_sd_custom) %>% + analyze(vars = "SEX", afun = counts_percentage_custom) + +tbl <- build_table(lyt, ex_adsl) + +as_result_df(tbl, make_ard = TRUE) ``` # More complex outputs @@ -147,10 +151,10 @@ mean_sd_custom <- function(x) { Lets add hierarchical row and column splits. ```{r} lyt <- basic_table() %>% - split_rows_by("STRATA2") %>% - summarize_row_groups() %>% + split_rows_by("STRATA2") %>% + summarize_row_groups() %>% split_cols_by("ARM") %>% - split_cols_by("STRATA1") %>% + split_cols_by("STRATA1") %>% analyze(c("AGE", "SEX")) tbl <- build_table(lyt, ex_adsl) From d92a64a124b9c9cf26c1f427e5d1cae63a8b1473 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Thu, 21 Nov 2024 15:51:55 +0100 Subject: [PATCH 03/18] fix: document and spelling --- NAMESPACE | 4 ++++ inst/WORDLIST | 2 ++ man/CellValue.Rd | 5 ++++- man/in_rows.Rd | 8 +++++++- man/int_methods.Rd | 15 +++++++++++++++ man/rcell.Rd | 11 ++++++++++- vignettes/ard_how_to.Rmd | 4 ++-- 7 files changed, 44 insertions(+), 5 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index a53328608..412daa2b5 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -20,6 +20,7 @@ export("header_section_div<-") export("horizontal_sep<-") export("indent_mod<-") export("label_at_path<-") +export("obj_stat_names<-") export("ref_index<-") export("ref_symbol<-") export("row_footnotes<-") @@ -125,6 +126,7 @@ export(manual_cols) export(no_colinfo) export(non_ref_rcell) export(obj_avar) +export(obj_stat_names) export(pag_tt_indices) export(paginate_table) export(path_enriched_df) @@ -221,6 +223,7 @@ exportMethods("obj_format<-") exportMethods("obj_label<-") exportMethods("obj_na_str<-") exportMethods("obj_name<-") +exportMethods("obj_stat_names<-") exportMethods("prov_footer<-") exportMethods("ref_index<-") exportMethods("ref_symbol<-") @@ -267,6 +270,7 @@ exportMethods(obj_format) exportMethods(obj_label) exportMethods(obj_na_str) exportMethods(obj_name) +exportMethods(obj_stat_names) exportMethods(prov_footer) exportMethods(rbind) exportMethods(rbind2) diff --git a/inst/WORDLIST b/inst/WORDLIST index 913654388..c1da7f3bd 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -56,12 +56,14 @@ mandatorily monospace multivariable orderable +ouputs pathing postfix postprocessing pre priori programmatically +quartiles reindexed repo repped diff --git a/man/CellValue.Rd b/man/CellValue.Rd index e2627590d..ca147f8e6 100644 --- a/man/CellValue.Rd +++ b/man/CellValue.Rd @@ -12,7 +12,8 @@ CellValue( indent_mod = NULL, footnotes = NULL, align = NULL, - format_na_str = NULL + format_na_str = NULL, + stat_names = NULL ) } \arguments{ @@ -37,6 +38,8 @@ corresponds to the unmodified default behavior.} \item{format_na_str}{(\code{string})\cr string which should be displayed when formatted if this cell's value(s) are all \code{NA}.} + +\item{stat_names}{(\code{character})\cr names for the statistics in the cell. It can be a vector of strings.} } \value{ An object representing the value within a single cell within a populated table. The underlying structure diff --git a/man/in_rows.Rd b/man/in_rows.Rd index 7ad68b0e9..d7bf51373 100644 --- a/man/in_rows.Rd +++ b/man/in_rows.Rd @@ -14,7 +14,8 @@ in_rows( .cell_footnotes = list(NULL), .row_footnotes = list(NULL), .aligns = NULL, - .format_na_strs = NULL + .format_na_strs = NULL, + .stat_names = list(NULL) ) } \arguments{ @@ -38,6 +39,9 @@ in_rows( See \code{\link[formatters:list_formats]{formatters::list_valid_aligns()}} for currently supported alignments.} \item{.format_na_strs}{(\code{character} or \code{NULL})\cr NA strings for the cells.} + +\item{.stat_names}{(\code{character} or \code{NULL})\cr names for the statistics in the cells. +It can be a vector of values.} } \value{ A \code{RowsVerticalSection} object (or \code{NULL}). The details of this object should be considered an @@ -54,6 +58,8 @@ paths with \code{\link{fnotes_at_path<-}}. in_rows(1, 2, 3, .names = c("a", "b", "c")) in_rows(1, 2, 3, .labels = c("a", "b", "c")) in_rows(1, 2, 3, .names = c("a", "b", "c"), .labels = c("AAA", "BBB", "CCC")) +in_rows(.list = list(a = c(NA, NA)), .formats = "xx - xx", .format_na_strs = list(c("asda", "lkjklj"))) +in_rows(.list = list(a = c(NA, NA)), .format_na_strs = c("asda", "lkjklj")) in_rows(.list = list(a = 1, b = 2, c = 3)) in_rows(1, 2, .list = list(3), .names = c("a", "b", "c")) diff --git a/man/int_methods.Rd b/man/int_methods.Rd index 1cb0ce878..e67995f18 100644 --- a/man/int_methods.Rd +++ b/man/int_methods.Rd @@ -351,6 +351,11 @@ \alias{ref_msg,RefFootnote-method} \alias{fnotes_at_path<-,VTableTree,character-method} \alias{fnotes_at_path<-,VTableTree,NULL-method} +\alias{obj_stat_names} +\alias{obj_stat_names<-} +\alias{obj_stat_names<-,CellValue-method} +\alias{obj_stat_names,CellValue-method} +\alias{obj_stat_names,RowsVerticalSection-method} \alias{rbind2,VTableNodeInfo,missing-method} \alias{tt_at_path,VTableTree-method} \alias{tt_at_path<-,VTableTree,ANY,VTableTree-method} @@ -1118,6 +1123,16 @@ spl_varnames(object) <- value \S4method{fnotes_at_path}{VTableTree,NULL}(obj, rowpath = NULL, colpath = NULL, reset_idx = TRUE) <- value +obj_stat_names(obj) + +obj_stat_names(obj) <- value + +\S4method{obj_stat_names}{CellValue}(obj) <- value + +\S4method{obj_stat_names}{CellValue}(obj) + +\S4method{obj_stat_names}{RowsVerticalSection}(obj) + \S4method{rbind2}{VTableNodeInfo,missing}(x, y) \S4method{tt_at_path}{VTableTree}(tt, path, ...) diff --git a/man/rcell.Rd b/man/rcell.Rd index 8df42204d..54ca58694 100644 --- a/man/rcell.Rd +++ b/man/rcell.Rd @@ -13,7 +13,8 @@ rcell( indent_mod = NULL, footnotes = NULL, align = NULL, - format_na_str = NULL + format_na_str = NULL, + stat_names = NULL ) non_ref_rcell( @@ -50,6 +51,8 @@ corresponds to the unmodified default behavior.} \item{format_na_str}{(\code{string})\cr string which should be displayed when formatted if this cell's value(s) are all \code{NA}.} +\item{stat_names}{(\code{character})\cr names for the statistics in the cell. It can be a vector of strings.} + \item{is_ref}{(\code{flag})\cr whether function is being used in the reference column (i.e. \code{.in_ref_col} should be passed to this argument).} @@ -69,3 +72,9 @@ be passed the value of \code{.in_ref_col} when it is used. \note{ Currently column spanning is only supported for defining header structure. } +\examples{ +rcell(1, format = "xx.x") +rcell(c(1, 2), format = c("xx - xx")) +rcell(c(1, 2), stat_names = c("Rand1", "Rand2")) + +} diff --git a/vignettes/ard_how_to.Rmd b/vignettes/ard_how_to.Rmd index d79d6ac71..162ec19e6 100644 --- a/vignettes/ard_how_to.Rmd +++ b/vignettes/ard_how_to.Rmd @@ -58,7 +58,7 @@ tbl ## Convert the table to a result data frame -The `as_result_df` function is the one that converts a table to a result data frame. The result data frame is a data frame that contains the result of the summary table and is ready to be used for quality control purposes. This may differ for different standard and lets see how to produce different outputs. The following default ouputs are available for visual appreciation, or to transform into a table again with `df_to_tt()`. +The `as_result_df` function is the one that converts a table to a result data frame. The result data frame is a data frame that contains the result of the summary table and is ready to be used for quality control purposes. This may differ for different standard and lets see how to produce different result `data.frame`. The following default ouputs are available for visual appreciation, or to transform into a table again with `df_to_tt()`. ```{r} as_result_df(tbl) @@ -76,7 +76,7 @@ as_result_df(tbl, make_ard = TRUE) ## Customizing the output -`as_result_df` and `ARD` outputs follow the content of the table, so it is possible to modify those to get some custom outputs. For example, we can add some user-defined statistics with their own statistical names: +`as_result_df` and `ARD` depend on the content of the table, so it is possible to modify the table to get some custom output. For example, we can add some user-defined statistics with their own statistical names: ```{r} # rcell and in_rows are the core of any analysis function From d093ff97e109a21ca361babd6260d28bc948e8a5 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Thu, 21 Nov 2024 15:53:03 +0100 Subject: [PATCH 04/18] fix: remove print --- tests/testthat/test-result_data_frame.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index e3f8051b5..21a0cfc21 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -249,7 +249,7 @@ test_that("make_ard produces realisting ARD output with as_result_df", { .formats = c("xx - xx", "xx.x - xx.x"), .format_na_strs = list(c("asda", "lkjklj")), .stat_names = list(c("A", "B"), c("B", "C")) # if c("A", "B") one each row, if single list duplicated - ) %>% print() + ) expect_equal( list("a" = c("A", "B"), "b" = c("B", "C")), # now it is named From e348e6c1856dfd71e4dfacc2ade5e396dcd40f13 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Thu, 21 Nov 2024 16:55:07 +0100 Subject: [PATCH 05/18] fix: cicd --- R/tt_afun_utils.R | 6 +++++- man/in_rows.Rd | 6 +++++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/R/tt_afun_utils.R b/R/tt_afun_utils.R index d38907c74..038b3cc57 100644 --- a/R/tt_afun_utils.R +++ b/R/tt_afun_utils.R @@ -140,7 +140,11 @@ non_ref_rcell <- function(x, is_ref, format = NULL, colspan = 1L, #' in_rows(1, 2, 3, .names = c("a", "b", "c")) #' in_rows(1, 2, 3, .labels = c("a", "b", "c")) #' in_rows(1, 2, 3, .names = c("a", "b", "c"), .labels = c("AAA", "BBB", "CCC")) -#' in_rows(.list = list(a = c(NA, NA)), .formats = "xx - xx", .format_na_strs = list(c("asda", "lkjklj"))) +#' in_rows( +#' .list = list(a = c(NA, NA)), +#' .formats = "xx - xx", +#' .format_na_strs = list(c("asda", "lkjklj")) +#' ) #' in_rows(.list = list(a = c(NA, NA)), .format_na_strs = c("asda", "lkjklj")) #' #' in_rows(.list = list(a = 1, b = 2, c = 3)) diff --git a/man/in_rows.Rd b/man/in_rows.Rd index d7bf51373..baa9fe0ce 100644 --- a/man/in_rows.Rd +++ b/man/in_rows.Rd @@ -58,7 +58,11 @@ paths with \code{\link{fnotes_at_path<-}}. in_rows(1, 2, 3, .names = c("a", "b", "c")) in_rows(1, 2, 3, .labels = c("a", "b", "c")) in_rows(1, 2, 3, .names = c("a", "b", "c"), .labels = c("AAA", "BBB", "CCC")) -in_rows(.list = list(a = c(NA, NA)), .formats = "xx - xx", .format_na_strs = list(c("asda", "lkjklj"))) +in_rows( + .list = list(a = c(NA, NA)), + .formats = "xx - xx", + .format_na_strs = list(c("asda", "lkjklj")) +) in_rows(.list = list(a = c(NA, NA)), .format_na_strs = c("asda", "lkjklj")) in_rows(.list = list(a = 1, b = 2, c = 3)) From bee359c7396244ce3e700971768c3693f0650261 Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 21 Nov 2024 15:58:00 +0000 Subject: [PATCH 06/18] [skip style] [skip vbump] Restyle files --- R/tt_afun_utils.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/R/tt_afun_utils.R b/R/tt_afun_utils.R index 038b3cc57..d36809505 100644 --- a/R/tt_afun_utils.R +++ b/R/tt_afun_utils.R @@ -141,8 +141,8 @@ non_ref_rcell <- function(x, is_ref, format = NULL, colspan = 1L, #' in_rows(1, 2, 3, .labels = c("a", "b", "c")) #' in_rows(1, 2, 3, .names = c("a", "b", "c"), .labels = c("AAA", "BBB", "CCC")) #' in_rows( -#' .list = list(a = c(NA, NA)), -#' .formats = "xx - xx", +#' .list = list(a = c(NA, NA)), +#' .formats = "xx - xx", #' .format_na_strs = list(c("asda", "lkjklj")) #' ) #' in_rows(.list = list(a = c(NA, NA)), .format_na_strs = c("asda", "lkjklj")) From b8d065c913f45c866adc0d8b349fb4c0d00bdb2c Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 21 Nov 2024 16:02:24 +0000 Subject: [PATCH 07/18] [skip roxygen] [skip vbump] Roxygen Man Pages Auto Update --- man/in_rows.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/in_rows.Rd b/man/in_rows.Rd index baa9fe0ce..d71a041d1 100644 --- a/man/in_rows.Rd +++ b/man/in_rows.Rd @@ -59,8 +59,8 @@ in_rows(1, 2, 3, .names = c("a", "b", "c")) in_rows(1, 2, 3, .labels = c("a", "b", "c")) in_rows(1, 2, 3, .names = c("a", "b", "c"), .labels = c("AAA", "BBB", "CCC")) in_rows( - .list = list(a = c(NA, NA)), - .formats = "xx - xx", + .list = list(a = c(NA, NA)), + .formats = "xx - xx", .format_na_strs = list(c("asda", "lkjklj")) ) in_rows(.list = list(a = c(NA, NA)), .format_na_strs = c("asda", "lkjklj")) From ed3c2e2d543f1ed8ae10f48136088dcf3bae8baf Mon Sep 17 00:00:00 2001 From: Melkiades Date: Thu, 21 Nov 2024 17:16:15 +0100 Subject: [PATCH 08/18] empty From 82aefcf0d336fb82dd979c215520a52b276419d5 Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Wed, 27 Nov 2024 18:49:24 -0500 Subject: [PATCH 09/18] Some minor improvements to the ARD vignette --- vignettes/ard_how_to.Rmd | 38 +++++++++++++++++++++----------------- 1 file changed, 21 insertions(+), 17 deletions(-) diff --git a/vignettes/ard_how_to.Rmd b/vignettes/ard_how_to.Rmd index 162ec19e6..12a956525 100644 --- a/vignettes/ard_how_to.Rmd +++ b/vignettes/ard_how_to.Rmd @@ -1,10 +1,10 @@ --- -title: "How to generate QC-ready result data frames from tables" +title: "Generating QC-Ready Result Data Frames (ARDs) from Tables" author: "Davide Garolini" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{How to generate QC-ready result data frames from tables} + %\VignetteIndexEntry{Generating QC-Ready Result Data Frames (ARDs) from Tables} %\VignetteEncoding{UTF-8} %\VignetteEngine{knitr::rmarkdown} editor_options: @@ -29,19 +29,19 @@ knitr::opts_chunk$set( knitr::opts_chunk$set(comment = "#") ``` - ```{css, echo=FALSE} .reveal .r code { white-space: pre; } ``` + # Disclaimer -This vignette is a work in progress. +This vignette is a work in progress and subject to change. -## Create the example table +## Creating an Example Table -First of all we need a table to retrieve all the necessary information. Borrowing one from the [vignette](https://insightsengineering.github.io/rtables/latest-tag/articles/clinical_trials.html) about clinical trials. +In order to generate an ARD (Analysis Results Dataset), we first need to create a table from which all the necessary information will be retrieved. We will borrow a simple table from [this vignette](https://insightsengineering.github.io/rtables/latest-tag/articles/clinical_trials.html) about clinical trials. ```{r} library(rtables) @@ -56,9 +56,11 @@ tbl <- build_table(lyt, ADSL) tbl ``` -## Convert the table to a result data frame +## Converting the Table to a Result Data Frame (ARD) + +The `as_result_df()` function is used to convert a table to a result data frame. The result data frame is a data frame that contains the result of the summary table and is ready to be used for quality control purposes. This may be customized according to different standards. -The `as_result_df` function is the one that converts a table to a result data frame. The result data frame is a data frame that contains the result of the summary table and is ready to be used for quality control purposes. This may differ for different standard and lets see how to produce different result `data.frame`. The following default ouputs are available for visual appreciation, or to transform into a table again with `df_to_tt()`. +Let's see how we can produce different result `data.frame`s. The following outputs can be returned by setting different parameters in the `as_results_df()` function, and these results can be transformed back into a table using the `df_to_tt()` function. ```{r} as_result_df(tbl) @@ -69,14 +71,15 @@ as_result_df(tbl, simplify = TRUE, keep_label_rows = TRUE) as_result_df(tbl, simplify = TRUE, keep_label_rows = TRUE, expand_colnames = TRUE) ``` -Now lets get the final `ARD` output. This is the one that is ready to be used for quality control purposes. +Now let's generate our final ARD output, which is ready to be used for quality control purposes. + ```{r} as_result_df(tbl, make_ard = TRUE) ``` -## Customizing the output +## Customizing the Output -`as_result_df` and `ARD` depend on the content of the table, so it is possible to modify the table to get some custom output. For example, we can add some user-defined statistics with their own statistical names: +`as_result_df()` and ARDs depend on the content of the table, so it is possible to modify the table to customize the output. For example, we can add some user-defined statistics with custom names: ```{r} # rcell and in_rows are the core of any analysis function @@ -93,7 +96,8 @@ rc_row <- in_rows( # Only a getter for this object print(obj_stat_names(rc_row)) # list(a = c("A", "B"), b = c("B", "C")) -# if c("A", "B") one each row, if single list duplicated +# if c("A", "B"), one for each row +# if single list, duplicated rc_row <- in_rows( .list = list(a = c(NA, 1), b = c(1, NA)), .formats = c("xx - xx", "xx.x - xx.x"), @@ -107,11 +111,11 @@ rc_row <- in_rows( .list = list(a = c(NA, 1), b = c(1, NA)), .formats = c("xx - xx", "xx.x - xx.x"), .format_na_strs = list(c("asda", "lkjklj")), - .stat_names = list(c("A", "B")) # It is duplicated, check it your self! + .stat_names = list(c("A", "B")) # It is duplicated, check it yourself! ) ``` -Lets put it into practice: +Let's put it into practice: ```{r} mean_sd_custom <- function(x) { @@ -124,7 +128,6 @@ mean_sd_custom <- function(x) { ) } counts_percentage_custom <- function(x) { - # browser() cnts <- table(x) out <- lapply(cnts, function(x) { perc <- x / sum(cnts) @@ -146,9 +149,10 @@ tbl <- build_table(lyt, ex_adsl) as_result_df(tbl, make_ard = TRUE) ``` -# More complex outputs +# More Complex Outputs + +Let's add hierarchical row and column splits: -Lets add hierarchical row and column splits. ```{r} lyt <- basic_table() %>% split_rows_by("STRATA2") %>% From 8a9aed06c1ec74da7452d370b3e1827a21f2970e Mon Sep 17 00:00:00 2001 From: Emily de la Rua Date: Wed, 27 Nov 2024 18:50:32 -0500 Subject: [PATCH 10/18] Add "ARD" to WORDLIST --- inst/WORDLIST | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/WORDLIST b/inst/WORDLIST index c1da7f3bd..bcfac9e44 100644 --- a/inst/WORDLIST +++ b/inst/WORDLIST @@ -1,3 +1,5 @@ +ARD +ARDs Bové CRAN's Carreras @@ -56,14 +58,12 @@ mandatorily monospace multivariable orderable -ouputs pathing postfix postprocessing pre priori programmatically -quartiles reindexed repo repped From 242b9acf5bf1d9b3f6a5471b6410614b8e5c1bb3 Mon Sep 17 00:00:00 2001 From: Davide Garolini Date: Thu, 28 Nov 2024 11:45:29 +0100 Subject: [PATCH 11/18] Update tests/testthat/test-result_data_frame.R Co-authored-by: Emily de la Rua <59304861+edelarua@users.noreply.github.com> Signed-off-by: Davide Garolini --- tests/testthat/test-result_data_frame.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index 21a0cfc21..71166b95e 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -239,7 +239,7 @@ test_that("as_result_df works fine with empty tables and no character(0) is allo ) }) -test_that("make_ard produces realisting ARD output with as_result_df", { +test_that("make_ard produces realistic ARD output with as_result_df", { # Testing fundamental getters/setters rc <- rcell(c(1, 2), stat_names = c("Rand1", "Rand2")) expect_equal(obj_stat_names(rc), c("Rand1", "Rand2")) From 8cd1acbcddbc154eba4280ebff6c5b25f6b1812b Mon Sep 17 00:00:00 2001 From: Davide Garolini Date: Thu, 28 Nov 2024 11:45:35 +0100 Subject: [PATCH 12/18] Update tests/testthat/test-result_data_frame.R Co-authored-by: Emily de la Rua <59304861+edelarua@users.noreply.github.com> Signed-off-by: Davide Garolini --- tests/testthat/test-result_data_frame.R | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index 71166b95e..53ebb94ad 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -267,7 +267,6 @@ test_that("make_ard produces realistic ARD output with as_result_df", { ) } counts_percentage_custom <- function(x) { - # browser() cnts <- table(x) out <- lapply(cnts, function(x) { perc <- x / sum(cnts) From 700d542ebbde30c031765a9d984f09ffa11ddc46 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Thu, 28 Nov 2024 13:08:38 +0100 Subject: [PATCH 13/18] fixes after review --- R/tt_afun_utils.R | 5 +- R/tt_as_df.R | 62 +++++++++++++----- tests/testthat/test-result_data_frame.R | 87 +++++++++++++++---------- 3 files changed, 102 insertions(+), 52 deletions(-) diff --git a/R/tt_afun_utils.R b/R/tt_afun_utils.R index d36809505..d67f31924 100644 --- a/R/tt_afun_utils.R +++ b/R/tt_afun_utils.R @@ -10,7 +10,8 @@ #' @param label (`string` or `NULL`)\cr label. If non-`NULL`, it will be looked at when determining row labels. #' @param colspan (`integer(1)`)\cr column span value. #' @param footnotes (`list` or `NULL`)\cr referential footnote messages for the cell. -#' @param stat_names (`character`)\cr names for the statistics in the cell. It can be a vector of strings. +#' @param stat_names (`character` or `NULL`)\cr names for the statistics in the cell. It can be a vector of strings. +#' If `NULL`, statistic names are not specified. #' #' @inherit CellValue return #' @@ -126,7 +127,7 @@ non_ref_rcell <- function(x, is_ref, format = NULL, colspan = 1L, #' See [formatters::list_valid_aligns()] for currently supported alignments. #' @param .format_na_strs (`character` or `NULL`)\cr NA strings for the cells. #' @param .stat_names (`character` or `NULL`)\cr names for the statistics in the cells. -#' It can be a vector of values. +#' It can be a vector of values. If `NULL`, statistic names are not specified. #' #' @note In post-processing, referential footnotes can also be added using row and column #' paths with [`fnotes_at_path<-`]. diff --git a/R/tt_as_df.R b/R/tt_as_df.R index 7ab26d8af..df1727fe3 100644 --- a/R/tt_as_df.R +++ b/R/tt_as_df.R @@ -129,8 +129,17 @@ as_result_df <- function(tt, spec = NULL, # Fix for content rows analysis variable label if (any(ret$node_class == "ContentRow")) { - where_to <- ret$node_class == "ContentRow" - ret$avar_name[where_to] <- ret$spl_value_1[where_to] + where_to <- which(ret$node_class == "ContentRow") + for (crow_i in where_to) { + # For each Content row, extract the row split that is used as analysis variable + tmp_tbl <- ret[crow_i, , drop = FALSE] + na_labels <- lapply(tmp_tbl, is.na) %>% unlist(use.names = FALSE) + group_to_take <- colnames(tmp_tbl[, !na_labels]) + group_to_take <- group_to_take[grep("^group[0-9]+$", group_to_take)] + + # Final assignment of each Content row to its correct analysis label + ret$avar_name[crow_i] <- ret[[group_to_take[length(group_to_take)]]][crow_i] + } } # If we want to expand colnames @@ -173,6 +182,15 @@ as_result_df <- function(tt, spec = NULL, # Unnecessary columns ret_tmp <- ret[, !colnames(ret) %in% c("row_num", "is_group_summary", "node_class")] + n_row_groups <- sapply(colnames(ret), function(x) { + if (grepl("^group", x)) { + # Extract the number after "group" using regex + return(as.numeric(sub("group(\\d+).*", "\\1", x))) + } else { + return(0) # Return 0 if no "group" is found + } + }) %>% + max() # Indexes of real columns (visible in the output, but no row names) only_col_indexes <- seq(which(colnames(ret_tmp) == "label_name") + 1, ncol(ret_tmp)) @@ -186,7 +204,7 @@ as_result_df <- function(tt, spec = NULL, colnames(core_row_names)[colnames_to_rename] <- c("variable", "variable_level", "variable_label") # Adding stats_names if present - raw_stat_names <- .get_stat_names_from_table(tt, add.labrows = keep_label_rows) + raw_stat_names <- .get_stat_names_from_table(tt, add_labrows = keep_label_rows) cell_stat_names <- .make_df_from_raw_data(raw_stat_names, nr = nrow(tt), nc = ncol(tt)) # Moving colnames to rows (flattening) @@ -195,17 +213,26 @@ as_result_df <- function(tt, spec = NULL, for (col_i in only_col_indexes) { # Making row splits into row specifications (group1 group1_level) current_col_split_level <- unlist(ret_tmp[seq_len(number_of_col_splits), col_i], use.names = FALSE) - flattened_cols_names <- c(column_split_names[[1]][[1]], current_col_split_level) - names(flattened_cols_names) <- c( - paste0("group", seq_along(column_split_names[[1]][[1]])), - paste0("group", seq_along(current_col_split_level), "_level") + flattened_cols_names <- .c_alternated(column_split_names[[1]][[1]], current_col_split_level) + names(flattened_cols_names) <- .c_alternated( + paste0("group", seq_along(column_split_names[[1]][[1]]) + n_row_groups), + paste0("group", seq_along(current_col_split_level) + n_row_groups, "_level") ) - tmp_core_ret_by_col_i <- cbind( - t(data.frame(flattened_cols_names)), - core_row_names, - row.names = NULL - ) + if (n_row_groups > 0) { + tmp_core_ret_by_col_i <- cbind( + core_row_names[, seq(n_row_groups * 2)], + t(data.frame(flattened_cols_names)), + core_row_names[, -seq(n_row_groups * 2)], + row.names = NULL + ) + } else { + tmp_core_ret_by_col_i <- cbind( + t(data.frame(flattened_cols_names)), + core_row_names, + row.names = NULL + ) + } # retrieving stat names and stats stat_name <- setNames(cell_stat_names[, col_i - min(only_col_indexes) + 1, drop = TRUE], NULL) @@ -274,10 +301,15 @@ as_result_df <- function(tt, spec = NULL, cellvals } +# Is there a better alternative? +.c_alternated <- function(v1, v2) { + unlist(mapply(c, v1, v2, SIMPLIFY = FALSE)) +} + # Amazing helper function to get the statistic names from row cells! -.get_stat_names_from_table <- function(tt, add.labrows = FALSE) { +.get_stat_names_from_table <- function(tt, add_labrows = FALSE) { # omit_labrows # omit label rows - rows <- collect_leaves(tt, incl.cont = TRUE, add.labrows = add.labrows) + rows <- collect_leaves(tt, incl.cont = TRUE, add.labrows = add_labrows) lapply(rows, function(ri) { lapply(row_cells(ri), obj_stat_names) }) @@ -333,7 +365,7 @@ make_result_df_md_colnames <- function(maxlen) { spllen <- floor((maxlen - 2) / 2) ret <- character() if (spllen > 0) { - ret <- paste(c("spl_var", "spl_value"), rep(seq_len(spllen), rep(2, spllen)), sep = "_") + ret <- paste("group", rep(seq_len(spllen), each = 2), c("", "_level"), sep = "") } ret <- c(ret, c("avar_name", "row_name", "label_name", "row_num", "is_group_summary", "node_class")) } diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index 53ebb94ad..3149bda07 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -18,7 +18,7 @@ test_that("Result Data Frame generation works v0", { expect_identical( names(result_df)[1:5], - c("spl_var_1", "spl_value_1", "spl_var_2", "spl_value_2", "avar_name") + c("group1", "group1_level", "group2", "group2_level", "avar_name") ) ## handle multiple analyses @@ -338,18 +338,18 @@ test_that("make_ard works with multiple row levels", { ard_out <- as_result_df(tbl, make_ard = TRUE) expect_equal(unique(ard_out$stat_name), c("mean", "n")) - expect_contains(colnames(ard_out), c("spl_var_2", "spl_value_2")) + expect_contains(colnames(ard_out), c("group2", "group2_level")) # Count output expect_equal( ard_out[90, , drop = TRUE], list( - group1 = "ARM", - group1_level = "C: Combination", - spl_var_1 = "STRATA1", - spl_value_1 = "C", - spl_var_2 = "STRATA2", - spl_value_2 = "S2", + group1 = "STRATA1", + group1_level = "C", + group2 = "STRATA2", + group2_level = "S2", + group3 = "ARM", + group3_level = "C: Combination", variable = "SEX", variable_level = "UNDIFFERENTIATED", variable_label = "UNDIFFERENTIATED", @@ -371,19 +371,19 @@ test_that("make_ard works with multiple column levels", { ard_out <- as_result_df(tbl, make_ard = TRUE) expect_equal(unique(ard_out$stat_name), c("mean", "n")) - expect_contains(colnames(ard_out), c("spl_var_1", "spl_value_1")) + expect_contains(colnames(ard_out), c("group1", "group1_level")) expect_contains(colnames(ard_out), c("group2", "group2_level")) # Count output expect_equal( ard_out[16, , drop = TRUE], list( - group1 = "ARM", - group2 = "STRATA2", - group1_level = "A: Drug X", - group2_level = "S2", - spl_var_1 = "STRATA1", - spl_value_1 = "A", + group1 = "STRATA1", + group1_level = "A", + group2 = "ARM", + group2_level = "A: Drug X", + group3 = "STRATA2", + group3_level = "S2", variable = "AGE", variable_level = "Mean", variable_label = "Mean", @@ -398,6 +398,7 @@ test_that("make_ard works with summarize_row_groups", { lyt <- basic_table() %>% split_rows_by("STRATA2") %>% summarize_row_groups() %>% + split_rows_by("ARM") %>% split_cols_by("ARM") %>% split_cols_by("STRATA1") %>% analyze(c("AGE", "SEX")) @@ -406,20 +407,22 @@ test_that("make_ard works with summarize_row_groups", { ard_out <- as_result_df(tbl, make_ard = TRUE) expect_contains(unique(ard_out$stat_name), c("mean", "n", "p")) - expect_contains(colnames(ard_out), c("spl_var_1", "spl_value_1")) + expect_contains(colnames(ard_out), c("group1", "group1_level")) expect_contains(colnames(ard_out), c("group2", "group2_level")) # label row output expect_equal( ard_out[1, , drop = TRUE], list( - group1 = "ARM", - group2 = "STRATA1", - group1_level = "A: Drug X", - group2_level = "A", - spl_var_1 = "STRATA2", - spl_value_1 = "S1", - variable = "S1", + group1 = "STRATA2", + group1_level = "S1", + group2 = NA_character_, + group2_level = NA_character_, + group3 = "ARM", + group3_level = "A: Drug X", + group4 = "STRATA1", + group4_level = "A", + variable = "STRATA2", variable_level = "S1", variable_label = "S1", stat_name = "n", @@ -428,21 +431,35 @@ test_that("make_ard works with summarize_row_groups", { tolerance = 10e-6 ) + # Testing different placements of summarize_row_groups + lyt <- basic_table() %>% + split_rows_by("STRATA2") %>% + split_rows_by("ARM") %>% + summarize_row_groups() %>% + split_cols_by("ARM") %>% + split_cols_by("STRATA1") %>% + analyze(c("AGE", "SEX")) + + tbl <- build_table(lyt, ex_adsl) + ard_out <- as_result_df(tbl, make_ard = TRUE) + # label row output expect_equal( - ard_out[86, , drop = TRUE], + ard_out[1, , drop = TRUE], list( - group1 = "ARM", - group2 = "STRATA1", - group1_level = "C: Combination", - group2_level = "A", - spl_var_1 = "STRATA2", - spl_value_1 = "S1", - variable = "S1", - variable_level = "S1", - variable_label = "S1", - stat_name = "p", - stat = 0.35 + group1 = "STRATA2", + group1_level = "S1", + group2 = "ARM", + group2_level = "A: Drug X", + group3 = "ARM", + group3_level = "A: Drug X", + group4 = "STRATA1", + group4_level = "A", + variable = "ARM", + variable_level = "A: Drug X", + variable_label = "A: Drug X", + stat_name = "n", + stat = 18 ), tolerance = 10e-6 ) From 41c9ea912df1d91f69e0a91a9a8acee9b3c0879d Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 28 Nov 2024 12:12:07 +0000 Subject: [PATCH 14/18] [skip roxygen] [skip vbump] Roxygen Man Pages Auto Update --- man/CellValue.Rd | 3 ++- man/in_rows.Rd | 2 +- man/rcell.Rd | 3 ++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/man/CellValue.Rd b/man/CellValue.Rd index ca147f8e6..57839d8c7 100644 --- a/man/CellValue.Rd +++ b/man/CellValue.Rd @@ -39,7 +39,8 @@ corresponds to the unmodified default behavior.} \item{format_na_str}{(\code{string})\cr string which should be displayed when formatted if this cell's value(s) are all \code{NA}.} -\item{stat_names}{(\code{character})\cr names for the statistics in the cell. It can be a vector of strings.} +\item{stat_names}{(\code{character} or \code{NULL})\cr names for the statistics in the cell. It can be a vector of strings. +If \code{NULL}, statistic names are not specified.} } \value{ An object representing the value within a single cell within a populated table. The underlying structure diff --git a/man/in_rows.Rd b/man/in_rows.Rd index d71a041d1..9c050060e 100644 --- a/man/in_rows.Rd +++ b/man/in_rows.Rd @@ -41,7 +41,7 @@ See \code{\link[formatters:list_formats]{formatters::list_valid_aligns()}} for c \item{.format_na_strs}{(\code{character} or \code{NULL})\cr NA strings for the cells.} \item{.stat_names}{(\code{character} or \code{NULL})\cr names for the statistics in the cells. -It can be a vector of values.} +It can be a vector of values. If \code{NULL}, statistic names are not specified.} } \value{ A \code{RowsVerticalSection} object (or \code{NULL}). The details of this object should be considered an diff --git a/man/rcell.Rd b/man/rcell.Rd index 54ca58694..859dbd427 100644 --- a/man/rcell.Rd +++ b/man/rcell.Rd @@ -51,7 +51,8 @@ corresponds to the unmodified default behavior.} \item{format_na_str}{(\code{string})\cr string which should be displayed when formatted if this cell's value(s) are all \code{NA}.} -\item{stat_names}{(\code{character})\cr names for the statistics in the cell. It can be a vector of strings.} +\item{stat_names}{(\code{character} or \code{NULL})\cr names for the statistics in the cell. It can be a vector of strings. +If \code{NULL}, statistic names are not specified.} \item{is_ref}{(\code{flag})\cr whether function is being used in the reference column (i.e. \code{.in_ref_col} should be passed to this argument).} From a4a76f52281e5d2f70ebde907e9d94c7fcb9dcce Mon Sep 17 00:00:00 2001 From: Melkiades Date: Thu, 28 Nov 2024 13:20:26 +0100 Subject: [PATCH 15/18] fix default for stat_names --- R/00tabletrees.R | 2 +- R/tt_afun_utils.R | 10 +++++----- man/CellValue.Rd | 6 +++--- man/in_rows.Rd | 2 +- man/rcell.Rd | 6 +++--- tests/testthat/test-result_data_frame.R | 19 +++++++++++++++++++ vignettes/ard_how_to.Rmd | 4 ++-- 7 files changed, 34 insertions(+), 15 deletions(-) diff --git a/R/00tabletrees.R b/R/00tabletrees.R index 72cd85724..2f5ebfab1 100644 --- a/R/00tabletrees.R +++ b/R/00tabletrees.R @@ -1933,7 +1933,7 @@ RefFootnote <- function(note, index = NA_integer_, symbol = NA_character_) { ## indent_mod: indent modifier to be used for parent row CellValue <- function(val, format = NULL, colspan = 1L, label = NULL, indent_mod = NULL, footnotes = NULL, - align = NULL, format_na_str = NULL, stat_names = NULL) { + align = NULL, format_na_str = NULL, stat_names = NA_character_) { if (is.null(colspan)) { colspan <- 1L } diff --git a/R/tt_afun_utils.R b/R/tt_afun_utils.R index d67f31924..b11def08f 100644 --- a/R/tt_afun_utils.R +++ b/R/tt_afun_utils.R @@ -10,8 +10,8 @@ #' @param label (`string` or `NULL`)\cr label. If non-`NULL`, it will be looked at when determining row labels. #' @param colspan (`integer(1)`)\cr column span value. #' @param footnotes (`list` or `NULL`)\cr referential footnote messages for the cell. -#' @param stat_names (`character` or `NULL`)\cr names for the statistics in the cell. It can be a vector of strings. -#' If `NULL`, statistic names are not specified. +#' @param stat_names (`character` or `NA`)\cr names for the statistics in the cell. It can be a vector of strings. +#' If `NA`, statistic names are not specified. #' #' @inherit CellValue return #' @@ -32,7 +32,7 @@ rcell <- function(x, footnotes = NULL, align = NULL, format_na_str = NULL, - stat_names = NULL) { + stat_names = NA_character_) { checkmate::assert_character(stat_names, null.ok = TRUE) if (!is.null(align)) { check_aligns(align) @@ -172,7 +172,7 @@ in_rows <- function(..., .list = NULL, .names = NULL, .row_footnotes = list(NULL), .aligns = NULL, .format_na_strs = NULL, - .stat_names = list(NULL)) { + .stat_names = list(NA_character_)) { if (is.function(.formats)) { .formats <- list(.formats) } @@ -236,7 +236,7 @@ in_rows <- function(..., .list = NULL, .names = NULL, footnotes = .cell_footnotes %||% list(NULL), align = .aligns, format_na_str = .format_na_strs %||% list(NULL), - stat_names = .stat_names %||% list(NULL), + stat_names = .stat_names %||% list(NA_character_), SIMPLIFY = FALSE ) } diff --git a/man/CellValue.Rd b/man/CellValue.Rd index 57839d8c7..3c96fbd90 100644 --- a/man/CellValue.Rd +++ b/man/CellValue.Rd @@ -13,7 +13,7 @@ CellValue( footnotes = NULL, align = NULL, format_na_str = NULL, - stat_names = NULL + stat_names = NA_character_ ) } \arguments{ @@ -39,8 +39,8 @@ corresponds to the unmodified default behavior.} \item{format_na_str}{(\code{string})\cr string which should be displayed when formatted if this cell's value(s) are all \code{NA}.} -\item{stat_names}{(\code{character} or \code{NULL})\cr names for the statistics in the cell. It can be a vector of strings. -If \code{NULL}, statistic names are not specified.} +\item{stat_names}{(\code{character} or \code{NA})\cr names for the statistics in the cell. It can be a vector of strings. +If \code{NA}, statistic names are not specified.} } \value{ An object representing the value within a single cell within a populated table. The underlying structure diff --git a/man/in_rows.Rd b/man/in_rows.Rd index 9c050060e..94258d70f 100644 --- a/man/in_rows.Rd +++ b/man/in_rows.Rd @@ -15,7 +15,7 @@ in_rows( .row_footnotes = list(NULL), .aligns = NULL, .format_na_strs = NULL, - .stat_names = list(NULL) + .stat_names = list(NA_character_) ) } \arguments{ diff --git a/man/rcell.Rd b/man/rcell.Rd index 859dbd427..bd31d7785 100644 --- a/man/rcell.Rd +++ b/man/rcell.Rd @@ -14,7 +14,7 @@ rcell( footnotes = NULL, align = NULL, format_na_str = NULL, - stat_names = NULL + stat_names = NA_character_ ) non_ref_rcell( @@ -51,8 +51,8 @@ corresponds to the unmodified default behavior.} \item{format_na_str}{(\code{string})\cr string which should be displayed when formatted if this cell's value(s) are all \code{NA}.} -\item{stat_names}{(\code{character} or \code{NULL})\cr names for the statistics in the cell. It can be a vector of strings. -If \code{NULL}, statistic names are not specified.} +\item{stat_names}{(\code{character} or \code{NA})\cr names for the statistics in the cell. It can be a vector of strings. +If \code{NA}, statistic names are not specified.} \item{is_ref}{(\code{flag})\cr whether function is being used in the reference column (i.e. \code{.in_ref_col} should be passed to this argument).} diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index 3149bda07..ffa006aae 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -464,3 +464,22 @@ test_that("make_ard works with summarize_row_groups", { tolerance = 10e-6 ) }) + +test_that("make_ard works if there are no stat_names", { + mean_sd_custom <- function(x) { + mean <- mean(x, na.rm = FALSE) + sd <- sd(x, na.rm = FALSE) + + rcell(c(mean, sd), + label = "Mean (SD)", format = "xx.x (xx.x)" + ) + } + + lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx") %>% + split_cols_by("ARM", split_fun = keep_split_levels(c("A: Drug X", "B: Placebo"))) %>% + analyze(vars = "AGE", afun = mean_sd_custom) + + tbl <- build_table(lyt, ex_adsl) + + expect_equal(as_result_df(tbl, make_ard = TRUE)$stat_name, rep(NA_character_, 4)) +}) diff --git a/vignettes/ard_how_to.Rmd b/vignettes/ard_how_to.Rmd index 12a956525..d176026f9 100644 --- a/vignettes/ard_how_to.Rmd +++ b/vignettes/ard_how_to.Rmd @@ -123,8 +123,8 @@ mean_sd_custom <- function(x) { sd <- sd(x, na.rm = FALSE) rcell(c(mean, sd), - label = "Mean (SD)", format = "xx.x (xx.x)", - stat_names = c("Mean", "SD") + label = "Mean (SD)", format = "xx.x (xx.x)"#, + # stat_names = c("Mean", "SD") ) } counts_percentage_custom <- function(x) { From e2c6298ed54c29771fb82399cfab2f0a576758a9 Mon Sep 17 00:00:00 2001 From: Melkiades Date: Thu, 28 Nov 2024 16:13:44 +0100 Subject: [PATCH 16/18] fix NULL-NA when stats missing --- R/tt_afun_utils.R | 16 +++++++++------- man/in_rows.Rd | 7 ++++--- man/rcell.Rd | 2 +- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/R/tt_afun_utils.R b/R/tt_afun_utils.R index b11def08f..2925197f0 100644 --- a/R/tt_afun_utils.R +++ b/R/tt_afun_utils.R @@ -32,7 +32,7 @@ rcell <- function(x, footnotes = NULL, align = NULL, format_na_str = NULL, - stat_names = NA_character_) { + stat_names = NULL) { checkmate::assert_character(stat_names, null.ok = TRUE) if (!is.null(align)) { check_aligns(align) @@ -79,7 +79,7 @@ rcell <- function(x, indent_mod = indent_mod, footnotes = footnotes, format_na_str = format_na_str, - stat_names = stat_names + stat_names = stat_names %||% NA_character_ ) # RefFootnote(footnote)) } if (!is.null(align)) { @@ -126,8 +126,9 @@ non_ref_rcell <- function(x, is_ref, format = NULL, colspan = 1L, #' @param .aligns (`character` or `NULL`)\cr alignments for the cells. Standard for `NULL` is `"center"`. #' See [formatters::list_valid_aligns()] for currently supported alignments. #' @param .format_na_strs (`character` or `NULL`)\cr NA strings for the cells. -#' @param .stat_names (`character` or `NULL`)\cr names for the statistics in the cells. -#' It can be a vector of values. If `NULL`, statistic names are not specified. +#' @param .stat_names (`list`)\cr names for the statistics in the cells. +#' It can be a vector of values. If `list(NULL)`, statistic names are not specified and will +#' appear as `NA`. #' #' @note In post-processing, referential footnotes can also be added using row and column #' paths with [`fnotes_at_path<-`]. @@ -172,7 +173,7 @@ in_rows <- function(..., .list = NULL, .names = NULL, .row_footnotes = list(NULL), .aligns = NULL, .format_na_strs = NULL, - .stat_names = list(NA_character_)) { + .stat_names = list(NULL)) { if (is.function(.formats)) { .formats <- list(.formats) } @@ -195,7 +196,7 @@ in_rows <- function(..., .list = NULL, .names = NULL, length(.names) > 0 || length(.indent_mods) > 0 || length(.format_na_strs) > 0 || - length(.stat_names) > 0 + (!all(is.na(.stat_names)) && length(.stat_names) > 0) ) { stop( "in_rows got 0 rows but length >0 of at least one of ", @@ -231,12 +232,13 @@ in_rows <- function(..., .list = NULL, .names = NULL, if (is.null(.aligns)) { .aligns <- list(NULL) } + l2 <- mapply(rcell, x = l, format = .formats, footnotes = .cell_footnotes %||% list(NULL), align = .aligns, format_na_str = .format_na_strs %||% list(NULL), - stat_names = .stat_names %||% list(NA_character_), + stat_names = .stat_names %||% list(NULL), SIMPLIFY = FALSE ) } diff --git a/man/in_rows.Rd b/man/in_rows.Rd index 94258d70f..665638cc2 100644 --- a/man/in_rows.Rd +++ b/man/in_rows.Rd @@ -15,7 +15,7 @@ in_rows( .row_footnotes = list(NULL), .aligns = NULL, .format_na_strs = NULL, - .stat_names = list(NA_character_) + .stat_names = list(NULL) ) } \arguments{ @@ -40,8 +40,9 @@ See \code{\link[formatters:list_formats]{formatters::list_valid_aligns()}} for c \item{.format_na_strs}{(\code{character} or \code{NULL})\cr NA strings for the cells.} -\item{.stat_names}{(\code{character} or \code{NULL})\cr names for the statistics in the cells. -It can be a vector of values. If \code{NULL}, statistic names are not specified.} +\item{.stat_names}{(\code{list})\cr names for the statistics in the cells. +It can be a vector of values. If \code{list(NULL)}, statistic names are not specified and will +appear as \code{NA}.} } \value{ A \code{RowsVerticalSection} object (or \code{NULL}). The details of this object should be considered an diff --git a/man/rcell.Rd b/man/rcell.Rd index bd31d7785..06a17c7d6 100644 --- a/man/rcell.Rd +++ b/man/rcell.Rd @@ -14,7 +14,7 @@ rcell( footnotes = NULL, align = NULL, format_na_str = NULL, - stat_names = NA_character_ + stat_names = NULL ) non_ref_rcell( From 4cd4edb838512397a37cc5a07c4cc8feaeb01ccd Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 28 Nov 2024 15:16:21 +0000 Subject: [PATCH 17/18] [skip style] [skip vbump] Restyle files --- R/tt_afun_utils.R | 2 +- tests/testthat/test-result_data_frame.R | 12 ++++++------ vignettes/ard_how_to.Rmd | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/R/tt_afun_utils.R b/R/tt_afun_utils.R index 2925197f0..585deffd8 100644 --- a/R/tt_afun_utils.R +++ b/R/tt_afun_utils.R @@ -232,7 +232,7 @@ in_rows <- function(..., .list = NULL, .names = NULL, if (is.null(.aligns)) { .aligns <- list(NULL) } - + l2 <- mapply(rcell, x = l, format = .formats, footnotes = .cell_footnotes %||% list(NULL), diff --git a/tests/testthat/test-result_data_frame.R b/tests/testthat/test-result_data_frame.R index ffa006aae..01889620d 100644 --- a/tests/testthat/test-result_data_frame.R +++ b/tests/testthat/test-result_data_frame.R @@ -469,17 +469,17 @@ test_that("make_ard works if there are no stat_names", { mean_sd_custom <- function(x) { mean <- mean(x, na.rm = FALSE) sd <- sd(x, na.rm = FALSE) - + rcell(c(mean, sd), - label = "Mean (SD)", format = "xx.x (xx.x)" + label = "Mean (SD)", format = "xx.x (xx.x)" ) } - + lyt <- basic_table(show_colcounts = TRUE, colcount_format = "N=xx") %>% split_cols_by("ARM", split_fun = keep_split_levels(c("A: Drug X", "B: Placebo"))) %>% analyze(vars = "AGE", afun = mean_sd_custom) - + tbl <- build_table(lyt, ex_adsl) - - expect_equal(as_result_df(tbl, make_ard = TRUE)$stat_name, rep(NA_character_, 4)) + + expect_equal(as_result_df(tbl, make_ard = TRUE)$stat_name, rep(NA_character_, 4)) }) diff --git a/vignettes/ard_how_to.Rmd b/vignettes/ard_how_to.Rmd index d176026f9..5d08cd2cd 100644 --- a/vignettes/ard_how_to.Rmd +++ b/vignettes/ard_how_to.Rmd @@ -123,7 +123,7 @@ mean_sd_custom <- function(x) { sd <- sd(x, na.rm = FALSE) rcell(c(mean, sd), - label = "Mean (SD)", format = "xx.x (xx.x)"#, + label = "Mean (SD)", format = "xx.x (xx.x)" # , # stat_names = c("Mean", "SD") ) } From 948d68d6c265864263b2b3b5586144537e329f7b Mon Sep 17 00:00:00 2001 From: Melkiades Date: Fri, 29 Nov 2024 11:10:47 +0100 Subject: [PATCH 18/18] news --- NEWS.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 07d3c5060..75b750512 100644 --- a/NEWS.md +++ b/NEWS.md @@ -4,7 +4,8 @@ * Experimental pagination is now possible in `tt_as_flextable()` and `export_as_docx()`. * Added handling of widths in `tt_as_flextable()`. Now it is possible to change column widths for `.docx` exports. * Initialized vignette about quality control outputs of `as_result_df()`. - * Initialized parameter `make_ard` output for single-line statistical outputs. + * Completed parameter `make_ard` output for single-line statistical outputs. + * Added `stat_names` to `rcell()` to be used by `as_result_df(make_ard = TRUE)`. ### Miscellaneous * Split `docx` document generation to the new package [`rtables.officer`](https://github.com/insightsengineering/rtables.officer).