From effa131e3d87f4e370c13ef5ce989849aa328229 Mon Sep 17 00:00:00 2001 From: Kevin Ushey Date: Fri, 4 Apr 2014 11:23:54 -0700 Subject: [PATCH] Support '*' expansion in CellCounts (#30) --- R/CellCounts.R | 53 ++++++++++++++++++++++++++++++-- src/RcppExports.cpp | 24 +++++++-------- tests/testthat/test-CellCounts.R | 51 ++++++++++++++++++++---------- 3 files changed, 97 insertions(+), 31 deletions(-) diff --git a/R/CellCounts.R b/R/CellCounts.R index 429981a..6499eb9 100644 --- a/R/CellCounts.R +++ b/R/CellCounts.R @@ -75,12 +75,61 @@ CellCounts.COMPASSContainer <- function(data, combinations) { .CellCounts_character <- function(data, combinations) { + ## Pre-parse the combinations by expanding entries of the form + ## "A*B*C" to + ## + ## A & B & C + ## A & B & !C + ## A & !B & C + ## ... + ## + ## TODO: Handle things like A&(B*C) + combos <- lapply(combinations, function(x) { + + ## Bail if no '*' + if (!grepl("*", x, fixed=TRUE)) return(x) + + ## Bail if unsupported combination seen + if (grepl("*", x, fixed=TRUE) && grepl("[&|]", x, perl=TRUE)) { + stop("currently cannot combine '*' expander with '&' or '|'", + call.=FALSE) + } + + ## Generate a matrix of 0s and 1s that forms the same 'structure' + splat <- unlist(strsplit(x, "*", fixed = TRUE)) + n <- length(splat) + values <- do.call( + function(...) { + expand.grid(..., KEEP.OUT.ATTRS = FALSE) + }, + replicate(n, c(0, 1), simplify = FALSE) + ) + + ## Replace the 0s and 1s with appropriate names + for (i in seq_along(values)) { + values[, i] <- swap(values[, i], + c(0, 1), + c(splat[i], paste0("!", splat[i])) + ) + } + + ## Paste and return the output + do.call( + function(...) paste(..., sep = "&"), + values, + + ) + + }) + + combos <- unlist(combos) + output <- .Call(C_COMPASS_CellCounts_character, data, - lapply(combinations, function(x) parse(text=x)) + lapply(combos, function(x) parse(text=x)) ) rownames(output) <- names(data) - colnames(output) <- unlist(combinations) + colnames(output) <- combos return(output) } diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index b27545b..290e943 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -5,32 +5,32 @@ using namespace Rcpp; -// CellCounts -IntegerMatrix CellCounts(List x, List combos); -RcppExport SEXP COMPASS_CellCounts(SEXP xSEXP, SEXP combosSEXP) { +// CellCounts_character +IntegerMatrix CellCounts_character(List data, List combinations); +RcppExport SEXP COMPASS_CellCounts_character(SEXP dataSEXP, SEXP combinationsSEXP) { BEGIN_RCPP SEXP __sexp_result; { Rcpp::RNGScope __rngScope; - Rcpp::traits::input_parameter< List >::type x(xSEXP ); - Rcpp::traits::input_parameter< List >::type combos(combosSEXP ); - IntegerMatrix __result = CellCounts(x, combos); + Rcpp::traits::input_parameter< List >::type data(dataSEXP ); + Rcpp::traits::input_parameter< List >::type combinations(combinationsSEXP ); + IntegerMatrix __result = CellCounts_character(data, combinations); PROTECT(__sexp_result = Rcpp::wrap(__result)); } UNPROTECT(1); return __sexp_result; END_RCPP } -// CellCounts_character -IntegerMatrix CellCounts_character(List data, List combinations); -RcppExport SEXP COMPASS_CellCounts_character(SEXP dataSEXP, SEXP combinationsSEXP) { +// CellCounts +IntegerMatrix CellCounts(List x, List combos); +RcppExport SEXP COMPASS_CellCounts(SEXP xSEXP, SEXP combosSEXP) { BEGIN_RCPP SEXP __sexp_result; { Rcpp::RNGScope __rngScope; - Rcpp::traits::input_parameter< List >::type data(dataSEXP ); - Rcpp::traits::input_parameter< List >::type combinations(combinationsSEXP ); - IntegerMatrix __result = CellCounts_character(data, combinations); + Rcpp::traits::input_parameter< List >::type x(xSEXP ); + Rcpp::traits::input_parameter< List >::type combos(combosSEXP ); + IntegerMatrix __result = CellCounts(x, combos); PROTECT(__sexp_result = Rcpp::wrap(__result)); } UNPROTECT(1); diff --git a/tests/testthat/test-CellCounts.R b/tests/testthat/test-CellCounts.R index 6a59f77..633381d 100644 --- a/tests/testthat/test-CellCounts.R +++ b/tests/testthat/test-CellCounts.R @@ -15,23 +15,40 @@ data <- lapply(data, function(x) { return (x) }) combinations <- colnames(data[[1]]) ## [1] "A" "B" "C" "D" "E" "F" -expect_identical( - CellCounts(data, combinations), - CellCounts(data, 1:6) -) -expect_identical( - CellCounts(data, list(c(1, 2, 3))), - CellCounts(data, list("A&B&C")) -) +test_that("The integer and character interfaces for CellCounts match up", { -y <- "A&B&C" -expect_identical( - CellCounts(data, list(c(1, 2, 3))), - CellCounts(data, list(y)) -) + expect_identical( + CellCounts(data, combinations), + CellCounts(data, 1:6) + ) + + expect_identical( + CellCounts(data, list(c(1, 2, 3))), + CellCounts(data, list("A&B&C")) + ) + + y <- "A&B&C" + expect_identical( + CellCounts(data, list(c(1, 2, 3))), + CellCounts(data, list(y)) + ) + + expect_identical( + CellCounts(data, 1:6), + marginal_counts(data) + ) + +}) -expect_identical( - CellCounts(data, 1:6), - marginal_counts(data) -) +test_that("We properly expand with a '*' in the name", { + + expect_identical( + CellCounts(data, "A*B"), + CellCounts(data, c("A&B", "!A&B", "A&!B", "!A&!B")) + ) + + ## TODO: allow combinations of * and [&|] + expect_error( CellCounts(data, "A*B&C")) + +})