Skip to content

Commit

Permalink
Handle NAs and fixed a bug
Browse files Browse the repository at this point in the history
  • Loading branch information
frankcorneliusmartin committed Jan 17, 2024
1 parent b1380b8 commit 9623438
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 31 deletions.
7 changes: 4 additions & 3 deletions vtg.crosstab/src/R/RPC_CT.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,14 @@ RPC_CT <- function(data, subset_rules, master) {
return(data)
}

vtg::log$info("Rows before NA removal: {nrow(data)}")
used_variables <- all.vars(master$formula)
data <- na.omit(data[, used_variables])
vtg::log$info("Rows after NA removal: {nrow(data)}")
data <- data[, used_variables]
data[is.na(data)] <- "N/A"

for (i in used_variables) {
data[, i] <- factor(data[, i], levels = master$var_cat[[i]])
vtg::log$debug(i)
vtg::log$debug(paste(master$var_cat[[i]], collapse = ", "))
}
ct <- xtabs(master$formula, data = data)
return(ct)
Expand Down
6 changes: 3 additions & 3 deletions vtg.crosstab/src/R/RPC_get_vars.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ RPC_get_vars <- function(data, subset_rules, master) {
return(data)
}

vtg::log$info("Rows before NA removal: {nrow(data)}")
used_variables <- all.vars(master$formula)
data <- na.omit(data[, used_variables])
vtg::log$info("Rows after NA removal: {nrow(data)}")
data <- data[, used_variables]
data[is.na(data)] <-"N/A"


f <- master$formula
vars <- apply(data, 2, unique, simplify = F)
Expand Down
5 changes: 2 additions & 3 deletions vtg.crosstab/src/R/dct.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,6 @@ dct <- function(client, f, margin = NULL, percentage = F,
# this will call itself without the `use.master.container` option
#
if (client$use.master.container) {
vtg::log$debug(glue::glue("Running `dct` in master container using
image '{image.name}'.."))
result <- client$call(
"dct",
f = f,
Expand All @@ -43,7 +41,7 @@ dct <- function(client, f, margin = NULL, percentage = F,
# of the `use.master.container` option)
client$setOrganizations(organizations_to_include)

ct <- init_formula(f)
ct <- vtg.crosstab::init_formula(f)

vtg::log$info("")
vtg::log$info("###############################################")
Expand All @@ -64,6 +62,7 @@ dct <- function(client, f, margin = NULL, percentage = F,
# VARIABLE CATEGORIES - COLLECT UNIQUE VARIABLE CATEGORIES FROM NODES
#######################################################################
ct <- vtg.crosstab::variable_categories(nodes = nodes, master = ct)
vtg::log$debug("ct: {ct}")

vtg::log$info("")
vtg::log$info("###############################################")
Expand Down
15 changes: 10 additions & 5 deletions vtg.crosstab/src/R/variable_categories.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,14 @@
#' @export
#'
variable_categories <- function(nodes, master){
categories <- sapply(all.vars(master$formula), function(i){
unique(as.vector(sapply(nodes, function(j) j[[i]])))
}, simplify=F)
master$var_cat <- categories
return(master)

used_variables <- all.vars(master$formula)

for (i in used_variables) {
for (j in nodes) {
master$var_cat[[i]] <- unique(c(master$var_cat[[i]], j[[i]]))
}
}
return(master)

}
34 changes: 17 additions & 17 deletions vtg.crosstab/test.R
Original file line number Diff line number Diff line change
@@ -1,29 +1,29 @@
rm(list = ls(all.names = TRUE))
devtools::load_all("./src")
devtools::load_all("../vtg.preprocessing")
library(vtg.crosstab)

# This seems to be equivalent to "import x as y"
library(namespace)
tryCatch({
invisible(registerNamespace('vtg', loadNamespace('vtg')))
}, error = function(e) {
vtg::writeln("Package 'vantage.infrastructure' already loaded.")
})
data1 <- read.csv("/mnt/c/data/euracan-node-a.csv")
data2 <- read.csv("/mnt/c/data/euracan-node-b.csv")
data2 <- data2[data2$e34_cstage != 5, ]

library(vtg.crosstab)
# change the first row and make b04_sex collumn 999
data2$b04_sex[1] <- 999

dataset <- list(data1, data2)

# Data = rbind(data1, data2)

data <- data.frame(Type = paste0("T", rep(1:4, 9*4)),
Subj = gl(9, 4, 36*4),event=rbinom(36*4,1,.5))
data$Subj=as.numeric(data$Subj)
D1 <- data[data$Subj%in%c(1,2,3), ]
D2 <- data[data$Subj%in%c(4,5,6), ]
D3 <- data[data$Subj%in%c(7,8,9), ]
data_local <- rbind(vtg.preprocessing::extend_data(data1),
vtg.preprocessing::extend_data(data2))

dataset = list(D1,D2,D3)
# dataset = rbind(data1, data2)

formula = as.formula(~ Type + Subj + event)
formula = as.formula(~ e34_cstage + b04_sex)

crosstab.mock <- function(dataset,formula){
client=vtg::MockClient$new(datasets = dataset,pkgname = 'vtg.crosstab')
result=vtg.crosstab::dct(client = client,f = formula,
result=vtg.crosstab::dct(client = client, f = formula,
organizations_to_include = NULL)
return(result)
}
Expand Down

0 comments on commit 9623438

Please sign in to comment.