Skip to content

Commit

Permalink
Fixed a bug in the factor counting
Browse files Browse the repository at this point in the history
  • Loading branch information
frankcorneliusmartin committed Feb 1, 2024
1 parent a1fa288 commit 198cdc5
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 10 deletions.
15 changes: 12 additions & 3 deletions vtg.summary/src/R/dsummary.R
Original file line number Diff line number Diff line change
Expand Up @@ -191,16 +191,25 @@ combine_node_statistics <- function(summary_per_node, columns) {
}

vtg::log$debug("unique_levels: {unique_levels}")
+ # Initialize global_factor_counts
+ for (column in names(all_levels)) {
+ global_factor_counts[[column]] <- list()
+ for (levels in unique_levels[[column]]) {
+ for (level in levels) {
+ global_factor_counts[[column]][[level]] <- 0
+ }
+ }
+ }

vtg::log$debug("Global levels initialized: {global_factor_counts}")
# Compute the factor counts per column
for (node in factor_counts_per_node) {
for (column in factor_columns) {
for (levels in unique_levels[[column]]) {
for (level in levels) {
if (level %in% names(node[[column]])) {
- global_factor_counts[[column]][[level]] <- node[[column]][[level]]
- } else {
- global_factor_counts[[column]][[level]] <- 0
+ global_factor_counts[[column]][[level]] <-
+ global_factor_counts[[column]][[level]] + node[[column]][[level]]
}
}
}
Expand Down
16 changes: 9 additions & 7 deletions vtg.summary/src/test.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,23 @@ devtools::load_all("./vtg.preprocessing")
# create fake data. Three columns with random numbers, two columns with factors
set.seed(123L);
columns = c("A", "B", "C", "D", "E")
+ l <- 250
data <- data.frame(
- "A" = sample(1:10, size = 1000, replace = TRUE),
- "B" = sample(c(1:3, NA), size= 1000, replace = TRUE),
- "C" = sample(c(6:19, NA), size= 1000, replace = TRUE),
- "D" = sample(gl(10, 100), size = 1000, replace = TRUE),
+ "A" = sample(1:10, size = l, replace = TRUE),
+ "B" = sample(c(1:3, NA), size= l, replace = TRUE),
+ "C" = sample(c(6:19, NA), size= l, replace = TRUE),
+ "D" = sample(gl(4, 1), size = l, replace = TRUE),
"E" = sample(as.character(c("female", "male", NA)),
- size = 1000, replace = TRUE),
+ size = l, replace = TRUE),
"F" = sample(as.character(c("other")),
- size = 1000, replace = TRUE)
+ size = l, replace = TRUE)
)


# Split the dataframe into two sets
n_rows <- nrow(data)
- set_size <- floor(n_rows / 2)
+ set_size <- floor(l / 3)
# set_size <- 200

d1 <- data[1:set_size, ]
d2 <- data[(set_size + 1):n_rows, ]
Expand Down

0 comments on commit 198cdc5

Please sign in to comment.