Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix sharedString deletion #515

Merged
merged 3 commits into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 46 additions & 22 deletions R/wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -1900,45 +1900,69 @@ deleteDataColumn <- function(wb, sheet, col) {
}

a <- wb$worksheets[[sheet]]$sheet_data

# t: if a shared string is used or if the string is a value itself
# v: the shared string index or the string itself (if t == 0)
# in wb$sharedStrings we find the values of the shared strings by index + 1 (0 indexed!)


# check which elements to delete
keep <- a$cols != col
# if there is no column to delete, exit early
if (all(keep)) return(invisible(0))

# delete cols in cols "col" move higher cols one down
# delete cols in cols "col", move higher cols one down
a$cols <- as.integer(a$cols[keep] - 1 * (a$cols[keep] > col))
a$rows <- a$rows[keep]

# reduce the shared strings pointers if they are not used anymore
has_t <- !is.na(a$t) & a$t == 1
used_shared <- a$v[has_t] # a reference to all shared strings
keep_t <- keep[has_t] # these shared strings are kept
keep_t[is.na(keep_t)] <- FALSE
keep_shared <- used_shared[keep_t]
rem_shared <- setdiff(unique(used_shared[!keep_t]), unique(keep_shared))
for (v in rem_shared) {
to_reduce <- as.numeric(keep_shared) > as.numeric(v)
to_reduce[is.na(to_reduce)] <- FALSE
if (any(to_reduce))
keep_shared[to_reduce] <- as.character(as.numeric(keep_shared[to_reduce]) - 1)
}
used_shared[keep_t] <- keep_shared
a$v[has_t] <- used_shared

ss <- data.frame(old = numeric(0), new = numeric(0), string = character(0))
if (length(wb$sharedStrings) > 0) {
ss <- data.frame(
# the old index 0 indexed, as used in a$v
old = as.numeric(seq(length(wb$sharedStrings)) - 1),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe seq_along(wb$sharedStrings) - 1

# will hold the new index 0 indexed, as used in a$v
new = NA,
# the actual strings
string = wb$sharedStrings
)
}

# 1. remove the values from sheet_data (a)
a$v <- a$v[keep]
a$t <- a$t[keep]


# update the shared strings map (ss) with the new indices

# v_this_sheet etc are the indices that are still used
v_this_sheet <- as.numeric(a$v[!is.na(a$t) & a$t == 1])
# get all string indices from other sheets, so that strings used in other sheets are not deleted!
v_other_sheets <- unlist(lapply(setdiff(seq_along(wb$worksheets), sheet), function(sh) {
a <- wb$worksheets[[sh]]$sheet_data
as.numeric(a$v[!is.na(a$t) & a$t == 1])
}))

idx <- sort(unique(c(v_this_sheet, v_other_sheets)))
ss$new[ss$old %in% idx] <- seq_along(idx) - 1

# 2. remove the values from the sharedStrings object
wb$sharedStrings <- wb$sharedStrings[idx + 1]
attr(wb$sharedStrings, "uniqueCount") <- length(idx)

# 3. reindex the values from the sheet_data to use new shared strings indices
a$v[a$t == 1] <- as.character(ss$new[as.numeric(a$v[a$t == 1]) + 1])

# update the shared strings for all other sheets
for (s in setdiff(seq_along(wb$worksheets), sheet)) {
a <- wb$worksheets[[s]]$sheet_data
a$v[a$t == 1] <- as.character(ss$new[as.numeric(a$v[a$t == 1]) + 1])
}

a$f <- updateFormula(a$f[keep], col = col)
a$n_elements <- sum(keep)

if ("data_count" %in% names(a)) a$data_count <- length(unique(a$v))

# remove the unneeded strings from sharedStrings
rv <- as.numeric(rem_shared) + 1
wb$sharedStrings <- wb$sharedStrings[-rv]
attr(wb$sharedStrings, "uniqueCount") <- length(unique(wb$sharedStrings))

# adjust styles
sheet_name <- wb$sheet_names[[sheet]]
this_sheet <- sapply(wb$styleObjects, function(o) {
Expand Down
117 changes: 113 additions & 4 deletions tests/testthat/test-wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ test_that("int2col and col2int", {
test_that("deleteDataColumn basics", {
wb <- createWorkbook()
addWorksheet(wb, "tester")

JanMarvin marked this conversation as resolved.
Show resolved Hide resolved
for (i in seq(5)) {
mat <- data.frame(x = rep(paste0(int2col(i), i), 10))
writeData(wb, sheet = 1, startRow = 1, startCol = i, mat)
Expand All @@ -29,8 +29,8 @@ test_that("deleteDataColumn basics", {
c("<f>=COUNTA(A2:A11)</f>", "<f>=COUNTA(B2:B11)</f>", "<f>=COUNTA(C2:C11)</f>",
"<f>=COUNTA(D2:D11)</f>", "<f>=COUNTA(E2:E11)</f>")
)


deleteDataColumn(wb, 1, col = 3)
expect_equal(read.xlsx(wb),
data.frame(x = rep("A1", 10), x = "B2", x = "D4", x = "E5", # no C3!
Expand Down Expand Up @@ -130,7 +130,7 @@ test_that("deleteDataColumn with formatting data", {

st <- openxlsx::createStyle(textDecoration = "Bold", fontSize = 20, fontColour = "red")
openxlsx::addStyle(wb, 1, style = st, rows = 1, cols = seq(ncol(df)))

sst <- wb$styleObjects[[1]]
sst$rows <- c(1, 1)
sst$cols <- c(1, 2)
Expand All @@ -140,3 +140,112 @@ test_that("deleteDataColumn with formatting data", {
expect_equal(wb$styleObjects[[1]],
sst)
})

test_that("deleteDataColumn with shared strings does not crash or change inputs", {
df <- data.frame("Col 1" = "Row 2 Col 1",
"Col 2" = "Row 2 Col 2",
"Col 3" = "Row 2 Col 3",
check.names = FALSE)

wb <- createWorkbook()
addWorksheet(wb, "tester")
writeData(wb, sheet = 1, startRow = 1, startCol = 1, x = df, colNames = TRUE)

deleteDataColumn(wb, sheet = 1, col = 2)

expect_equal(
wb$sharedStrings,
structure(
list(
"<si><t xml:space=\"preserve\">Col 1</t></si>",
"<si><t xml:space=\"preserve\">Col 3</t></si>",
"<si><t xml:space=\"preserve\">Row 2 Col 1</t></si>",
"<si><t xml:space=\"preserve\">Row 2 Col 3</t></si>"
),
uniqueCount = 4L
)
)
expect_equal(
read.xlsx(wb),
data.frame(
"Col 1" = "Row 2 Col 1",
"Col 3" = "Row 2 Col 3"
)
)
})


test_that("deleteDataColumn with shared strings on other sheets", {
df <- data.frame("ABC" = "I am a shared string with sheet 2!")
df2 <- data.frame("AB" = "I am a shared string with sheet 2!")

wb <- createWorkbook()
addWorksheet(wb, "tester")
writeData(wb, sheet = 1, startRow = 1, startCol = 1, x = df, colNames = TRUE)

simplify <- function(sd) data.frame(rows = sd$rows, cols = sd$cols, t = sd$t, v = sd$v)
expect_equal(
simplify(wb$worksheets[[1]]$sheet_data),
data.frame(rows = c(1, 2), cols = 1, t = 1, v = c("0", "1"))
)
expect_equal(
wb$sharedStrings,
structure(
list(
"<si><t xml:space=\"preserve\">ABC</t></si>",
"<si><t xml:space=\"preserve\">I am a shared string with sheet 2!</t></si>"
),
uniqueCount = 2L
)
)

addWorksheet(wb, "tester2")
writeData(wb, sheet = 2, startRow = 1, startCol = 1, x = df2, colNames = TRUE)

expect_equal(
simplify(wb$worksheets[[2]]$sheet_data),
data.frame(rows = c(1, 2), cols = 1, t = 1, v = c("2", "1"))
)
expect_equal(
wb$sharedStrings,
structure(
list(
"<si><t xml:space=\"preserve\">ABC</t></si>",
"<si><t xml:space=\"preserve\">I am a shared string with sheet 2!</t></si>",
"<si><t xml:space=\"preserve\">AB</t></si>"
),
uniqueCount = 3L
)
)


# deleting from sheet 1 does not delete the string from sheet 2!
deleteDataColumn(wb, sheet = 1, col = 1)

expect_equal(
simplify(wb$worksheets[[1]]$sheet_data),
data.frame(rows = numeric(0), cols = numeric(0), t = numeric(0), v = character(0))
)

# note on sheet 2, the indices v to the shared strings have to change as well!
expect_equal(
simplify(wb$worksheets[[2]]$sheet_data),
data.frame(rows = c(1, 2), cols = 1, t = 1, v = c("1", "0"))
)

expect_equal(
wb$sharedStrings,
structure(
list(
"<si><t xml:space=\"preserve\">I am a shared string with sheet 2!</t></si>",
"<si><t xml:space=\"preserve\">AB</t></si>"
),
uniqueCount = 2L
)
)

expect_equal(
read.xlsx(wb, sheet = 2),
data.frame(AB = "I am a shared string with sheet 2!")
)
})
Loading