From 66155e06a054d1924d31cfd6a5c3f29ffaf77404 Mon Sep 17 00:00:00 2001 From: James Laird-Smith Date: Sun, 15 Oct 2023 19:41:20 +0100 Subject: [PATCH] Git fetch now robust to tree order As it stands, code for unpacking of git repo packfiles is sensitive to the order of the trees. This doesn't matter for servers that provide it in the order of deepest roots first, but for those that don't do that, unpacking will provide incorrect results. This problem is the case for repos hosted on Azure DevOps, for example. This change uses the frequency of the hashes throught the packfile to sort the trees from deepest (the root tree) to least deep. This is the most straightforward way I could think of for achieving this. Practically, the function for unpacking the packfile (`unpack_packfile_repo()`) now includes a call to a new function `sort_trees()`. --- R/git-protocol.R | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/R/git-protocol.R b/R/git-protocol.R index 1efa5e49..e9e0d4df 100644 --- a/R/git-protocol.R +++ b/R/git-protocol.R @@ -504,9 +504,32 @@ async_git_download_repo_sha <- function(url, sha, output) { then(function(packfile) unpack_packfile_repo(packfile, output)) } +filter_hashes <- function(leaf) { + out <- list() + obs <- leaf$object + obs <- obs[obs$type == "tree", "hash"] + out[["hashes"]] <- obs + out$hash <- leaf$hash + out +} + +sort_trees <- function(trees){ + hashes_tree <- lapply(trees, filter_hashes) + flat_hashes <- character() + for(i in seq_along(hashes_tree)){ + flat_hashes <- append(flat_hashes, hashes_tree[[i]]$hash) + flat_hashes <- append(flat_hashes, hashes_tree[[i]]$hashes) + } + flat_hashes_df <- as.data.frame(table(flat_hashes)) + new_idx <- as.character(flat_hashes_df[order(flat_hashes_df$Freq), + "flat_hashes"]) + trees[new_idx] +} + unpack_packfile_repo <- function(parsed, output) { types <- unname(vcapply(parsed, "[[", "type")) trees <- parsed[types == "tree"] + trees <- sort_trees(trees) done <- logical(length(trees)) idx <- 1L wd <- character()