From e82fff497aba5d7abf627c2c14c347f92f5324fb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A1bor=20Cs=C3=A1rdi?=
Date: Mon, 21 Oct 2024 12:32:54 +0200
Subject: [PATCH] Scan dependencies in Rmd files

---
 R/scan-deps-queries.R | 27 ++++++++++++++++
 R/scan-deps.R         | 75 ++++++++++++++++++++++++++++++++++++++++++++--
 R/utils.R             |  5 +++
 3 files changed, 105 insertions(+), 2 deletions(-)

diff --git a/R/scan-deps-queries.R b/R/scan-deps-queries.R
index 28b1fb38..75ab0472 100644
--- a/R/scan-deps-queries.R
+++ b/R/scan-deps-queries.R
@@ -30,3 +30,30 @@ q_deps <- function() {
     NULL
   )
 }
+
+# Tree-sitter queries for Rmd files, used with the "markdown" language.
+# `block` captures fenced code blocks whose language is one of r, R,
+# rscript, Rscript and whose info string starts with `{` (@language,
+# @header, @content); `inline` captures every inline node (@inline).
+q_deps_rmd <- function() {
+  c(block =
+    '(fenced_code_block
+      (fenced_code_block_delimiter)
+      (info_string (language) @language) @header
+      (code_fence_content) @content
+      (#any-of? @language "r" "R" "rscript" "Rscript")
+      (#match? @header "^[{]")
+    )',
+    inline =
+    '(inline) @inline'
+  )
+}
+
+# Tree-sitter query for code spans, used with the "markdown-inline" language.
+# Captures the whole span (@code) and its two delimiters (@csd1, @csd2).
+q_deps_rmd_inline <- function() {
+  '(code_span
+    (code_span_delimiter) @csd1
+    (code_span_delimiter) @csd2
+  ) @code'
+}
diff --git a/R/scan-deps.R b/R/scan-deps.R
index 9ac0730c..074bc02b 100644
--- a/R/scan-deps.R
+++ b/R/scan-deps.R
@@ -31,6 +31,9 @@ clear_deps_cache <- function() {
   unlink(dirname(get_deps_cache_path()), recursive = TRUE)
 }
 
+# quick pre-filter regex, code without a match is not scanned for deps
+re_r_dep <- "library|require|loadNamespace|::"
+
 scan_path_deps <- function(path) {
   code <- readBin(path, "raw", file.size(path))
 
@@ -46,7 +49,7 @@ scan_path_deps <- function(path) {
   }
 
   # scan it if it is worth it, based on a quick check
-  has_deps <- length(grepRaw("library|require|loadNamespace|::", code)) > 0
+  has_deps <- length(grepRaw(re_r_dep, code)) > 0
   deps <- if (has_deps) scan_path_deps_do(code, path)
 
   # save it to the cache, but anonimize it first. If no deps, save NULL
@@ -150,5 +153,73 @@ parse_pkg_from_library_call <- function(fn, code) {
 # -------------------------------------------------------------------------
 
 scan_path_deps_do_rmd <- function(code, path) {
-  # TODO
+  # Run one "markdown" query for both fenced R blocks and inline nodes, then
+  # split the captures by the pattern that produced them.
+  hits <- code_query(code, language = "markdown", query = q_deps_rmd())
+  inl_pat <- hits$patterns$id[hits$patterns$name == "inline"]
+  is_inl <- hits$matched_captures$pattern %in% inl_pat
+  inl_hits <- hits$matched_captures[is_inl, ]
+  blk_hits <- hits$matched_captures[!is_inl, ]
+  # rbind() skips NULL arguments, so a scan without hits contributes nothing
+  rbind(
+    if (nrow(inl_hits) > 0) scan_path_deps_do_inline_hits(code, inl_hits, path),
+    if (nrow(blk_hits) > 0) scan_path_deps_do_block_hits(code, blk_hits, path)
+  )
+}
+
+# columns of a captures table that describe the position of a capture
+range_cols <- c(
+  "start_row", "start_column", "end_row", "end_column",
+  "start_byte", "end_byte"
+)
+
+# Scan the inline R code (`r ...` code spans) of the "inline" captures
+# of an Rmd file. Returns NULL if no span can contain a dependency.
+scan_path_deps_do_inline_hits <- function(code, inl_hits, path) {
+  # cheap filters first: need a backtick and something dependency-like
+  wcnd <- which(inl_hits$name == "inline")
+  wcnd <- wcnd[grepl("`", inl_hits$code[wcnd], fixed = TRUE)]
+  wcnd <- wcnd[grepl(re_r_dep, inl_hits$code[wcnd])]
+  if (length(wcnd) == 0) {
+    return(NULL)
+  }
+
+  inl_ranges <- inl_hits[wcnd, range_cols]
+  r_hits <- code_query(
+    code,
+    language = "markdown-inline",
+    ranges = inl_ranges,
+    query = q_deps_rmd_inline()
+  )
+  cpt <- r_hits$matched_captures
+  pre_drop <- nchar(cpt$code[cpt$name == "csd1"])
+  post_drop <- nchar(cpt$code[cpt$name == "csd2"])
+  r_code <- omit_pre_post(cpt$code[cpt$name == "code"], pre_drop, post_drop)
+  # keep only the spans that start with 'r ' and pass the quick check
+  wcnd2 <- substr(r_code, 1, 2) == "r " & grepl(re_r_dep, r_code)
+  if (!any(wcnd2)) {
+    return(NULL)
+  }
+  # need to adjust the ranges for the _ASCII_ (!) delimiters
+  # both ends are trimmed, in bytes and in columns, so the range stays
+  # consistent; the start also skips the 'r ' prefix
+  r_ranges <- cpt[cpt$name == "code", ][wcnd2, range_cols]
+  r_ranges$start_byte <- r_ranges$start_byte + pre_drop[wcnd2] + 2L # 'r '
+  r_ranges$start_column <- r_ranges$start_column + pre_drop[wcnd2] + 2L
+  r_ranges$end_byte <- r_ranges$end_byte - post_drop[wcnd2]
+  r_ranges$end_column <- r_ranges$end_column - post_drop[wcnd2]
+  scan_path_deps_do_r(code, path = path, ranges = r_ranges)
+}
+
+# Scan the contents of the R code blocks ("content" captures) of an Rmd
+# file. Returns NULL if no block can contain a dependency.
+scan_path_deps_do_block_hits <- function(code, blk_hits, path) {
+  wcnd <- which(blk_hits$name == "content")
+  wcnd <- wcnd[grepl(re_r_dep, blk_hits$code[wcnd])]
+  if (length(wcnd) == 0) {
+    return(NULL)
+  }
+
+  r_ranges <- blk_hits[wcnd, range_cols]
+  scan_path_deps_do_r(code, path = path, ranges = r_ranges)
 }
diff --git a/R/utils.R b/R/utils.R
index 3dd999e3..2a294f79 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -507,3 +507,8 @@ na_omit <- function(x) {
 file_ext <- function(x) {
   re_match(x, "[.]([[:alnum:]]+)$")[[".match"]]
 }
+
+# drop a prefix and a postfix, vectorized
+omit_pre_post <- function(x, pre = 0, post = 0) {
+  substr(x, 1L + pre, nchar(x) - post)
+}