From 682148f4b46f847485293d03002bc330f5e108fd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?G=C3=A1bor=20Cs=C3=A1rdi?=
Date: Fri, 18 Oct 2024 13:49:41 +0200
Subject: [PATCH] Allow reading code from file in C

It is only slightly faster, though.
---
 R/scan-dependencies.R | 17 ++++++++++++++---
 src/init.c            |  6 ++++--
 src/tree-sitter.c     | 39 ++++++++++++++++++++++++++++++++++++---
 3 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/R/scan-dependencies.R b/R/scan-dependencies.R
index e7eb6b1c..3c115f86 100644
--- a/R/scan-dependencies.R
+++ b/R/scan-dependencies.R
@@ -1,11 +1,22 @@
-code_query <- function(code, query) {
-  if (is.character(code)) code <- charToRaw(paste(code, collapse = "\n"))
+code_query <- function(code = NULL, query, file = NULL) {
+  # `code` (character or raw) takes precedence; otherwise `file` is read in C.
+  if (is.null(code) && is.null(file)) {
+    stop("Either `code` or `file` must be supplied.")
+  }
   qlen <- nchar(query, type = "bytes") + 1L # + \n
   qbeg <- c(1L, cumsum(qlen))
   qnms <- names(query) %||% rep(NA_character_, length(query))
   query1 <- paste0(query, "\n", collapse = "")
-  res <- call_with_cleanup(c_code_query, code, query1)
+
+  if (!is.null(code)) {
+    if (is.character(code)) code <- charToRaw(paste(code, collapse = "\n"))
+    res <- call_with_cleanup(c_code_query, code, query1)
+  } else {
+    res <- call_with_cleanup(c_code_query_path, file, query1)
+  }
+
   qorig <- as.integer(cut(res[[1]][[3]], breaks = qbeg, include.lowest = TRUE))
+
   list(
     patterns = data_frame(
       id = seq_along(res[[1]][[1]]),
diff --git a/src/init.c b/src/init.c
index 826a7b6c..0ec30fff 100644
--- a/src/init.c
+++ b/src/init.c
@@ -5,12 +5,14 @@
 #include "cleancall.h"
 
 SEXP code_query(SEXP input, SEXP pattern);
+SEXP code_query_path(SEXP path, SEXP pattern);
 SEXP s_expr(SEXP input);
 
 static const R_CallMethodDef callMethods[] = {
   CLEANCALL_METHOD_RECORD,
-  { "code_query", (DL_FUNC) &code_query, 2 },
-  { "s_expr", (DL_FUNC) &s_expr, 1 },
+  { "code_query", (DL_FUNC) &code_query, 2 },
+  { "code_query_path", (DL_FUNC) &code_query_path, 2 },
+  { "s_expr", (DL_FUNC) &s_expr, 1 },
   { NULL, NULL, 0 }
 };
 
diff --git a/src/tree-sitter.c b/src/tree-sitter.c
index 
99162618..f5481349 100644
--- a/src/tree-sitter.c
+++ b/src/tree-sitter.c
@@ -1,3 +1,5 @@
+#include <stdio.h>
+
 #define R_NO_REMAP
 #include "R.h"
 #include "Rinternals.h"
@@ -317,7 +319,7 @@ bool check_predicates(const struct query_match_t *qm) {
   return true;
 }
 
-SEXP code_query(SEXP input, SEXP pattern) {
+SEXP code_query_c(const char *c_input, uint32_t length, SEXP pattern) {
   const TSLanguage *rlang = NULL;
   TSParser *parser = NULL;
 
@@ -369,8 +371,6 @@ SEXP code_query(SEXP input, SEXP pattern) {
   r_call_on_exit(r_free, capture_map_pattern);
   memset(capture_map_pattern, 0, sizeof(uint32_t) * capture_count);
 
-  const char *c_input = (const char*) RAW(input);
-  uint32_t length = Rf_length(input);
   TSTree *tree = ts_parser_parse_string(parser, NULL, c_input, length);
   r_call_on_exit((cleanup_fn_t) ts_tree_delete, tree);
   TSNode root = ts_tree_root_node(tree);
@@ -479,3 +479,55 @@ SEXP code_query(SEXP input, SEXP pattern) {
   UNPROTECT(3);
   return result;
 }
+
+SEXP code_query(SEXP input, SEXP pattern) {
+  const char *c_input = (const char*) RAW(input);
+  uint32_t length = Rf_length(input);
+  return code_query_c(c_input, length, pattern);
+}
+
+// Read the file at `path` into memory and run `pattern` against its bytes.
+// `path` must be a single string; every failure is reported via Rf_error().
+SEXP code_query_path(SEXP path, SEXP pattern) {
+  if (TYPEOF(path) != STRSXP || Rf_length(path) != 1 ||
+      STRING_ELT(path, 0) == NA_STRING) {
+    Rf_error("`path` must be a single, non-missing string");
+  }
+  const char *cpath = CHAR(STRING_ELT(path, 0));
+  FILE *fp = fopen(cpath, "rb");
+  if (fp == NULL) {
+    Rf_error("Cannot open path %s", cpath);
+  }
+
+  // ftell() returns -1 on failure, do not let that become a huge size_t
+  long end = -1;
+  if (fseek(fp, 0, SEEK_END) == 0) {
+    end = ftell(fp);
+  }
+  if (end < 0 || fseek(fp, 0, SEEK_SET) != 0) {
+    fclose(fp);
+    Rf_error("Cannot determine size of file %s", cpath);
+  }
+  // code_query_c() takes a uint32_t length, refuse to truncate silently
+  if ((uint64_t) end > UINT32_MAX) {
+    fclose(fp);
+    Rf_error("File is too large to parse: %s", cpath);
+  }
+  size_t file_size = (size_t) end;
+
+  // malloc(0) may return NULL, so always request at least one byte
+  char *buf = malloc(file_size > 0 ? file_size : 1);
+  if (!buf) {
+    fclose(fp);
+    Rf_error("Cannot allocate memory for file %s", cpath);
+  }
+  r_call_on_exit(r_free, buf);
+
+  if (fread(buf, 1, file_size, fp) != file_size) {
+    fclose(fp);
+    Rf_error("Error reading file: %s", cpath);
+  }
+  fclose(fp);
+
+  return code_query_c(buf, (uint32_t) file_size, pattern);
+}