jhudsl · howardbaik · Jun 28, 2024 · Jun 28, 2024 · Jul 9, 2024 · Jul 9, 2024
diff --git a/R/find_issue.R b/R/find_issue.R
@@ -0,0 +1,24 @@
+#' Count the listed issues of a repository that report broken URLs
+#'
+#' @param repo Repository as `owner/name`; coerced to character if needed.
+#' @param github_pat GitHub personal access token, sent as a Bearer token.
+#' @return Number of returned issues whose title contains
+#'   'Broken URLs found in the course!' (0 if there are none).
+#' @noRd
+find_issue <- function(repo, github_pat) {
+  # Ask for the largest page: the API default of 30 items per page can hide
+  # the issue in a busy repository. Later pages are still not fetched.
+  result <- httr::GET(
+    paste0("https://api.github.com/repos/", as.character(repo), "/issues"),
+    query = list(per_page = 100),
+    httr::add_headers(Authorization = paste0("Bearer ", github_pat)),
+    httr::accept_json()
+  )
+  httr::stop_for_status(result)
+
+  # Explicit encoding silences httr's "No encoding supplied" message
+  result_content <- httr::content(result, "text", encoding = "UTF-8")
+  result_list <- jsonlite::fromJSON(result_content)
+  titles <- result_list$title
+  length(grep("Broken URLs found in the course!", titles, fixed = TRUE))
+}
diff --git a/R/spell_check.R b/R/spell_check.R
@@ -0,0 +1,59 @@
+#' Spell check the markdown files of a course repository
+#'
+#' Saves the misspellings to `check_reports/spell_check_results.tsv` under the
+#' `.git` root directory and writes their number to stdout.
+#'
+#' @param variables Not used by the function body.
+#' @return `NULL`, invisibly; the function is called for its side effects.
+#' @noRd
+spell_check <- function(variables) {
+  root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
+
+  # Create the report directory where the report is written, not in the cwd
+  output_file <- file.path(root_dir, "check_reports", "spell_check_results.tsv")
+  dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE)
+
+  # Read the dictionary and, if present, the exclusion patterns. Empty lines are
+  # dropped: an empty alternative in the combined pattern matches every file.
+  dictionary <- readLines(file.path(root_dir, "resources", "dictionary.txt"))
+  exclude_file <- file.path(root_dir, "resources", "exclude_files.txt")
+  exclude_patterns <- character(0)
+  if (file.exists(exclude_file)) {
+    exclude_patterns <- readLines(exclude_file)
+    exclude_patterns <- exclude_patterns[nzchar(exclude_patterns)]
+  }
+
+  # Files whose name ends in `md` (`.md`, `.Rmd`, ...) below the working directory
+  files <- list.files(pattern = "md$", recursive = TRUE, full.names = TRUE)
+  if (length(exclude_patterns) > 0) {
+    excluded <- paste0(exclude_patterns, collapse = "|")
+    files <- grep(excluded, files, invert = TRUE, value = TRUE)
+  }
+
+  # Stop with the hint: without results there is nothing to count or save
+  sp_errors <- tryCatch(
+    spelling::spell_check_files(files, ignore = dictionary),
+    error = function(e) {
+      hint <- "Spell check did not work. Check that your dictionary is formatted correctly. You cannot have special characters (e.g., Diné) in the dictionary.txt file. You need to use HTML formatting (e.g., Din&eacute;) for these."
+      stop(hint, "\nOriginal error: ", conditionMessage(e), call. = FALSE)
+    }
+  )
+
+  if (nrow(sp_errors) > 0) {
+    # Expand the `found` list column and split each entry into file and lines
+    sp_errors <- sp_errors %>%
+      data.frame() %>%
+      tidyr::unnest(cols = found) %>%
+      tidyr::separate(found, into = c("file", "lines"), sep = ":")
+    n_errors <- nrow(sp_errors)
+  } else {
+    # Placeholder row for the report; it is not counted as an error
+    sp_errors <- data.frame(errors = NA)
+    n_errors <- 0
+  }
+
+  # Print the number of spell check errors, then save them to the report file
+  write(n_errors, stdout())
+  readr::write_tsv(sp_errors, output_file)
+  message(paste0("Saved to: ", output_file))
+}
diff --git a/R/url_check.R b/R/url_check.R
@@ -0,0 +1,60 @@
+#' Check URLs and create a summary of the URL checks
+#'
+#' Tests the URLs of the markdown files below the working directory and saves
+#' the failed ones to `check_reports/url_checks.tsv` under the `.git` root. The
+#' number of failed URLs is written to stdout.
+#'
+#' @return Invisibly, a data frame of the failed URLs with three columns:
+#'   `urls`, `urls_status`, `file`. If no URL is left after removing the ignored
+#'   ones, a one-row data frame with a single `errors = NA` column instead.
+#' @export
+url_check <- function() {
+  # Find .git root directory
+  root_dir <- rprojroot::find_root(rprojroot::has_dir(".git"))
+
+  # Create the report directory where the report is written, not in the cwd
+  output_file <- file.path(root_dir, "check_reports", "url_checks.tsv")
+  dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE)
+
+  # Non-empty lines of a check file shipped with ottrpal, if the file exists
+  read_check_file <- function(name) {
+    path <- system.file("extdata", "checks", name, package = "ottrpal")
+    entries <- if (file.exists(path)) readLines(path) else character(0)
+    entries[nzchar(entries)]
+  }
+  ignore_urls <- read_check_file("ignore-urls.txt")
+  exclude_patterns <- read_check_file("exclude_files.txt")
+
+  # Files whose name ends in `md` (`.md`, `.Rmd`, ...) below the working directory
+  files <- list.files(pattern = "md$", full.names = TRUE, recursive = TRUE)
+  if (length(exclude_patterns) > 0) {
+    excluded <- paste0(exclude_patterns, collapse = "|")
+    files <- grep(excluded, files, invert = TRUE, value = TRUE)
+  }
+
+  # Test the URLs of every file and combine the per-file results
+  all_urls_df <- dplyr::bind_rows(lapply(files, get_urls, ignore_urls))
+
+  # Without any URL the combined data frame has no `urls` column to filter on
+  if (nrow(all_urls_df) > 0) {
+    all_urls_df <- dplyr::filter(all_urls_df, !(urls %in% ignore_urls))
+  }
+
+  if (nrow(all_urls_df) > 0) {
+    all_urls_df <- dplyr::filter(all_urls_df, urls_status == "failed")
+    n_failed <- nrow(all_urls_df)
+  } else {
+    # Placeholder row for the report; it is not counted as a failed URL
+    all_urls_df <- data.frame(errors = NA)
+    n_failed <- 0
+  }
+
+  # Print out how many URLs failed
+  write(n_failed, stdout())
+
+  # Save the failed URLs to the report file
+  readr::write_tsv(all_urls_df, output_file)
+  message(paste0("Saved to: ", output_file))
+
+  invisible(all_urls_df)
+}
diff --git a/R/utils.R b/R/utils.R
@@ -137,6 +137,113 @@ add_footer <- function(rmd_path, footer_text = NULL) {
   )
 }
 
+# Test one URL. Returns "ignored" if it is listed in `ignore_urls`, "failed" if
+# its host cannot be resolved or it answers with HTTP 404, else "success".
+test_url <- function(url, ignore_urls) {
+  if (url %in% ignore_urls) {
+    message(paste0("Ignoring: ", url))
+    return("ignored")
+  }
+  message(paste0("Testing: ", url))
+  url_status <- try(httr::GET(url), silent = TRUE)
+
+  # Branch on the kind of result. grepl() over a response object would test
+  # each of its elements, and a condition of length > 1 is an error in R >= 4.2.
+  if (inherits(url_status, "try-error")) {
+    # Fails if host can't be resolved; other request errors still pass
+    failed <- grepl("Could not resolve host", url_status, fixed = TRUE)
+  } else {
+    # Fails if 404'ed
+    failed <- isTRUE(url_status$status_code == 404)
+  }
+  if (failed) "failed" else "success"
+}
+
+# Extract the URLs from one file and test each of them with test_url().
+#
+# `file` is the path of the file to read; `ignore_urls` is passed on to
+# test_url(). Returns a data frame with the columns `urls`, `urls_status` and
+# `file`, or `NULL` (invisibly) when no URL is found in the file.
+get_urls <- function(file, ignore_urls) {
+  message(paste("##### Testing URLs from file:", file))
+
+  # Read in a file and return the urls from it
+  content <- readLines(file)
+
+  # Patterns that select the lines carrying a URL, by kind of tag
+  all_tags <- c(
+    html = "<a href=",
+    knitr = "include_url\\(",
+    ottrpal = "include_slide\\(",
+    markdown = "\\[.*\\]\\(http[s]?.*\\)",
+    markdown_bracket = "\\[.*\\]: http[s]?",
+    other_http = "http[s]?"
+  )
+  url_pattern <- "[(|<]?http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
+
+  # Other patterns
+  nested_parens <- "\\((.*)\\((.*)\\)(.*)\\)"
+  outermost_parens <- "^\\((.*)\\)(.*)$"
+
+  # lapply() always gives a list. sapply() simplifies to a vector or matrix when
+  # every tag matches the same non-zero number of lines, which breaks `$` below.
+  url_list <- lapply(all_tags, grep, content, value = TRUE)
+  specific_tags <- setdiff(names(url_list), "other_http")
+  url_list$other_http <- setdiff(url_list$other_http, unlist(url_list[specific_tags]))
+
+  # Extract the urls only of each type
+  if (length(url_list$html) > 0) {
+    url_list$html <- sapply(url_list$html, function(html_line) {
+      head(rvest::html_attr(rvest::html_nodes(rvest::read_html(html_line), "a"), "href"))
+    })
+    url_list$html <- unlist(url_list$html)
+  }
+  # For the function-call tags, keep the text between `("` and `")`
+  url_list$knitr <- stringr::word(url_list$knitr, sep = "include_url\\(\"|\"\\)", 2)
+  url_list$ottrpal <- stringr::word(url_list$ottrpal, sep = "include_slide\\(\"|\"\\)", 2)
+
+  # Check markdown for parentheticals outside of [ ]( )
+  parens_index <- stringr::str_detect(url_list$markdown, nested_parens)
+
+  if (length(parens_index) >= 1) {
+    # Break down to parenthetical only
+    url_list$markdown[parens_index] <- stringr::str_extract(url_list$markdown[parens_index], nested_parens)
+    # Remove parentheticals outside [ ]( )
+    url_list$markdown[parens_index] <- stringr::word(stringr::str_replace(url_list$markdown[parens_index], outermost_parens, "\\1"), sep = "\\]", 2)
+
+    url_list$markdown[!parens_index] <- stringr::word(url_list$markdown[!parens_index], sep = "\\]", 2)
+    url_list$markdown <- grep("http", url_list$markdown, value = TRUE)
+  }
+  if (length(url_list$markdown_bracket) > 0) {
+    url_list$markdown_bracket <- paste0("http", stringr::word(url_list$markdown_bracket, sep = "\\]: http", 2))
+  }
+  url_list$other_http <- stringr::word(stringr::str_extract(url_list$other_http, url_pattern), sep = "\\]", 1)
+
+  # Remove parentheses only if they are on the outside
+  url_list$other_http <- stringr::word(stringr::str_replace(url_list$other_http, outermost_parens, "\\1"), sep = "\\]", 1)
+  url_list$markdown <- stringr::word(stringr::str_replace(url_list$markdown, outermost_parens, "\\1"), sep = "\\]", 1)
+
+  # Remove `< >`
+  url_list$other_http <- stringr::word(stringr::str_replace(url_list$other_http, "^<(.*)>(.*)$", "\\1"), sep = "\\]", 1)
+
+  # If after the manipulations there's not actually a URL, remove it.
+  url_list <- lapply(url_list, na.omit)
+
+  # Collapse the list, then remove trailing characters
+  urls <- unlist(url_list)
+  urls <- gsub("\\'\\:$|\\'|\\:$|\\.$", "", urls)
+
+  if (length(urls) == 0) {
+    message("No URLs found")
+    return(invisible(NULL))
+  }
+
+  # vapply() checks that test_url() gives exactly one status per URL
+  urls_status <- vapply(urls, test_url, character(1), ignore_urls = ignore_urls)
+  data.frame(urls, urls_status, file)
+}
+
+
 #' Pipe operator
 #'
 #' See \code{magrittr::\link[magrittr:pipe]{\%>\%}} for details.

diff --git a/inst/extdata/checks/dictionary.txt b/inst/extdata/checks/dictionary.txt
@@ -0,0 +1,67 @@
+AnVIL
+Audiographer
+Audiography
+automagic
+BIPOC
+Bloomberg
+Bookdown
+chapt
+Coursera
+creativecommons
+css
+Datatrail
+DataTrail
+DISCOVERABLE
+Dockerfile
+Dockerhub
+dropdown
+enforceability
+faq
+favicon
+FirstName
+Funder
+fyi
+GDSCN
+Generis
+GH
+Github
+GitHub
+https
+ies
+iframe
+immunities
+impactful
+ITCR
+itcrtraining
+ITN
+LastName
+Leanpub
+Licensor
+LICENSOR
+Markua
+mentorship
+MERCHANTABILITY
+Muschelli
+NCI
+NHGRI
+OTTR
+ottrpal
+ottrproject
+Pandoc
+png
+pre
+reproducibility
+rmarkdown
+RMarkdown
+Savonen
+sexualized
+socio
+synched
+tada
+UE
+UE5
+underserved
+Videographer
+Videography
+WIPO
+www
diff --git a/inst/extdata/checks/exclude_files.txt b/inst/extdata/checks/exclude_files.txt
@@ -0,0 +1,8 @@
+About.Rmd
+docs/*
+style-sets/*
+manuscript/*
+CONTRIBUTING.md
+LICENSE.md
+code_of_conduct.md
+README.md
diff --git a/inst/extdata/checks/ignore-urls.txt b/inst/extdata/checks/ignore-urls.txt
@@ -0,0 +1,5 @@
+https://www.someurl.html
+https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0].
+https://www.contributor-covenant.org/faq][FAQ].
+https://www.contributor-covenant.org/translations][translations].
+https://github.com/jhudsl/OTTR_Template/issues/new/choose)!