jhudsl · cansavvy · Dec 20, 2024 · Dec 20, 2024 · Dec 20, 2024 · Dec 20, 2024
diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml
@@ -32,14 +32,6 @@ jobs:
           remotes::install_cran("rcmdcheck")
         shell: Rscript {0}
 
-      - name: Check
-        env:
-          GH_PAT: ${{ secrets.GH_PAT }}
-        run: |
-          options(crayon.enabled = TRUE)
-          rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), check_dir = "check")
-        shell: Rscript {0}
-
       - name: Check package
         uses: r-lib/actions/check-r-package@v2
         with:

diff --git a/NAMESPACE b/NAMESPACE
@@ -11,6 +11,8 @@ export(check_quiz)
 export(check_quiz_attributes)
 export(check_quiz_question_attributes)
 export(check_quizzes)
+export(check_spelling)
+export(check_urls)
 export(clean_up)
 export(convert_coursera_quizzes)
 export(convert_quiz)
@@ -30,6 +32,7 @@ export(get_pages_url)
 export(get_repo_info)
 export(get_slide_id)
 export(get_slide_page)
+export(get_urls)
 export(good_quiz_path)
 export(gs_id_from_slide)
 export(gs_png_download)
@@ -46,10 +49,12 @@ export(pptx_slide_text_df)
 export(qrmd_files)
 export(render_without_toc)
 export(setup_ottr_template)
+export(test_url)
 export(unzip_pptx)
 export(website_to_embed_leanpub)
 export(xml_notes)
 import(dplyr)
+import(stringr)
 importFrom(gitcreds,gitcreds_get)
 importFrom(httr,GET)
 importFrom(httr,accept_json)
@@ -64,7 +69,13 @@ importFrom(magrittr,"%>%")
 importFrom(readr,write_tsv)
 importFrom(rprojroot,find_root)
 importFrom(rprojroot,has_dir)
+importFrom(rvest,html_attr)
+importFrom(rvest,html_nodes)
+importFrom(rvest,read_html)
+importFrom(spelling,spell_check_files)
 importFrom(stringr,str_to_title)
+importFrom(tidyr,separate)
+importFrom(tidyr,unnest)
 importFrom(utils,browseURL)
 importFrom(utils,download.file)
 importFrom(utils,installed.packages)

diff --git a/R/auth.R b/R/auth.R
@@ -16,7 +16,6 @@
 #' authorize("github")
 #'
 #' authorize("google")
-#'
 #' }
 authorize <- function(app_name = NULL,
                       cache = FALSE,
@@ -242,11 +241,11 @@ find_scopes <- function(app_name) {
   ### Declare all the scopes
   scopes <- list(
     google = c(
-    "https://www.googleapis.com/auth/drive",
-    "https://www.googleapis.com/auth/drive.file",
-    "https://www.googleapis.com/auth/drive.readonly",
-    "https://www.googleapis.com/auth/presentations",
-    "https://www.googleapis.com/auth/presentations.readonly"
+      "https://www.googleapis.com/auth/drive",
+      "https://www.googleapis.com/auth/drive.file",
+      "https://www.googleapis.com/auth/drive.readonly",
+      "https://www.googleapis.com/auth/presentations",
+      "https://www.googleapis.com/auth/presentations.readonly"
     ),
     github = c("repo")
   )

diff --git a/R/book_txt.R b/R/book_txt.R
@@ -1,4 +1,3 @@
-
 #' Create Book.txt file from files existing in quiz directory
 #'
 #' @param path path to the bookdown or quarto course repository, must have a `_bookdown.yml` or `_quarto.yml` file
@@ -84,8 +83,7 @@ course_to_book_txt <- function(path = ".",
 #' @export
 #'
 qrmd_files <- function(path = ".") {
-
-  yaml <- list.files(path = path, pattern ="_bookdown.yml|_quarto.yml", full.names = TRUE)
+  yaml <- list.files(path = path, pattern = "_bookdown.yml|_quarto.yml", full.names = TRUE)
 
   spec <- yaml::read_yaml(yaml)
 
@@ -95,7 +93,7 @@ qrmd_files <- function(path = ".") {
     rmd_files <- spec$rmd_files
   }
   if (basename(yaml) == "_quarto.yml") {
-   qmd_files <- grep(".qmd", unlist(spec$book$chapters), value = TRUE)
+    qmd_files <- grep(".qmd", unlist(spec$book$chapters), value = TRUE)
   }
   if (length(rmd_files) > 0 && length(qmd_files) > 0) stop("Both qmd and rmd files are found. Not sure what format to expect")
 

diff --git a/R/get_data.R b/R/get_data.R
@@ -46,19 +46,19 @@ setup_ottr_template <- function(dir = ".", type, render = TRUE) {
 
   ## Render it
   if (render) {
-  if (type == "rmd") bookdown::render_book(output_dir)
-  if (type == "rmd_website") rmarkdown::render_site(output_dir)
-
-  if (type == "quarto" | type == "quarto_website") {
-    quarto::quarto_render(output_dir, as_job = FALSE)
-  }
-  if (type == "quarto") {
-    quarto::quarto_render(output_dir,
-      metadata = list(sidebar = F, toc = F),
-      quarto_args = c("--output-dir", "docs/no_toc/"),
-      as_job = FALSE
-    )
-  }
+    if (type == "rmd") bookdown::render_book(output_dir)
+    if (type == "rmd_website") rmarkdown::render_site(output_dir)
+
+    if (type == "quarto" | type == "quarto_website") {
+      quarto::quarto_render(output_dir, as_job = FALSE)
+    }
+    if (type == "quarto") {
+      quarto::quarto_render(output_dir,
+        metadata = list(sidebar = F, toc = F),
+        quarto_args = c("--output-dir", "docs/no_toc/"),
+        as_job = FALSE
+      )
+    }
   }
   return(output_dir)
 }

diff --git a/R/github_handling.R b/R/github_handling.R
@@ -136,7 +136,6 @@ get_pages_url <- function(repo_name,
 get_repo_info <- function(repo_name,
                           token = NULL,
                           verbose = FALSE) {
-
   # Try to get credentials other way
   if (is.null(token)) {
     # Get auth token
@@ -205,8 +204,7 @@ get_repo_info <- function(repo_name,
 #'
 #' authorize("github")
 #' check_git_repo("jhudsl/OTTR_Template")
-#'
-#'}
+#' }
 check_git_repo <- function(repo_name,
                            token = NULL,
                            silent = TRUE,

diff --git a/R/google_slides.R b/R/google_slides.R
@@ -78,7 +78,6 @@ get_slide_page <- function(url) {
 #' @param output_dir path to output png
 #' @param overwrite should the slide PNG be overwritten?
 gs_png_download <- function(url, output_dir = ".", overwrite = TRUE) {
-
   id <- get_slide_id(url)
   slide_id <- get_slide_page(url)
   url <- gs_png_url(url)

diff --git a/R/leanpub.R b/R/leanpub.R
@@ -6,7 +6,7 @@
 #' Column names `url`, `chapt_title`, and `img_path` must be used.
 #' If no chapter title column supplied, the basename of the url will be used,
 #' If no image column supplied, default image used.
-#' @param clean_up Should the previous docs and manuscript folder be cleaned up? 
+#' @param clean_up Should the previous docs and manuscript folder be cleaned up?
 #' @param html_page The file path of the rendered index.html file
 #' @param base_url The base url of where the chapters are published -- the url to provide to the iframe in Leanpub
 #' e.g. https://jhudatascience.org/OTTR_Template/coursera
@@ -52,15 +52,13 @@ website_to_embed_leanpub <- function(path = ".",
                                      remove_resources_start = FALSE,
                                      verbose = TRUE,
                                      footer_text = "") {
-
   # Find the OTTR course
   root_dir <- course_path(path = path)
 
   rooted_output_dir <- file.path(root_dir, output_dir)
   rooted_quiz_dir <- file.path(root_dir, quiz_dir)
 
   if (clean_up) {
-
     if (dir.exists(rooted_output_dir)) {
       message(paste("Clearing out old version of output files:", rooted_output_dir))
 
@@ -271,7 +269,6 @@ make_embed_markdown <- function(path = ".",
 get_chapters <- function(path = ".",
                          html_page = file.path("docs", "index.html"),
                          base_url = ".") {
-
   # Put this relative to project path
   html_page <- file.path(root_dir, html_page)
 

diff --git a/R/quiz_formatting.R b/R/quiz_formatting.R
@@ -844,7 +844,6 @@ check_quizzes <- function(path = ".",
                           write_report = TRUE,
                           verbose = TRUE,
                           ignore_coursera = TRUE) {
-
   files <- list.files(
     pattern = "\\.md",
     ignore.case = TRUE,

diff --git a/R/spell_check.R b/R/spell_check.R
@@ -0,0 +1,116 @@
+#' Check spelling of all md, rmd, and qmd files
+#'
+#' @param path path to the bookdown or quarto course repository, must have a
+#'   `.github` folder which will be used to establish the top of the repo.
+#' @param output_dir A relative file path to the folder (existing or not) that the
+#'   output check file should be saved to. Default is "check_reports"
+#' @param resources_dir A relative file path to the folder (existing or not) where the
+#'   dictionary.txt and exclude_files.txt files are looked for. Default is "resources".
+#'   If a file is not found, an empty dictionary / no file exclusions are used.
+#' @param file_pattern A file pattern for the files whose spelling should be
+#'   tested. Default is "md$". Regex interpreted.
+#' @return The number of spelling errors found (0 if there are none). A file that
+#'   lists the spelling errors will be saved to the specified output_dir.
+#' @export
+#'
+#' @importFrom magrittr %>%
+#' @importFrom spelling spell_check_files
+#' @importFrom rprojroot find_root has_dir
+#' @importFrom tidyr unnest separate
+#' @importFrom readr write_tsv
+#'
+#' @examples
+#'
+#' rmd_dir <- setup_ottr_template(dir = ".", type = "rmd", render = FALSE)
+#'
+#' check_spelling(rmd_dir)
+#'
+#' # If there are spelling errors they will be listed in
+#' # "check_reports/spell_check_results.tsv"
+#'
+#' qmd_dir <- setup_ottr_template(dir = ".", type = "quarto", render = FALSE)
+#'
+#' check_spelling(qmd_dir)
+#'
+check_spelling <- function(path = ".",
+                           output_dir = "check_reports",
+                           resources_dir = "resources",
+                           file_pattern = "md$") {
+  # Find the top of the repo: the closest directory that contains `.github`
+  root_dir <- rprojroot::find_root(path = path, rprojroot::has_dir(".github"))
+
+  resources_dir <- file.path(root_dir, resources_dir)
+  output_dir <- file.path(root_dir, output_dir)
+
+  if (!dir.exists(output_dir)) {
+    dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
+  }
+  if (!dir.exists(resources_dir)) {
+    dir.create(resources_dir, recursive = TRUE, showWarnings = FALSE)
+  }
+
+  output_file <- file.path(output_dir, "spell_check_results.tsv")
+  dictionary_file <- file.path(resources_dir, "dictionary.txt")
+  exclude_file <- file.path(resources_dir, "exclude_files.txt")
+
+  # Read in dictionary file if it exists
+  if (file.exists(dictionary_file)) {
+    dictionary <- readLines(dictionary_file)
+  } else {
+    dictionary <- ""
+  }
+
+  # Read in the file exclusion patterns if that file exists. Blank lines are
+  # dropped because an empty pattern would match (and so exclude) every file;
+  # an empty file simply means that nothing is excluded.
+  exclude_patterns <- character(0)
+  if (file.exists(exclude_file)) {
+    exclude_patterns <- readLines(exclude_file)
+    exclude_patterns <- exclude_patterns[nzchar(trimws(exclude_patterns))]
+  }
+
+  # Collect every file under `path` whose name matches `file_pattern`
+  files <- list.files(path = path, pattern = file_pattern, recursive = TRUE, full.names = TRUE)
+
+  if (length(exclude_patterns) > 0) {
+    files <- grep(paste0(exclude_patterns, collapse = "|"), files, invert = TRUE, value = TRUE)
+  }
+
+  tryCatch(
+    expr = {
+      # Run spell check
+      sp_errors <- spelling::spell_check_files(files, ignore = dictionary)
+
+      if (nrow(sp_errors) > 0) {
+        sp_errors <- sp_errors %>%
+          data.frame() %>%
+          tidyr::unnest(cols = found) %>%
+          tidyr::separate(found, into = c("file", "lines"), sep = ":")
+        n_errors <- nrow(sp_errors)
+      } else {
+        # Placeholder row so a report file is still written; it is not an
+        # error, so it must not be counted as one
+        sp_errors <- data.frame(errors = NA)
+        n_errors <- 0
+      }
+    },
+    error = function(e) {
+      # Keep the underlying message so problems unrelated to the dictionary can be diagnosed
+      stop(
+        "Spell check did not work. Check that your dictionary is formatted correctly. You cannot have special characters (e.g., Diné) in the dictionary.txt file. You need to use HTML formatting (e.g., Din&eacute;) for these.",
+        "\nOriginal error: ", conditionMessage(e),
+        call. = FALSE
+      )
+    }
+  )
+
+  # Print out how many spell check errors
+  write(n_errors, stdout())
+
+  # Save spell errors to file
+  readr::write_tsv(sp_errors, output_file)
+
+  message(paste0("Saved to: ", output_file))
+
+  return(as.numeric(n_errors))
+}
diff --git a/R/token-handlers.R → R/token_handlers.R b/R/token-handlers.R → R/token_handlers.R
diff --git a/R/url-check.R → R/url_check.R b/R/url-check.R → R/url_check.R
@@ -12,7 +12,10 @@
 #' @return A file will be saved that lists the broken URLs will be saved to the specified output_dir.
 #' @export
 #'
-#' @importFrom magrittr
+#' @importFrom magrittr %>%
+#' @importFrom rprojroot find_root has_dir
+#' @importFrom tidyr unnest separate
+#' @importFrom readr write_tsv
 #'
 #' @examples
 #'
@@ -74,9 +77,9 @@ check_urls <- function(path = ".",
 
   if (nrow(all_urls_df) > 0) {
     if (!report_all) {
-    all_urls_df <- all_urls_df %>%
-      dplyr::filter(urls_status == "failed") %>%
-      readr::write_tsv(output_file)
+      all_urls_df <- all_urls_df %>%
+        dplyr::filter(urls_status == "failed") %>%
+        readr::write_tsv(output_file)
     }
   } else {
     all_urls_df <- data.frame(errors = NA)
@@ -89,6 +92,8 @@ check_urls <- function(path = ".",
   readr::write_tsv(all_urls_df, output_file)
 
   message(paste0("Saved to: ", output_file))
+
+  return(nrow(all_urls_df))
 }
 
 
@@ -100,10 +105,20 @@ check_urls <- function(path = ".",
 #' @return a logical TRUE/FALSE for whether the URL is legitimate.
 #' @export
 #'
-#' @importFrom magrittr
+#' @importFrom magrittr %>%
+#' @importFrom httr GET
+#'
+#' @examples \dontrun{
+#'
+#' # This should print out "failed"
+#' test_url("https://notawebsiteaaaaaaa.com")
+#'
+#'
+#' # This should print out "success"
+#' test_url("https://github.com")
+#' }
 #'
 test_url <- function(url, ignore_urls = "") {
-
   if (url %in% ignore_urls) {
     message(paste0("Ignoring: ", url))
     return("ignored")
@@ -133,7 +148,16 @@ test_url <- function(url, ignore_urls = "") {
 #' @return a data.frame of all the URLs identified in the given rmd/qmd/md file
 #' @export
 #'
-#' @importFrom magrittr
+#' @importFrom magrittr %>%
+#' @importFrom rvest html_nodes read_html html_attr
+#' @import stringr
+#'
+#' @examples
+#'
+#' # Add in a URL error
+#' writeLines("A URL error: https://notawebsiteaaaaaaa.com", "url_test_error.md")
+#'
+#' get_urls("url_test_error.md")
 #'
 get_urls <- function(file, ignore_urls = "") {
   message(paste("##### Testing URLs from file:", file))

diff --git a/R/utils.R b/R/utils.R
@@ -17,7 +17,6 @@ utils::globalVariables(c(
 #' @export
 #'
 course_path <- function(path = ".") {
-
   # Find .git root directory
   root_dir <- rprojroot::find_root(has_dir(".github"), path = path)