Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Functionalize spelling check #161

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 0 additions & 8 deletions .github/workflows/R-CMD-check.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,6 @@ jobs:
remotes::install_cran("rcmdcheck")
shell: Rscript {0}

- name: Check
env:
GH_PAT: ${{ secrets.GH_PAT }}
run: |
options(crayon.enabled = TRUE)
rcmdcheck::rcmdcheck(args = c("--no-manual", "--as-cran"), check_dir = "check")
shell: Rscript {0}

- name: Check package
uses: r-lib/actions/check-r-package@v2
with:
Expand Down
11 changes: 11 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ export(check_quiz)
export(check_quiz_attributes)
export(check_quiz_question_attributes)
export(check_quizzes)
export(check_spelling)
export(check_urls)
export(clean_up)
export(convert_coursera_quizzes)
export(convert_quiz)
Expand All @@ -30,6 +32,7 @@ export(get_pages_url)
export(get_repo_info)
export(get_slide_id)
export(get_slide_page)
export(get_urls)
export(good_quiz_path)
export(gs_id_from_slide)
export(gs_png_download)
Expand All @@ -46,10 +49,12 @@ export(pptx_slide_text_df)
export(qrmd_files)
export(render_without_toc)
export(setup_ottr_template)
export(test_url)
export(unzip_pptx)
export(website_to_embed_leanpub)
export(xml_notes)
import(dplyr)
import(stringr)
importFrom(gitcreds,gitcreds_get)
importFrom(httr,GET)
importFrom(httr,accept_json)
Expand All @@ -64,7 +69,13 @@ importFrom(magrittr,"%>%")
importFrom(readr,write_tsv)
importFrom(rprojroot,find_root)
importFrom(rprojroot,has_dir)
importFrom(rvest,html_attr)
importFrom(rvest,html_nodes)
importFrom(rvest,read_html)
importFrom(spelling,spell_check_files)
importFrom(stringr,str_to_title)
importFrom(tidyr,separate)
importFrom(tidyr,unnest)
importFrom(utils,browseURL)
importFrom(utils,download.file)
importFrom(utils,installed.packages)
Expand Down
11 changes: 5 additions & 6 deletions R/auth.R
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
#' authorize("github")
#'
#' authorize("google")
#'
#' }
authorize <- function(app_name = NULL,
cache = FALSE,
Expand Down Expand Up @@ -242,11 +241,11 @@ find_scopes <- function(app_name) {
### Declare all the scopes
scopes <- list(
google = c(
"https://www.googleapis.com/auth/drive",
"https://www.googleapis.com/auth/drive.file",
"https://www.googleapis.com/auth/drive.readonly",
"https://www.googleapis.com/auth/presentations",
"https://www.googleapis.com/auth/presentations.readonly"
"https://www.googleapis.com/auth/drive",
"https://www.googleapis.com/auth/drive.file",
"https://www.googleapis.com/auth/drive.readonly",
"https://www.googleapis.com/auth/presentations",
"https://www.googleapis.com/auth/presentations.readonly"
),
github = c("repo")
)
Expand Down
6 changes: 2 additions & 4 deletions R/book_txt.R
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

#' Create Book.txt file from files existing in quiz directory
#'
#' @param path path to the bookdown or quarto course repository, must have a `_bookdown.yml` or `_quarto.yml` file
Expand Down Expand Up @@ -84,8 +83,7 @@ course_to_book_txt <- function(path = ".",
#' @export
#'
qrmd_files <- function(path = ".") {

yaml <- list.files(path = path, pattern ="_bookdown.yml|_quarto.yml", full.names = TRUE)
yaml <- list.files(path = path, pattern = "_bookdown.yml|_quarto.yml", full.names = TRUE)

spec <- yaml::read_yaml(yaml)

Expand All @@ -95,7 +93,7 @@ qrmd_files <- function(path = ".") {
rmd_files <- spec$rmd_files
}
if (basename(yaml) == "_quarto.yml") {
qmd_files <- grep(".qmd", unlist(spec$book$chapters), value = TRUE)
qmd_files <- grep(".qmd", unlist(spec$book$chapters), value = TRUE)
}
if (length(rmd_files) > 0 && length(qmd_files) > 0) stop("Both qmd and rmd files are found. Not sure what format to expect")

Expand Down
26 changes: 13 additions & 13 deletions R/get_data.R
Original file line number Diff line number Diff line change
Expand Up @@ -46,19 +46,19 @@ setup_ottr_template <- function(dir = ".", type, render = TRUE) {

## Render it
if (render) {
if (type == "rmd") bookdown::render_book(output_dir)
if (type == "rmd_website") rmarkdown::render_site(output_dir)

if (type == "quarto" | type == "quarto_website") {
quarto::quarto_render(output_dir, as_job = FALSE)
}
if (type == "quarto") {
quarto::quarto_render(output_dir,
metadata = list(sidebar = F, toc = F),
quarto_args = c("--output-dir", "docs/no_toc/"),
as_job = FALSE
)
}
if (type == "rmd") bookdown::render_book(output_dir)
if (type == "rmd_website") rmarkdown::render_site(output_dir)

if (type == "quarto" | type == "quarto_website") {
quarto::quarto_render(output_dir, as_job = FALSE)
}
if (type == "quarto") {
quarto::quarto_render(output_dir,
metadata = list(sidebar = F, toc = F),
quarto_args = c("--output-dir", "docs/no_toc/"),
as_job = FALSE
)
}
}
return(output_dir)
}
Expand Down
4 changes: 1 addition & 3 deletions R/github_handling.R
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,6 @@ get_pages_url <- function(repo_name,
get_repo_info <- function(repo_name,
token = NULL,
verbose = FALSE) {

# Try to get credentials other way
if (is.null(token)) {
# Get auth token
Expand Down Expand Up @@ -205,8 +204,7 @@ get_repo_info <- function(repo_name,
#'
#' authorize("github")
#' check_git_repo("jhudsl/OTTR_Template")
#'
#'}
#' }
check_git_repo <- function(repo_name,
token = NULL,
silent = TRUE,
Expand Down
1 change: 0 additions & 1 deletion R/google_slides.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ get_slide_page <- function(url) {
#' @param output_dir path to output png
#' @param overwrite should the slide PNG be overwritten?
gs_png_download <- function(url, output_dir = ".", overwrite = TRUE) {

id <- get_slide_id(url)
slide_id <- get_slide_page(url)
url <- gs_png_url(url)
Expand Down
5 changes: 1 addition & 4 deletions R/leanpub.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#' Column names `url`, `chapt_title`, and `img_path` must be used.
#' If no chapter title column supplied, the basename of the url will be used,
#' If no image column supplied, default image used.
#' @param clean_up Should the previous docs and manuscript folder be cleaned up?
#' @param clean_up Should the previous docs and manuscript folder be cleaned up?
#' @param html_page The file path of the rendered index.html file
#' @param base_url The base url of where the chapters are published -- the url to provide to the iframe in Leanpub
#' e.g. https://jhudatascience.org/OTTR_Template/coursera
Expand Down Expand Up @@ -52,15 +52,13 @@ website_to_embed_leanpub <- function(path = ".",
remove_resources_start = FALSE,
verbose = TRUE,
footer_text = "") {

# Find the OTTR course
root_dir <- course_path(path = path)

rooted_output_dir <- file.path(root_dir, output_dir)
rooted_quiz_dir <- file.path(root_dir, quiz_dir)

if (clean_up) {

if (dir.exists(rooted_output_dir)) {
message(paste("Clearing out old version of output files:", rooted_output_dir))

Expand Down Expand Up @@ -271,7 +269,6 @@ make_embed_markdown <- function(path = ".",
get_chapters <- function(path = ".",
html_page = file.path("docs", "index.html"),
base_url = ".") {

# Put this relative to project path
html_page <- file.path(root_dir, html_page)

Expand Down
1 change: 0 additions & 1 deletion R/quiz_formatting.R
Original file line number Diff line number Diff line change
Expand Up @@ -844,7 +844,6 @@ check_quizzes <- function(path = ".",
write_report = TRUE,
verbose = TRUE,
ignore_coursera = TRUE) {

files <- list.files(
pattern = "\\.md",
ignore.case = TRUE,
Expand Down
101 changes: 101 additions & 0 deletions R/spell_check.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#' Check spelling of all md, rmd, and qmd files
#'
#' @param path path to the bookdown or quarto course repository, must have a
#' `.github` folder which will be used to establish the top of the repo.
#' @param output_dir A relative file path to the folder (existing or not) that the
#' output check file should be saved to. Default is "check_reports"
#' @param resources_dir A relative file path to the folder (existing or not) where
#' the dictionary.txt file and exclude_files.txt will be looked for. Default is "resources".
#' If dictionary.txt is not found, no extra words are ignored. If exclude_files.txt
#' is not found, no files are excluded. Each non-blank line of exclude_files.txt is
#' treated as a regular expression matched against the file paths.
#' @param file_pattern A file pattern for the files whose spelling should be
#' tested. Default is "md$". Regex interpreted.
#' @return The number of spelling errors found (one per misspelled word per file),
#' as a numeric; 0 if there are none. As a side effect, the results are saved to
#' "spell_check_results.tsv" in the specified output_dir and the count is printed
#' to stdout.
#' @export
#'
#' @importFrom magrittr %>%
#' @importFrom spelling spell_check_files
#' @importFrom rprojroot find_root has_dir
#' @importFrom tidyr unnest separate
#' @importFrom readr write_tsv
#'
#' @examples
#'
#' rmd_dir <- setup_ottr_template(dir = ".", type = "rmd", render = FALSE)
#'
#' check_spelling(rmd_dir)
#'
#' # If there are spelling errors they will be listed in
#' # "check_reports/spell_check_results.tsv"
#'
#' qmd_dir <- setup_ottr_template(dir = ".", type = "quarto", render = FALSE)
#'
#' check_spelling(qmd_dir)
#'
check_spelling <- function(path = ".",
                           output_dir = "check_reports",
                           resources_dir = "resources",
                           file_pattern = "md$") {
  # Find the top of the repository: the closest directory holding `.github`
  root_dir <- rprojroot::find_root(path = path, rprojroot::has_dir(".github"))

  resources_dir <- file.path(root_dir, resources_dir)
  output_dir <- file.path(root_dir, output_dir)

  if (!dir.exists(output_dir)) {
    dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)
  }
  if (!dir.exists(resources_dir)) {
    dir.create(resources_dir, recursive = TRUE, showWarnings = FALSE)
  }

  output_file <- file.path(output_dir, "spell_check_results.tsv")
  dictionary_file <- file.path(resources_dir, "dictionary.txt")
  exclude_file <- file.path(resources_dir, "exclude_files.txt")

  # Words to ignore, one per line, if a dictionary file exists
  dictionary <- character(0)
  if (file.exists(dictionary_file)) {
    dictionary <- readLines(dictionary_file, warn = FALSE)
    dictionary <- dictionary[nzchar(dictionary)]
  }

  # File-path patterns to exclude, one per line, if an exclude file exists
  exclude_patterns <- character(0)
  if (file.exists(exclude_file)) {
    exclude_patterns <- readLines(exclude_file, warn = FALSE)
  }
  # Drop blank lines: an empty alternative in the combined regex would match
  # (and therefore exclude) every file, and an empty exclude file must simply
  # mean "exclude nothing" rather than produce an NA condition.
  exclude_patterns <- exclude_patterns[nzchar(trimws(exclude_patterns))]

  # Collect every file under `path` whose name matches `file_pattern`
  files <- list.files(path = path, pattern = file_pattern, recursive = TRUE, full.names = TRUE)

  if (length(exclude_patterns) > 0) {
    files <- grep(paste0(exclude_patterns, collapse = "|"), files, invert = TRUE, value = TRUE)
  }

  # Run spell check; keep the underlying error text so failures that are not
  # caused by the dictionary can still be diagnosed.
  sp_errors <- tryCatch(
    spelling::spell_check_files(files, ignore = dictionary),
    error = function(e) {
      stop(
        "Spell check did not work. Check that your dictionary is formatted correctly. You cannot have special characters (e.g., Diné) in the dictionary.txt file. You need to use HTML formatting (e.g., Din&eacute;) for these.",
        "\nOriginal error: ", conditionMessage(e),
        call. = FALSE
      )
    }
  )

  if (nrow(sp_errors) > 0) {
    # One row per misspelled word per file, with file and line numbers split out
    sp_errors <- sp_errors %>%
      data.frame() %>%
      tidyr::unnest(cols = found) %>%
      tidyr::separate(found, into = c("file", "lines"), sep = ":")
    n_errors <- nrow(sp_errors)
  } else {
    # Placeholder row so a report file is still written; it must not be
    # counted as a spelling error.
    sp_errors <- data.frame(errors = NA)
    n_errors <- 0
  }

  # Print out how many spell check errors
  write(n_errors, stdout())

  # Save spell errors to file
  readr::write_tsv(sp_errors, output_file)

  message(paste0("Saved to: ", output_file))

  return(as.numeric(n_errors))
}
File renamed without changes.
38 changes: 31 additions & 7 deletions R/url-check.R → R/url_check.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,10 @@
#' @return A file that lists the broken URLs will be saved to the specified output_dir.
#' @export
#'
#' @importFrom magrittr
#' @importFrom magrittr %>%
#' @importFrom rprojroot find_root has_dir
#' @importFrom tidyr unnest separate
#' @importFrom readr write_tsv
#'
#' @examples
#'
Expand Down Expand Up @@ -74,9 +77,9 @@ check_urls <- function(path = ".",

if (nrow(all_urls_df) > 0) {
if (!report_all) {
all_urls_df <- all_urls_df %>%
dplyr::filter(urls_status == "failed") %>%
readr::write_tsv(output_file)
all_urls_df <- all_urls_df %>%
dplyr::filter(urls_status == "failed") %>%
readr::write_tsv(output_file)
}
} else {
all_urls_df <- data.frame(errors = NA)
Expand All @@ -89,6 +92,8 @@ check_urls <- function(path = ".",
readr::write_tsv(all_urls_df, output_file)

message(paste0("Saved to: ", output_file))

return(nrow(all_urls_df))
}


Expand All @@ -100,10 +105,20 @@ check_urls <- function(path = ".",
#' @return a logical TRUE/FALSE for whether the URL is legitimate.
#' @export
#'
#' @importFrom magrittr
#' @importFrom magrittr %>%
#' @importFrom httr GET
#'
#' @examples \dontrun{
#'
#' # This should print out "failed"
#' test_url("https://notawebsiteaaaaaaa.com")
#'
#'
#' # This should print out "success"
#' test_url("https://github.com")
#' }
#'
test_url <- function(url, ignore_urls = "") {

if (url %in% ignore_urls) {
message(paste0("Ignoring: ", url))
return("ignored")
Expand Down Expand Up @@ -133,7 +148,16 @@ test_url <- function(url, ignore_urls = "") {
#' @return a data.frame of all the URLs identified in the given rmd/qmd/md file
#' @export
#'
#' @importFrom magrittr
#' @importFrom magrittr %>%
#' @importFrom rvest html_nodes read_html html_attr
#' @import stringr
#'
#' @examples
#'
#' # Add in a URL error
#' writeLines("A URL error: https://notawebsiteaaaaaaa.com", "url_test_error.md")
#'
#' get_urls("url_test_error.md")
#'
get_urls <- function(file, ignore_urls = "") {
message(paste("##### Testing URLs from file:", file))
Expand Down
1 change: 0 additions & 1 deletion R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ utils::globalVariables(c(
#' @export
#'
course_path <- function(path = ".") {

# Find .git root directory
root_dir <- rprojroot::find_root(has_dir(".github"), path = path)

Expand Down
Loading
Loading