From 484c2baa0c9f428ff5e86e4f27e2bc8eb7454381 Mon Sep 17 00:00:00 2001 From: Kate Isaac <41767733+kweav@users.noreply.github.com> Date: Thu, 11 Jul 2024 11:02:05 -0400 Subject: [PATCH 1/2] handle duplicate file names --- R/screenshot.R | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/R/screenshot.R b/R/screenshot.R index 50e3084..2cc02c3 100644 --- a/R/screenshot.R +++ b/R/screenshot.R @@ -12,6 +12,7 @@ #' @importFrom webshot2 webshot #' @importFrom magrittr %>% #' @importFrom rprojroot find_root has_dir +#' @importFrom janitor make_clean_names #' #' @author Candace Savonen #' @@ -41,7 +42,7 @@ make_screenshots <- function(git_pat, repo, output_dir = "resources/chapt_screen chapt_df <- ottrpal::get_chapters(html_page = file.path(root_dir, "docs", "index.html"), base_url = base_url) - # Now take screenshots for each + # Get file names and make unique file_names <- lapply(chapt_df$url, function(url){ file_name <- gsub(".html", ".png", @@ -53,17 +54,21 @@ make_screenshots <- function(git_pat, repo, output_dir = "resources/chapt_screen "", file_name ) - - # Take the screenshot - webshot(url, file = file_name) - - return(file_name) - }) + return(gsub(".png", "", file_name)) #remove .png so clean_names is adding any numbers before file extension + }) %>% + make_clean_names() %>% #handle repeat chapter names + paste0(".png") #add back .png - # Save file of chapter urls and file_names + #add cleaned file names as a column in the dataframe with URLs chapt_df <- chapt_df %>% dplyr::mutate(img_path = unlist(file_names)) + # Now take screenshots for each, referencing the dataframe for the URL and desired filename + lapply(1:nrow(chapt_df), + function(x) webshot(chapt_df$url[x], + file = chapt_df$img_path[x])) + + # Save file of chapter urls and file_names chapt_df %>% readr::write_tsv(file.path(output_folder, "chapter_urls.tsv")) From f4dd48461f96651e4d27bfb514e0ae4e3d803468 Mon Sep 17 00:00:00 2001 From: Kate Isaac <41767733+kweav@users.noreply.github.com> Date: Thu, 11 Jul 2024 11:07:23 -0400 Subject: [PATCH 2/2] janitor in description and namespace --- DESCRIPTION | 4 +++- NAMESPACE | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 0f4b8a9..9a7e645 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -8,7 +8,8 @@ Authors@R: c( person("John", "Muschelli", , "muschellij2@gmail.com", role = "aut", comment = c(ORCID = "0000-0001-6469-1750")), person("Carrie", "Wright", , "cwrigh60@jhu.edu", role = "ctb"), - person("Howard", "Baek", , "notifications@github.com", role = "ctb") + person("Howard", "Baek", , "notifications@github.com", role = "ctb"), + person("Kate", "Isaac", , "kisaac@fredhutch.org", role = "ctb") ) Description: Tools for converting Open-Source Tools for Training Resources (OTTR) courses into Leanpub or Coursera courses. 'ottrpal' is for use @@ -24,6 +25,7 @@ Imports: dplyr, fs, httr, + janitor, jsonlite, knitr (>= 1.33), magrittr, diff --git a/NAMESPACE b/NAMESPACE index aa7ad95..b7fda99 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -56,6 +56,7 @@ importFrom(httr,content) importFrom(httr,oauth2.0_token) importFrom(httr,oauth_app) importFrom(httr,oauth_endpoints) +importFrom(janitor,make_clean_names) importFrom(jsonlite,fromJSON) importFrom(magrittr,"%>%") importFrom(openssl,aes_cbc_decrypt)