Merge pull request #156 from jhudsl/ki/refactor_screenshotDupNames

handle duplicate file names
jhudsl · Aug 2, 2024 · 6fe8e03
2 parents 3d56b9f + f4dd484
commit 6fe8e03
Show file tree

Hide file tree

Showing 3 changed files with 17 additions and 9 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -8,7 +8,8 @@ Authors@R: c(
     person("John", "Muschelli", , "[email protected]", role = "aut",
            comment = c(ORCID = "0000-0001-6469-1750")),
     person("Carrie", "Wright", , "[email protected]", role = "ctb"),
-    person("Howard", "Baek", , "[email protected]", role = "ctb")
+    person("Howard", "Baek", , "[email protected]", role = "ctb"),
+    person("Kate", "Isaac", , "[email protected]", role = "ctb")
   )
 Description: Tools for converting Open-Source Tools for Training Resources
     (OTTR) courses into Leanpub or Coursera courses. 'ottrpal' is for use
@@ -24,6 +25,7 @@ Imports:
     dplyr,
     fs,
     httr,
+    janitor,
     jsonlite,
     knitr (>= 1.33),
     magrittr,

diff --git a/NAMESPACE b/NAMESPACE
@@ -56,6 +56,7 @@ importFrom(httr,content)
 importFrom(httr,oauth2.0_token)
 importFrom(httr,oauth_app)
 importFrom(httr,oauth_endpoints)
+importFrom(janitor,make_clean_names)
 importFrom(jsonlite,fromJSON)
 importFrom(magrittr,"%>%")
 importFrom(openssl,aes_cbc_decrypt)

diff --git a/R/screenshot.R b/R/screenshot.R
@@ -12,6 +12,7 @@
 #' @importFrom webshot2 webshot
 #' @importFrom magrittr %>%
 #' @importFrom rprojroot find_root has_dir
+#' @importFrom janitor make_clean_names
 #' 
 #' @author Candace Savonen
 #'
@@ -41,7 +42,7 @@ make_screenshots <- function(git_pat, repo, output_dir = "resources/chapt_screen
   chapt_df <- ottrpal::get_chapters(html_page = file.path(root_dir, "docs", "index.html"),
                                     base_url = base_url)
 
-  # Now take screenshots for each
+  # Get file names and make unique
   file_names <- lapply(chapt_df$url, function(url){
     file_name <- gsub(".html", 
                       ".png", 
@@ -53,17 +54,21 @@ make_screenshots <- function(git_pat, repo, output_dir = "resources/chapt_screen
                       "",
                       file_name
                       )
-
-    # Take the screenshot
-    webshot(url, file = file_name)
-
-    return(file_name)
-  })
+    return(gsub(".png", "", file_name)) #remove .png so clean_names is adding any numbers before file extension
+  }) %>% 
+    make_clean_names() %>% #handle repeat chapter names
+    paste0(".png") #add back .png
 
-  # Save file of chapter urls and file_names
+  #add cleaned file names as a column in the dataframe with URLs
   chapt_df <- chapt_df %>%
     dplyr::mutate(img_path = unlist(file_names))
 
+  # Now take screenshots for each, referencing the dataframe for the URL and desired filename
+  lapply(1:nrow(chapt_df), 
+         function(x) webshot(chapt_df$url[x], 
+                             file = chapt_df$img_path[x]))
+
+  # Save file of chapter urls and file_names
   chapt_df %>% 
     readr::write_tsv(file.path(output_folder, "chapter_urls.tsv"))