diff --git a/R/atlas_occurrences.R b/R/atlas_occurrences.R index 5b9c4dce..2925bc3b 100644 --- a/R/atlas_occurrences.R +++ b/R/atlas_occurrences.R @@ -22,7 +22,10 @@ #' @param mint_doi `logical`: by default no DOI will be generated. Set to #' `TRUE` if you intend to use the data in a publication or similar. #' @param doi `string`: (optional) DOI to download. If provided overrides -#' all other arguments. Only available for the ALA. +#' all other arguments. Only available for the ALA. +#' @param file `string`: (Optional) file name. If not given, will be set to +#' `data` with date and time added. The file path (directory) is always given by +#' `galah_config()$package$directory`. #' @details #' Note that unless care is taken, some queries can be particularly large. #' While most cases this will simply take a long time to process, if the number @@ -75,15 +78,17 @@ atlas_occurrences <- function(request = NULL, data_profile = NULL, select = NULL, mint_doi = FALSE, - doi = NULL + doi = NULL, + file = NULL ) { if(!is.null(doi)){ request_data() |> filter(doi == doi) |> - collect() + collect(file = file) }else{ args <- as.list(environment()) # capture supplied arguments check_atlas_inputs(args) |> # convert to `data_request` object - collect(wait = TRUE) + collect(wait = TRUE, + file = file) } } \ No newline at end of file diff --git a/R/check.R b/R/check.R index 3d26ecaa..c4c8e0db 100644 --- a/R/check.R +++ b/R/check.R @@ -49,16 +49,25 @@ check_directory <- function(x){ #' @noRd #' @keywords Internal check_download_filename <- function(file, ext = "zip"){ - if(!is.null(file)){ - file + if(!is.null(file)){ # is `file` present + expected_suffix <- paste0(".", ext, "$") + if(!grepl(expected_suffix, file)){ # expected suffix is missing + if(grepl("\\.[[:alpha:]]{2,4}$", file)){ # does it have a different suffix? + file <- gsub("\\.[[:alpha:]]{2,4}$", + sub("\\$$", "", expected_suffix), + file) # replace + }else{ + file <- paste0(file, ".zip") # append + } + } # no else{}, as all good here }else{ - cache_directory <- pour("package", "directory", .pkg = "galah") current_time <- Sys.time() |> format("%Y-%m-%d_%H-%M-%S") - file <- glue("{cache_directory}/data_{current_time}.{ext}") |> + file <- paste0('data_', current_time, ".", ext) + } + cache_directory <- pour("package", "directory", .pkg = "galah") + glue("{cache_directory}/{file}") |> as.character() # check_path()? # currently commented out in check.R - } - return(file) } #' Subfunction to `check_login()` diff --git a/R/collect.R b/R/collect.R index e79845a5..74beef61 100644 --- a/R/collect.R +++ b/R/collect.R @@ -10,8 +10,9 @@ #' @param ... Arguments passed on to other methods #' @param wait logical; should `galah` wait for a response? Defaults to FALSE. #' Only applies for `type = "occurrences"` or `"species"`. -#' @param file (optional) file name. If not given will be `data` with date and -#' time added. File path is always given by `galah_config()$package$directory`. +#' @param file (Optional) file name. If not given, will be set to `data` with +#' date and time added. The file path (directory) is always given by +#' `galah_config()$package$directory`. #' @return In most cases, `collect()` returns a `tibble` containing requested #' data. Where the requested data are not yet ready (i.e. for occurrences when #' `wait` is set to `FALSE`), this function returns an object of class `query` @@ -60,7 +61,7 @@ collect.query_set <- function(x, ..., wait = TRUE, file = NULL){ collect.query <- function(x, ..., wait = TRUE, - file = NULL # FIXME: is `file` used? + file = NULL ){ # sometimes no url is given, e.g. when a search returns no data if(is.null(x$url) & # most queries have a `url` @@ -73,7 +74,7 @@ collect.query <- function(x, "data/occurrences" = collect_occurrences(x, wait = wait, file = file), "data/occurrences-count" = collect_occurrences_count(x), "data/occurrences-count-groupby" = collect_occurrences_count(x), - "data/occurrences-doi" = collect_occurrences_doi(x), + "data/occurrences-doi" = collect_occurrences_doi(x, file = file), "data/species"= collect_species(x, file = file), "data/species-count" = collect_species_count(x), # "data/taxonomy" = collect_taxonomy(x), diff --git a/R/collect_occurrences.R b/R/collect_occurrences.R index cb4c3a1b..75a68ee1 100644 --- a/R/collect_occurrences.R +++ b/R/collect_occurrences.R @@ -80,7 +80,9 @@ collect_occurrences_default <- function(.query, wait, file){ #' @importFrom rlang abort #' @importFrom rlang inform #' @importFrom tibble tibble -collect_occurrences_doi <- function(.query, file = NULL, error_call = caller_env()) { +collect_occurrences_doi <- function(.query, + file = NULL, + error_call = caller_env()) { .query$file <- check_download_filename(file) query_API(.query) result <- load_zip(.query$file) diff --git a/man/atlas_occurrences.Rd b/man/atlas_occurrences.Rd index 127921b7..21b8ebe8 100644 --- a/man/atlas_occurrences.Rd +++ b/man/atlas_occurrences.Rd @@ -12,7 +12,8 @@ atlas_occurrences( data_profile = NULL, select = NULL, mint_doi = FALSE, - doi = NULL + doi = NULL, + file = NULL ) } \arguments{ @@ -39,6 +40,10 @@ atlas_occurrences( \item{doi}{\code{string}: (optional) DOI to download. If provided overrides all other arguments. Only available for the ALA.} + +\item{file}{\code{string}: (Optional) file name. If not given, will be set to +\code{data} with date and time added. The file path (directory) is always given by +\code{galah_config()$package$directory}.} } \value{ An object of class \code{tbl_df} and \code{data.frame} (aka a tibble) of diff --git a/man/collect_galah.Rd b/man/collect_galah.Rd index b48f3ab8..8c4ca368 100644 --- a/man/collect_galah.Rd +++ b/man/collect_galah.Rd @@ -29,8 +29,9 @@ \item{wait}{logical; should \code{galah} wait for a response? Defaults to FALSE. Only applies for \code{type = "occurrences"} or \code{"species"}.} -\item{file}{(optional) file name. If not given will be \code{data} with date and -time added. File path is always given by \code{galah_config()$package$directory}.} +\item{file}{(Optional) file name. If not given, will be set to \code{data} with +date and time added. The file path (directory) is always given by +\code{galah_config()$package$directory}.} } \value{ In most cases, \code{collect()} returns a \code{tibble} containing requested diff --git a/tests/testthat/test-atlas_occurrences.R b/tests/testthat/test-atlas_occurrences.R index 90382214..ecda665e 100644 --- a/tests/testthat/test-atlas_occurrences.R +++ b/tests/testthat/test-atlas_occurrences.R @@ -124,6 +124,48 @@ test_that("atlas_occurrences accepts all narrowing functions in pipe", { expect_equal(unique(occ$stateProvince), "New South Wales") }) +test_that("atlas_occurrences() and friends accept a file name", { + skip_if_offline() + # set up directory for testing purposes + directory <- "TEMP" + unlink(directory, recursive = TRUE) + dir.create(directory) + galah_config(directory = directory) + # set up query + base_query <- galah_call() |> + galah_filter(year <= 1970) |> + galah_select(group = "basic") |> + galah_identify("Crinia tinnula") + # base_query |> count() |> collect() # n = 49 on 2023-11-15 + # test `atlas_occurrences` + occ1 <- base_query |> + atlas_occurrences(file = "crinia_file") + expect_s3_class(occ1, c("tbl_df", "tbl", "data.frame")) + expect_true(any(list.files(directory) == "crinia_file.zip")) + # test `collect` + occ2 <- base_query |> collect(file = "crinia_collect") + expect_equal(occ1, occ2) + expect_true(any(list.files(directory) == "crinia_collect.zip")) + # test DOIs + doi <- "10.26197/ala.0c1e8744-a639-47f1-9a5f-5610017ba060" + occ3 <- atlas_occurrences(doi = doi, file = "test_doi") + expect_true(any(list.files(directory) == "test_doi.zip")) + # doi with collect + occ3 <- atlas_occurrences(doi = doi, file = "test_doi") + expect_true(any(list.files(directory) == "test_doi.zip")) + occ4 <- request_data() |> + filter(doi == doi) |> + collect(file = "test_doi2") + expect_equal(occ3, occ4) + expect_true(any(list.files(directory) == "test_doi2.zip")) + # clean up + unlink("TEMP", recursive = TRUE) + cache_dir <- tempfile() + dir.create(cache_dir) + galah_config(directory = cache_dir) + rm(cache_dir) +}) + test_that("atlas_occurrences() errors with an invalid DOI", { expect_error(atlas_occurrences(doi = "random_doi")) }) @@ -144,6 +186,7 @@ test_that("atlas_occurrences downloads data from a DOI", { expect_equal(result2[[1]]$type, "data/occurrences-doi") result3 <- collect(result2) expect_equal(result1, result3) + # TODO add file name tests }) test_that("`atlas_occurrences()` places DOI in `attr()` correctly", { diff --git a/tests/testthat/test-check.R b/tests/testthat/test-check.R new file mode 100644 index 00000000..d1e1ff2b --- /dev/null +++ b/tests/testthat/test-check.R @@ -0,0 +1,14 @@ +test_that("check_download_filename() works", { + # missing case (default) + x <- check_download_filename(file = NULL) + expect_true(grepl(galah_config()$package$directory, x)) # contains directory + expect_true(grepl(".zip$", x)) + # file name with no suffix + x <- check_download_filename(file = "something") + expect_true(grepl(galah_config()$package$directory, x)) # contains directory + expect_true(grepl("something.zip$", x)) + # wrong suffix + x <- check_download_filename(file = "something.csv") + expect_true(grepl(galah_config()$package$directory, x)) # contains directory + expect_true(grepl("something.zip$", x)) +}) \ No newline at end of file