diff --git a/NAMESPACE b/NAMESPACE
index da0acc3..5d41f11 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -5,6 +5,7 @@ export(get_ed_dates)
 export(get_ed_dates_all)
 export(get_ed_grds)
 export(get_ed_info)
+export(get_ed_vals_all)
 export(get_url_ply)
 export(grds_to_ts)
 export(grds_to_ts_cat)
diff --git a/R/read.R b/R/read.R
index cc196fc..df1ab27 100644
--- a/R/read.R
+++ b/R/read.R
@@ -540,3 +540,37 @@ get_ed_dates_all <- function(ed_info, date_beg, date_end){
     as.Date()
 }
 
+#' Get all values for a variable of an ERDDAP dataset
+#'
+#' Given an ERDDAP dataset info object and a variable name, return a vector of all available values for that variable.
+#'
+#' @param ed_info ERDDAP info object, as returned by \code{\link{get_ed_info}}
+#' @param var variable to extract
+#'
+#' @return vector of values for given variable
+#'
+#' @importFrom glue glue
+#' @importFrom readr read_csv
+#' @importFrom magrittr %>%
+#' @importFrom dplyr pull
+#' @export
+#' @concept read
+#'
+#' @examples
+#' ed_i <- get_ed_info("https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_d749_a206_cd3a.html")
+#' get_ed_vals_all(ed_i, "LEV")
+get_ed_vals_all <- function(ed_info, var){
+  # ed_info = get_ed_info("https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_d749_a206_cd3a.html")
+  # var = "LEV"
+
+  ed_dataset <- attr(ed_info, "datasetid")
+
+  t_csv <- glue("{ed_info$base_url}/griddap/{ed_dataset}.csvp?{var}")
+  d_t <- try(read_csv(t_csv, show_col_types = FALSE))
+  if (inherits(d_t, "try-error"))
+    stop(glue("Problem fetching values from ERDDAP with: {t_csv}"))
+
+  d_t |>
+    pull()
+}
+
diff --git a/inst/test_throttling.qmd b/inst/test_throttling.qmd
index 413c159..3338bbf 100644
--- a/inst/test_throttling.qmd
+++ b/inst/test_throttling.qmd
@@ -13,7 +13,10 @@ librarian::shelf(
 devtools::load_all() # load extractr functions locally
 
 ply <- onmsR::sanctuaries |>
-  filter(nms == "MBNMS")
+  filter(nms == "MBNMS") |>
+  select(spatial) |>
+  unnest(spatial) |>
+  st_as_sf()
 mapView(ply)
 ```
 
@@ -24,12 +27,20 @@ mapView(ply)
 
 * [ERDDAP - ECCO ECCO2 cube92 salt - Data Access Form](https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_d749_a206_cd3a.html)
 
 ```{r}
-ed_url <- "https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_d749_a206_cd3a.html"
-ed <- extractr::get_ed_info(ed_url)
+var <- "salt"
+var_z <- "LEV"
+n_max <- 10000
+
+ed_url <- "https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_d749_a206_cd3a.html"
+ed <- extractr::get_ed_info(ed_url)
 (ed_date_range <- extractr::get_ed_dates(ed)) # "1992-01-02" "2023-04-28"
-ed_dates <- extractr::get_ed_dates_all(
+ed_dates <- extractr::get_ed_dates_all(
   ed, min(ed_date_range), max(ed_date_range))
+
+if (!is.null(var_z))
+  z <- extractr::get_ed_vals_all(ed, "LEV")
+
 # get geospatial attributes
 a <- ed$alldata$NC_GLOBAL |>
   filter(
@@ -52,30 +63,152 @@ r_na <- rast(
   crs = "epsg:4326")
 
 # a) either rotate raster to -180, 180
-if (ext(r_na)[2] > 180)
-  r_na <- rotate(r_na, 180)
+if (ext(r_na)[2] > 180){
+# r_na <- terra::rotate(r_na, 180)
 # b) or shift vector to 0, 360
-# st_shift_longitude(ply)
+  ply <- st_shift_longitude(ply) # xmin: -123.1401 ymin: 35.5 xmax: -121.1036 ymax: 37.88163
+}
 
 r_idx <- r_na
-values(r_idx) <- 1:ncell(r_idx)
-
+values(r_idx) <- 1:ncell(r_idx) # ncell: 1,036,800
 # mapView(r_idx)
 
 stopifnot(st_crs(ply) == st_crs(4326))
 
 # a) for only pixels with centroids inside polygon
-idx_r_ply <- extract(r_idx, ply, ID=F)
-
-# TODO: explore args cells, xy
-idx_r_ply <- extract(r_idx, ply, ID=F, exact=T)[,1]
-r_ply <- r_na
-r_ply[idx_r_ply] <- idx_r_ply
+# get all pixels that touch polygon, esp places like Monitor
+# idx_r_ply_0 <- terra::extract(r_idx, ply, ID=F) # n=28
+# idx_r_ply <- terra::extract(r_idx, ply, ID=F, weights=T) # n=44
+d_r_ply <- terra::extract(r_idx, ply, ID=FALSE, exact=TRUE) # n=47
+
+# apply area-weighted avg using range(idx_r_ply$fraction)
+r_ply <- r_na
+r_ply[d_r_ply[,1]] <- d_r_ply[,1]
 r_ply <- trim(r_ply)
+# mapView(ply) +
+# mapView(r_ply)
+# TODO: mask anything touching land, then apply area-weighted avg, since only small portion might be oceanic
+
 # a) for full width of grid cells
-ext(r_ply)
+b <- ext(r_ply) |> as.vector() # dimensions: 10, 9, 1 (nrow, ncol, nlyr)
+# names(b) # "xmin" "xmax" "ymin" "ymax"
 # b) for centroids of pixels
-apply(xyFromCell(r_idx, idx_r_ply[,1]), 2, range)
+# apply(xyFromCell(r_idx, d_r_ply[,1]), 2, range)
+
+# paginate through time given threshold of fetch
+n_cells <- ncell(r_ply) # 90
+n_z <- length(z) # 50
+
+times <- extractr::get_ed_vals_all(ed, "time")
+n_t <- length(times) # 3,814
+
+# x * y * z * t
+# n_cells * n_z * n_t # 17,163,000
+n_xyz <- n_cells * n_z # 4,500
+
+n_max <- 1000000
+stopifnot(n_max >= n_xyz)
+n_t_per_req <- n_max %/% n_xyz # 1,716
+
+d_csv <- here("data_tmp/test_throttle.csv") # TODO: arg
+req_csv <- here("data_tmp/test_throttle_1req.csv") # TODO: as tempfile
+
+i_t <- 1 # DEBUG: i_t <- 41
+while (i_t <= n_t) {
+
+  # get time slice end
+  i_t_end <- min(c(i_t + n_t_per_req - 1, n_t))
+  # dates <- ed_dates[c(i_t, i_t_end)] |> as.character()
+  t_req <- times[c(i_t, i_t_end)] |>
+    format_ISO8601(usetz="Z")
+  # TODO: slice if not starting at i_t=1
+
+  # TODO: skip slices already fetched
+  if (file.exists(d_csv)){
+    d <- read_csv(d_csv, show_col_types=FALSE)
+    # class(d$time)
+
+    if (all(as_datetime(t_req) %in% as_datetime(d$time))){
+      message(glue("Skipping {i_t}:{i_t_end} ({paste(times[c(i_t, i_t_end)], collapse = ':')}) of {n_t}, since already in csv ~ {format(Sys.time(), '%H:%M:%S %Z')}"))
+      # iterate to next time slice
+      i_t <- i_t_end + 1
+      next
+    }
+  } else {
+    d <- tibble()
+  }
+  message(glue("Fetching time slice {i_t}:{i_t_end} ({paste(times[c(i_t, i_t_end)], collapse = ':')}) of {n_t} ~ {format(Sys.time(), '%H:%M:%S %Z')}"))
+
+  # n_max <- 1000000 # 1,000,000
+  # Fetching time slice 1:222 (1992-01-02:1993-10-26) of 3814 ~ 14:59:38 PST
+  # Fetching time slice 223:444 (1993-10-29:1995-08-23) of 3814 ~ 15:21:08 PST
+  # Fetching time slice 445:666 (1995-08-26:1997-06-19) of 3814 ~ 15:44:19 PST
+  # Fetching time slice 667:888 (1997-06-22:1999-04-16) of 3814 ~ 16:03:40 PST
+  # Fetching time slice 889:1110 (1999-04-19:2001-02-10) of 3814 ~ 16:26:00 PST
+  # Fetching time slice 1111:1332 (2001-02-13:2002-12-08) of 3814 ~ 16:47:05 PST
+  # Fetching time slice 1333:1554 (2002-12-11:2004-10-04) of 3814 ~ 17:15:06 PST
+  # Fetching time slice 1555:1776 (2004-10-07:2006-08-01) of 3814 ~ 17:38:31 PST
+
+  # TODO: get time (not date) slices;
+  # dates <- ed_dates[c(i_t, i_t_end)]; |> as.POSIXct()
+  # [1] "time bounds are out of range"
+  # [1] "You gave: "
+  # [1] "1992-01-02" "1992-01-05"
+  # [1] "Dataset times are: "
+  # [1] "1992-01-02T00:00:00Z" "2023-04-28T00:00:00Z"
+
+  nc_retry <- TRUE
+  nc_n_try <- 0
+  while (nc_retry){
+    # message("  griddap()")
+    nc <- try(rerddap::griddap(
+      datasetx = attr(ed, "datasetid"),
+      fields = var,
+      url = ed$base_url,
+      LEV = c(min(z), max(z)),
+      longitude = c(b["xmin"], b["xmax"]),
+      latitude = c(b["ymin"], b["ymax"]),
+      time = t_req,
+      fmt = "nc"))
+
+    if (inherits(nc, "try-error")){
+      message("  griddap() error, retrying after deleting cache...")
+      rerddap::cache_delete_all()
+    } else {
+      nc_retry <- FALSE
+    }
+
+    nc_n_try <- nc_n_try + 1
+    if (nc_n_try > 2)
+      stop("Failed to fetch nc file")
+  }
+  # Error in R_nc4_open: NetCDF: Unknown file format
+  # names(nc) # "summary" "data"
+
+  # write to csv
+  nc$data |>
+    mutate(across(where(is.array), as.vector)) |>
+    write_csv(req_csv)
+  d |>
+    bind_rows(
+      read_csv(req_csv, show_col_types=FALSE)) |>
+    write_csv(d_csv)
+
+  # terra::writeCDF(nc, "test.nc")
+  # gdalcubes::write_ncdf(nc, "tmp.nc")
+
+  # iterate to next time slice
+  i_t <- i_t_end + 1
+}
+
+i <- 1
+z_i <- z[i]
+t_i <- times[i] |> format_ISO8601(usetz="Z")
+r <- nc$data |>
+  filter(
+    LEV == z_i,
+    time == t_i) |>
+  tibble() |>
+  select(longitude, latitude , salt) |>
+  rast(type="xyz", crs="EPSG:4326")
+# mapView(r)
+
+# TODO: result as averaged across z or ... return stack of multiple z's?
 mapView(ply) +
   mapView(r_ply)
diff --git a/man/get_ed_vals_all.Rd b/man/get_ed_vals_all.Rd
new file mode 100644
index 0000000..91dd274
--- /dev/null
+++ b/man/get_ed_vals_all.Rd
@@ -0,0 +1,24 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/read.R
+\name{get_ed_vals_all}
+\alias{get_ed_vals_all}
+\title{Get all values for a variable of an ERDDAP dataset}
+\usage{
+get_ed_vals_all(ed_info, var)
+}
+\arguments{
+\item{ed_info}{ERDDAP info object, as returned by \code{\link{get_ed_info}}}
+
+\item{var}{variable to extract}
+}
+\value{
+vector of values for given variable
+}
+\description{
+Given an ERDDAP dataset info object and a variable name, return a vector of all available values for that variable.
+}
+\examples{
+ed_i <- get_ed_info("https://apdrc.soest.hawaii.edu/erddap/griddap/hawaii_soest_d749_a206_cd3a.html")
+get_ed_vals_all(ed_i, "LEV")
+}
+\concept{read}
diff --git a/man/plot_ts.Rd b/man/plot_ts.Rd
index c1485a8..d78f75f 100644
--- a/man/plot_ts.Rd
+++ b/man/plot_ts.Rd
@@ -5,7 +5,7 @@
 \title{Plot time series}
 \usage{
 plot_ts(
-  ts_csv,
+  ts,
   fld_avg = "mean",
   fld_sd = NULL,
   fld_date = "date",
@@ -15,7 +15,7 @@ plot_ts(
 )
 }
 \arguments{
-\item{ts_csv}{file path to timeseries table as comma-separated value (CSV) (required)}
+\item{ts}{file path to timeseries table as comma-separated value (CSV) or data frame (required)}
 
 \item{fld_avg}{field name containing value average; default = \code{"mean"}}
 
@@ -37,8 +37,8 @@ The purpose of this function is to generate time series plots of ERRDAP data.
 }
 \examples{
 \dontrun{
-ts_csv <- here::here("data_tmp/ts.csv")
-plot_ts(ts_csv, main = "SST")
+ts <- here::here("data_tmp/ts.csv")
+plot_ts(ts, main = "SST")
 }
 }