Merge pull request #3218 from moki1202/fix_warnings_atmosphere

notes/warnings fixes in data.atmosphere package
PecanProject · Mar 11, 2024 · e4e14ae · e4e14ae
2 parents 4564337 + f6b0857
commit e4e14ae
Show file tree

Hide file tree

Showing 41 changed files with 423 additions and 603 deletions.
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
@@ -33,7 +33,6 @@
 "data.table","*","base/visualization","Imports",FALSE
 "data.table","*","models/biocro","Imports",FALSE
 "data.table","*","models/ldndc","Imports",FALSE
-"data.table","*","modules/data.atmosphere","Imports",FALSE
 "data.table","*","modules/data.remote","Suggests",FALSE
 "dataone","*","modules/data.land","Suggests",FALSE
 "datapack","*","modules/data.land","Imports",FALSE
@@ -55,10 +54,10 @@
 "dplyr","*","models/stics","Imports",FALSE
 "dplyr","*","modules/assim.sequential","Imports",FALSE
 "dplyr","*","modules/benchmark","Imports",FALSE
-"dplyr","*","modules/data.atmosphere","Imports",FALSE
 "dplyr","*","modules/data.land","Imports",FALSE
 "dplyr","*","modules/data.remote","Suggests",FALSE
 "dplyr","*","modules/uncertainty","Imports",FALSE
+"dplyr",">= 0.8.1","modules/data.atmosphere","Imports",FALSE
 "dplyr",">= 1.1.2","base/db","Imports",FALSE
 "ellipse","*","modules/assim.batch","Imports",FALSE
 "emdbook","*","modules/assim.sequential","Suggests",FALSE
@@ -157,7 +156,6 @@
 "magrittr","*","models/ed","Imports",FALSE
 "magrittr","*","modules/assim.sequential","Imports",FALSE
 "magrittr","*","modules/benchmark","Imports",FALSE
-"magrittr","*","modules/data.atmosphere","Imports",FALSE
 "magrittr","*","modules/data.land","Imports",FALSE
 "magrittr","*","modules/data.remote","Imports",FALSE
 "markdown","*","modules/allometry","Suggests",FALSE
@@ -178,7 +176,6 @@
 "methods","*","modules/allometry","Imports",FALSE
 "methods","*","modules/assim.batch","Imports",FALSE
 "methods","*","modules/assim.sequential","Suggests",FALSE
-"methods","*","modules/data.atmosphere","Depends",FALSE
 "methods","*","modules/emulator","Imports",FALSE
 "mgcv","*","modules/data.atmosphere","Imports",FALSE
 "minpack.lm","*","modules/rtm","Suggests",FALSE
@@ -646,6 +643,7 @@
 "withr","*","models/ed","Suggests",FALSE
 "withr","*","models/sibcasa","Suggests",FALSE
 "withr","*","modules/allometry","Suggests",FALSE
+"withr","*","modules/data.atmosphere","Suggests",FALSE
 "XML","*","base/db","Imports",FALSE
 "XML","*","base/workflow","Imports",FALSE
 "XML","*","models/biocro","Imports",FALSE

diff --git a/models/biocro/R/met2model.BIOCRO.R b/models/biocro/R/met2model.BIOCRO.R
@@ -142,6 +142,11 @@ met2model.BIOCRO <- function(in.path, in.prefix, outfolder, overwrite = FALSE,
 ##' @author David LeBauer
 cf2biocro <- function(met, longitude = NULL, zulu2solarnoon = FALSE) {
 
+  if (!data.table::is.data.table(met)) {
+    met <- data.table::copy(met)
+    data.table::setDT(met)
+  }
+
   if ((!is.null(longitude)) & zulu2solarnoon) {
     solarnoon_offset <- PEcAn.utils::ud_convert(longitude/360, "day", "minute")
     met[, `:=`(solardate = met$date + lubridate::minutes(solarnoon_offset))]

diff --git a/modules/data.atmosphere/DESCRIPTION b/modules/data.atmosphere/DESCRIPTION
@@ -20,22 +20,18 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
     package converts climate driver data into a standard format for models
     integrated into PEcAn. As a standalone package, it provides an interface to
     access diverse climate data sets.
-Depends:
-    methods
 Imports:
     abind (>= 1.4.5),
     amerifluxr,
     arrow,
     curl,
-    data.table,
-    dplyr,
+    dplyr (>= 0.8.1),
     geonames (> 0.998),
     ggplot2,
     glue,
     httr,
     jsonlite,
     lubridate (>= 1.6.0),
-    magrittr,
     MASS,
     mgcv,
     ncdf4 (>= 1.15),
@@ -71,7 +67,8 @@ Suggests:
     parallel,
     PEcAn.settings,
     progress,
-    reticulate
+    reticulate,
+    withr
 Remotes:
     github::adokter/suntools,
     github::chuhousen/amerifluxr,

diff --git a/modules/data.atmosphere/NAMESPACE b/modules/data.atmosphere/NAMESPACE
@@ -110,8 +110,7 @@ export(temporal_downscale_half_hour)
 export(upscale_met)
 export(wide2long)
 export(write_noaa_gefs_netcdf)
-import(dplyr)
-import(tidyselect)
-importFrom(magrittr,"%>%")
+importFrom(dplyr,"%>%")
 importFrom(rlang,.data)
+importFrom(rlang,.env)
 importFrom(sf,st_crs)
diff --git a/modules/data.atmosphere/R/ERA5_met_process.R b/modules/data.atmosphere/R/ERA5_met_process.R
@@ -10,7 +10,7 @@
 #' @export
 #' 
 #' @author Dongchen Zhang
-#' @importFrom magrittr %>%
+#' @importFrom dplyr %>%
 #'
 ERA5_met_process <- function(settings, in.path, out.path, write.db=FALSE, write = TRUE){
   #Initialize the multicore computation.

diff --git a/modules/data.atmosphere/R/GEFS_helper_functions.R b/modules/data.atmosphere/R/GEFS_helper_functions.R
@@ -533,7 +533,6 @@ process_gridded_noaa_download <- function(lat_list,
 #' @param overwrite, logical stating to overwrite any existing output_file
 #' @param hr time step in hours of temporal downscaling (default = 1)
 #' @importFrom rlang .data 
-#' @import tidyselect
 #' 
 #' @author Quinn Thomas
 #'

diff --git a/modules/data.atmosphere/R/debias_met_regression.R b/modules/data.atmosphere/R/debias_met_regression.R
diff --git a/modules/data.atmosphere/R/download.Fluxnet2015.R b/modules/data.atmosphere/R/download.Fluxnet2015.R
@@ -4,7 +4,7 @@
 ##' @title download.Fluxnet2015
 ##' @export
 ##' @param sitename the FLUXNET ID of the site to be downloaded, used as file name prefix. 
-##' The 'SITE_ID' field in \href{http://fluxnet.fluxdata.org//sites/site-list-and-pages/}{list of Ameriflux sites}
+##' The 'SITE_ID' field in \href{https://fluxnet.org/sites/site-list-and-pages/}{list of Ameriflux sites}
 ##' @param outfolder location on disk where outputs will be stored
 ##' @param start_date the start date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year part of the date)
 ##' @param end_date the end date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year part of the date)

diff --git a/modules/data.atmosphere/R/download.NARR.R b/modules/data.atmosphere/R/download.NARR.R
@@ -8,7 +8,7 @@
 ##' @param end_date desired end date YYYY-MM-DD
 ##' @param ... other inputs
 ##' example options(download.ftp.method="ncftpget")
-##' @importFrom magrittr %>%
+##' @importFrom dplyr %>%
 ##' 
 ##' @examples
 ##' \dontrun{

diff --git a/modules/data.atmosphere/R/download.NARR_site.R b/modules/data.atmosphere/R/download.NARR_site.R
@@ -233,7 +233,7 @@ get_NARR_thredds <- function(start_date, end_date, lat.in, lon.in,
     get_dfs$data <- foreach::`%dopar%`(
       foreach::foreach(
         url = get_dfs$url, flx = get_dfs$flx,
-        .packages = c("PEcAn.data.atmosphere", "magrittr"),
+        .packages = c("PEcAn.data.atmosphere", "dplyr"),
         .export = c("get_narr_url", "robustly")
       ),
         PEcAn.utils::robustly(get_narr_url)(url, xy = xy, flx = flx)

diff --git a/modules/data.atmosphere/R/download.NEONmet.R b/modules/data.atmosphere/R/download.NEONmet.R
@@ -6,7 +6,7 @@
 ##' 
 ##' @export
 ##' @param sitename the NEON ID of the site to be downloaded, used as file name prefix. 
-##' The 4-letter SITE code  in \href{http://www.neonscience.org/science-design/field-sites/list}{list of NEON sites}
+##' The 4-letter SITE code  in \href{https://www.neonscience.org/science-design/field-sites/list}{list of NEON sites}
 ##' @param outfolder location on disk where outputs will be stored
 ##' @param start_date the start date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year and month of the date)
 ##' @param end_date the end date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year and month part of the date)

diff --git a/modules/data.atmosphere/R/download.US_Wlef.R b/modules/data.atmosphere/R/download.US_Wlef.R
@@ -35,7 +35,7 @@ download.US_Wlef <- function(start_date, end_date, timestep = 1) {
     url <- paste0(base_url, year,"/flux_", year, ".txt") #Build proper url
     PEcAn.logger::logger.info(paste0("Reading data for year ", year))
     print(url)
-    influx <- tryCatch(read.table(url, header = T, sep = ""), error=function(e) {NULL}, warning=function(e) {NULL})
+    influx <- tryCatch(utils::read.table(url, header = T, sep = ""), error=function(e) {NULL}, warning=function(e) {NULL})
     if (is.null(influx)) { #Error encountered in data fetching.
       PEcAn.logger::logger.warn(paste0("Data not avaliable for year ", year, ". All values for ", year, " will be NA."))
       # Determine the number of days in the year

diff --git a/modules/data.atmosphere/R/download_noaa_gefs_efi.R b/modules/data.atmosphere/R/download_noaa_gefs_efi.R
@@ -40,10 +40,10 @@ download_NOAA_GEFS_EFI <- function(sitename, outfolder, start_date, site.lat, si
 
     noaa_data[v] <- NULL
     #filter for met variable
-    curr_var <- filter(weather, .data$variable == cf_var_names[v])
+    curr_var <- dplyr::filter(weather, .data$variable == cf_var_names[v])
     #remove ensemble member 31 does not cover full timeseries
     #this is a HACK should add a generalized method for ensemble member outlier detection 
-    curr_var <- filter(curr_var, .data$parameter <= 30)
+    curr_var <- dplyr::filter(curr_var, .data$parameter <= 30)
     noaa_data[[v]] <- list(value = curr_var$prediction,
                            ensembles = curr_var$parameter,
                            forecast.date = curr_var$datetime)

diff --git a/modules/data.atmosphere/R/downscaling_helper_functions.R b/modules/data.atmosphere/R/downscaling_helper_functions.R
@@ -112,7 +112,7 @@ downscale_repeat_6hr_to_hrly <- function(df, varName, hr = 1){
   t0 <- min(df$time)
 
   df <- df %>%
-    dplyr::select("time", all_of(varName)) %>%
+    dplyr::select("time", tidyselect::all_of(varName)) %>%
     #Calculate time difference
     dplyr::mutate(days_since_t0 = difftime(.data$time, t0, units = "days")) %>%
     #Shift valued back because the 6hr value represents the average over the

diff --git a/modules/data.atmosphere/R/extract.nc.module.R b/modules/data.atmosphere/R/extract.nc.module.R
@@ -1,5 +1,4 @@
 ##' @export
-##' @import dplyr
 .extract.nc.module <- function(cf.id, register, dir, met, str_ns, site, new.site, con, 
                                start_date, end_date, host, overwrite = FALSE) {
   PEcAn.logger::logger.info("Site Extraction")

diff --git a/modules/data.atmosphere/R/extract_ERA5.R b/modules/data.atmosphere/R/extract_ERA5.R
@@ -122,13 +122,13 @@ extract.nc.ERA5 <-
               # send out as xts object
               xts::xts(all.data.point, order.by = timestamp)
             }) %>%
-            setNames(paste0("ERA_ensemble_", ensemblesN))
+            stats::setNames(paste0("ERA_ensemble_", ensemblesN))
 
           #Merge mean and the speard
           return(point.data)
 
         }) %>%
-        setNames(years)
+        stats::setNames(years)
 
 
       # The order of one.year.out is year and then Ens - Mainly because of the spead  / I wanted to touch each file just once.

diff --git a/modules/data.atmosphere/R/half_hour_downscale.R b/modules/data.atmosphere/R/half_hour_downscale.R
@@ -229,7 +229,7 @@ downscale_ShortWave_to_half_hrly <- function(df,lat, lon, hr = 0.5){
   }
 
   #ShortWave.ds <- dplyr::select(data.hrly, time, surface_downwelling_shortwave_flux_in_air)
-  ShortWave.ds <- data.hrly %>% select("time", "surface_downwelling_shortwave_flux_in_air")
+  ShortWave.ds <- data.hrly %>% dplyr::select("time", "surface_downwelling_shortwave_flux_in_air")
   # data.hrly$group_6hr <- NA
   # 
   # group <- 0
@@ -277,7 +277,7 @@ downscale_repeat_6hr_to_half_hrly <- function(df, varName, hr = 0.5){
   t0 <- min(df$time)
 
   df <- df %>%
-    dplyr::select("time", all_of(varName)) %>%
+    dplyr::select("time", tidyselect::all_of(varName)) %>%
     #Calculate time difference
     dplyr::mutate(days_since_t0 = difftime(.data$time, t0, units = "days")) %>%
     #Shift valued back because the 6hr value represents the average over the

diff --git a/modules/data.atmosphere/R/met_functions.R → modules/data.atmosphere/R/lightME.R b/modules/data.atmosphere/R/met_functions.R → modules/data.atmosphere/R/lightME.R
@@ -1,34 +1,4 @@
 
-get.weather <- function(lat, lon, met.nc = met.nc, start.date, end.date, output.dt = 1) {
-  # if(!is.land(lat, lon)) stop('point is in ocean')
-  result <- load.cfmet(lat = lat, lon = lon, met.nc = met.nc, start.date, end.date)
-  downscaled.result <- cfmet.downscale.time(cfmet = result, output.dt = output.dt, lat = lat)
-  weather <- cruncep_dt2weather(downscaled.result)
-} # get.weather
-
-
-is.land <- function(lat, lon) {
-  Lat <- ncdf4::ncvar_get(nc = met.nc, varid = "lat")
-  Lon <- ncdf4::ncvar_get(nc = met.nc, varid = "lon")
-  lati <- which.min(abs(Lat - lat))
-  loni <- which.min(abs(Lon - lon))
-  mask <- ncdf4::ncvar_get(nc = met.nc, varid = "mask", start = c(loni, lati), count = c(1, 1))
-  return(mask >= 0)
-} # is.land
-
-get.latlonbox <- function(lati, loni, Lat = Lat, Lon = Lon) {
-  lat <- c(mean(Lat[lati:(lati - 1)]), mean(Lat[lati:(lati + 1)]))
-  lon <- c(mean(Lon[loni:(loni - 1)]), mean(Lon[loni:(loni + 1)]))
-  return(c(sort(lat), sort(lon)))
-} # get.latlonbox
-
-get.cruncep <- function(lat, lon, start.date, end.date) {
-  result <- load.cfmet(lat, lon)
-  hourly.result <- cruncep_hourly(result, lat = Lat[lati])
-  weather <- cruncep_dt2weather(hourly.result)
-  return(weather)
-} # get.cruncep
-
 ##' Simulates the light macro environment
 ##'
 ##' Simulates light macro environment based on latitude, day of the year.

diff --git a/modules/data.atmosphere/R/load.cfmet.R b/modules/data.atmosphere/R/load.cfmet.R
@@ -1,16 +1,16 @@
-## ensures data.table objects treated as such http://stackoverflow.com/q/24501245/513006
-.datatable.aware <- TRUE
 
 ##' Load met data from PEcAn formatted met driver
 ##'
-##' subsets a PEcAn formatted met driver file and converts to a data.table / data.frame object
-##' @title load CF met
+##' subsets a PEcAn formatted met driver file and converts to a data.frame object
+##'
 ##' @param met.nc object of class ncdf4 representing an open CF compliant, PEcAn standard netcdf file with met data
-##' @param lat numeric value of latutude
+##' @param lat numeric value of latitude
 ##' @param lon numeric value of longitude
 ##' @param start.date format is 'YYYY-MM-DD'
 ##' @param end.date format is 'YYYY-MM-DD'
-##' @return data.table of met data
+##' @return data frame of met data
+##' @importFrom rlang .data
+##' @importFrom dplyr %>%
 ##' @export
 ##' @author David LeBauer
 load.cfmet <- function(met.nc, lat, lon, start.date, end.date) {
@@ -43,15 +43,13 @@ load.cfmet <- function(met.nc, lat, lon, start.date, end.date) {
   base.units      <- strsplit(basetime.string, " since ")[[1]][1]
 
   ## convert to days
-  if (!base.units == "days") {
+  if (base.units != "days") {
     time.idx <- PEcAn.utils::ud_convert(time.idx, basetime.string, paste("days since ", base.date))
   }
   time.idx <- PEcAn.utils::ud_convert(time.idx, "days", "seconds")
-  date <- as.POSIXct.numeric(time.idx, origin = base.date, tz = "UTC")
+  date <- as.POSIXct.numeric(round(time.idx), origin = base.date, tz = "UTC")
 
-  ## data table warns not to use POSIXlt, which is induced by round() 
-  ## but POSIXlt moves times off by a second
-  suppressWarnings(all.dates <- data.table::data.table(index = seq(time.idx), date = round(date)))
+  all.dates <- data.frame(index = seq_along(time.idx), date = date)
 
   if (start.date + lubridate::days(1) < min(all.dates$date)) {
    PEcAn.logger::logger.severe("run start date", start.date, "before met data starts", min(all.dates$date))
@@ -60,15 +58,15 @@ load.cfmet <- function(met.nc, lat, lon, start.date, end.date) {
    PEcAn.logger::logger.severe("run end date", end.date, "after met data ends", max(all.dates$date))
   }
 
-  run.dates <- all.dates[date >= start.date & date <= end.date,
-                         list(index, 
-                              date = date, 
-                              doy = lubridate::yday(date),
-                              year = lubridate::year(date),
-                              month = lubridate::month(date),
-                              day  = lubridate::day(date), 
-                              hour = lubridate::hour(date) + lubridate::minute(date) / 60)]
-  
+  run.dates <- all.dates %>%
+    dplyr::filter(.data$date >= start.date & .data$date <= end.date) %>%
+    dplyr::mutate(
+      doy = lubridate::yday(.data$date),
+      year = lubridate::year(.data$date),
+      month = lubridate::month(.data$date),
+      day  = lubridate::day(.data$date),
+      hour = lubridate::hour(.data$date) + lubridate::minute(.data$date) / 60)
+
   results <- list()
 
 
@@ -83,5 +81,5 @@ load.cfmet <- function(met.nc, lat, lon, start.date, end.date) {
 
   names(vars) <- gsub("surface_pressure", "air_pressure", variables)
 
-  return(cbind(run.dates, data.table::as.data.table(vars[!sapply(vars, is.null)])))
+  return(cbind(run.dates, vars[!sapply(vars, is.null)]))
 } # load.cfmet
diff --git a/modules/data.atmosphere/R/met.process.R b/modules/data.atmosphere/R/met.process.R
@@ -19,6 +19,7 @@
 ##'        *except* raw met downloads. I.e., it corresponds to:
 ##'
 ##'        list(download = FALSE, met2cf = TRUE, standardize = TRUE,  met2model = TRUE)
+##' @importFrom rlang .data .env
 ##' @export
 ##' @author Elizabeth Cowdery, Michael Dietze, Ankur Desai, James Simkins, Ryan Kelly
 met.process <- function(site, input_met, start_date, end_date, model,
@@ -227,25 +228,25 @@ met.process <- function(site, input_met, start_date, end_date, model,
       cf.id <- raw.id <- db.file
     }else{ 
       # I did this bc dbfile.input.check does not cover the between two time periods situation
-      mimetypeid <- get.id(table = "mimetypes", colnames = "type_string", 
+      mimetypeid <- PEcAn.DB::get.id(table = "mimetypes", colnames = "type_string", 
                            values = "application/x-netcdf", con = con)
 
-      formatid <- get.id(table = "formats", colnames = c("mimetype_id", "name"),
+      formatid <- PEcAn.DB::get.id(table = "formats", colnames = c("mimetype_id", "name"),
                          values = c(mimetypeid, "CF Meteorology"), con = con)
 
-      machine.id <- get.id(table = "machines", "hostname", PEcAn.remote::fqdn(), con)
-      # Fidning the tiles.
-      raw.tiles <- tbl(con, "inputs") %>%
-        filter(
-          site_id == register$ParentSite,
-          start_date >= start_date,
-          end_date <= end_date,
-          format_id == formatid
+      machine.id <- PEcAn.DB::get.id(table = "machines", "hostname", PEcAn.remote::fqdn(), con)
+      # Finding the tiles.
+      raw.tiles <- dplyr::tbl(con, "inputs") %>%
+        dplyr::filter(
+          .data$site_id == register$ParentSite,
+          .data$start_date <= .env$start_date,
+          .data$end_date >= .env$end_date,
+          .data$format_id == formatid
         ) %>%
-        filter(grepl(met, "name")) %>%
-        inner_join(tbl(con, "dbfiles"), by = c('id' = 'container_id')) %>%
-        filter(machine_id == machine.id) %>%
-        collect()
+        dplyr::filter(grepl(met, "name")) %>%
+        dplyr::inner_join(dplyr::tbl(con, "dbfiles"), by = c('id' = 'container_id')) %>%
+        dplyr::filter(.data$machine_id == machine.id) %>%
+        dplyr::collect()
 
       cf.id <- raw.id <- list(input.id = raw.tiles$id.x, dbfile.id = raw.tiles$id.y)
     }