Skip to content

Commit

Permalink
Merge pull request #3218 from moki1202/fix_warnings_atmosphere
Browse files Browse the repository at this point in the history
notes/warnings fixes in data.atmosphere package
  • Loading branch information
mdietze authored Mar 11, 2024
2 parents 4564337 + f6b0857 commit e4e14ae
Show file tree
Hide file tree
Showing 41 changed files with 423 additions and 603 deletions.
6 changes: 2 additions & 4 deletions docker/depends/pecan_package_dependencies.csv
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
"data.table","*","base/visualization","Imports",FALSE
"data.table","*","models/biocro","Imports",FALSE
"data.table","*","models/ldndc","Imports",FALSE
"data.table","*","modules/data.atmosphere","Imports",FALSE
"data.table","*","modules/data.remote","Suggests",FALSE
"dataone","*","modules/data.land","Suggests",FALSE
"datapack","*","modules/data.land","Imports",FALSE
Expand All @@ -55,10 +54,10 @@
"dplyr","*","models/stics","Imports",FALSE
"dplyr","*","modules/assim.sequential","Imports",FALSE
"dplyr","*","modules/benchmark","Imports",FALSE
"dplyr","*","modules/data.atmosphere","Imports",FALSE
"dplyr","*","modules/data.land","Imports",FALSE
"dplyr","*","modules/data.remote","Suggests",FALSE
"dplyr","*","modules/uncertainty","Imports",FALSE
"dplyr",">= 0.8.1","modules/data.atmosphere","Imports",FALSE
"dplyr",">= 1.1.2","base/db","Imports",FALSE
"ellipse","*","modules/assim.batch","Imports",FALSE
"emdbook","*","modules/assim.sequential","Suggests",FALSE
Expand Down Expand Up @@ -157,7 +156,6 @@
"magrittr","*","models/ed","Imports",FALSE
"magrittr","*","modules/assim.sequential","Imports",FALSE
"magrittr","*","modules/benchmark","Imports",FALSE
"magrittr","*","modules/data.atmosphere","Imports",FALSE
"magrittr","*","modules/data.land","Imports",FALSE
"magrittr","*","modules/data.remote","Imports",FALSE
"markdown","*","modules/allometry","Suggests",FALSE
Expand All @@ -178,7 +176,6 @@
"methods","*","modules/allometry","Imports",FALSE
"methods","*","modules/assim.batch","Imports",FALSE
"methods","*","modules/assim.sequential","Suggests",FALSE
"methods","*","modules/data.atmosphere","Depends",FALSE
"methods","*","modules/emulator","Imports",FALSE
"mgcv","*","modules/data.atmosphere","Imports",FALSE
"minpack.lm","*","modules/rtm","Suggests",FALSE
Expand Down Expand Up @@ -646,6 +643,7 @@
"withr","*","models/ed","Suggests",FALSE
"withr","*","models/sibcasa","Suggests",FALSE
"withr","*","modules/allometry","Suggests",FALSE
"withr","*","modules/data.atmosphere","Suggests",FALSE
"XML","*","base/db","Imports",FALSE
"XML","*","base/workflow","Imports",FALSE
"XML","*","models/biocro","Imports",FALSE
Expand Down
5 changes: 5 additions & 0 deletions models/biocro/R/met2model.BIOCRO.R
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,11 @@ met2model.BIOCRO <- function(in.path, in.prefix, outfolder, overwrite = FALSE,
##' @author David LeBauer
cf2biocro <- function(met, longitude = NULL, zulu2solarnoon = FALSE) {

if (!data.table::is.data.table(met)) {
met <- data.table::copy(met)
data.table::setDT(met)
}

if ((!is.null(longitude)) & zulu2solarnoon) {
solarnoon_offset <- PEcAn.utils::ud_convert(longitude/360, "day", "minute")
met[, `:=`(solardate = met$date + lubridate::minutes(solarnoon_offset))]
Expand Down
9 changes: 3 additions & 6 deletions modules/data.atmosphere/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -20,22 +20,18 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
package converts climate driver data into a standard format for models
integrated into PEcAn. As a standalone package, it provides an interface to
access diverse climate data sets.
Depends:
methods
Imports:
abind (>= 1.4.5),
amerifluxr,
arrow,
curl,
data.table,
dplyr,
dplyr (>= 0.8.1),
geonames (> 0.998),
ggplot2,
glue,
httr,
jsonlite,
lubridate (>= 1.6.0),
magrittr,
MASS,
mgcv,
ncdf4 (>= 1.15),
Expand Down Expand Up @@ -71,7 +67,8 @@ Suggests:
parallel,
PEcAn.settings,
progress,
reticulate
reticulate,
withr
Remotes:
github::adokter/suntools,
github::chuhousen/amerifluxr,
Expand Down
5 changes: 2 additions & 3 deletions modules/data.atmosphere/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -110,8 +110,7 @@ export(temporal_downscale_half_hour)
export(upscale_met)
export(wide2long)
export(write_noaa_gefs_netcdf)
import(dplyr)
import(tidyselect)
importFrom(magrittr,"%>%")
importFrom(dplyr,"%>%")
importFrom(rlang,.data)
importFrom(rlang,.env)
importFrom(sf,st_crs)
2 changes: 1 addition & 1 deletion modules/data.atmosphere/R/ERA5_met_process.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#' @export
#'
#' @author Dongchen Zhang
#' @importFrom magrittr %>%
#' @importFrom dplyr %>%
#'
ERA5_met_process <- function(settings, in.path, out.path, write.db=FALSE, write = TRUE){
#Initialize the multicore computation.
Expand Down
1 change: 0 additions & 1 deletion modules/data.atmosphere/R/GEFS_helper_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -533,7 +533,6 @@ process_gridded_noaa_download <- function(lat_list,
#' @param overwrite, logical stating to overwrite any existing output_file
#' @param hr time step in hours of temporal downscaling (default = 1)
#' @importFrom rlang .data
#' @import tidyselect
#'
#' @author Quinn Thomas
#'
Expand Down
182 changes: 90 additions & 92 deletions modules/data.atmosphere/R/debias_met_regression.R

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion modules/data.atmosphere/R/download.Fluxnet2015.R
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
##' @title download.Fluxnet2015
##' @export
##' @param sitename the FLUXNET ID of the site to be downloaded, used as file name prefix.
##' The 'SITE_ID' field in \href{http://fluxnet.fluxdata.org//sites/site-list-and-pages/}{list of Ameriflux sites}
##' The 'SITE_ID' field in \href{https://fluxnet.org/sites/site-list-and-pages/}{list of Ameriflux sites}
##' @param outfolder location on disk where outputs will be stored
##' @param start_date the start date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year part of the date)
##' @param end_date the end date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year part of the date)
Expand Down
2 changes: 1 addition & 1 deletion modules/data.atmosphere/R/download.NARR.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
##' @param end_date desired end date YYYY-MM-DD
##' @param ... other inputs
##' example options(download.ftp.method="ncftpget")
##' @importFrom magrittr %>%
##' @importFrom dplyr %>%
##'
##' @examples
##' \dontrun{
Expand Down
2 changes: 1 addition & 1 deletion modules/data.atmosphere/R/download.NARR_site.R
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ get_NARR_thredds <- function(start_date, end_date, lat.in, lon.in,
get_dfs$data <- foreach::`%dopar%`(
foreach::foreach(
url = get_dfs$url, flx = get_dfs$flx,
.packages = c("PEcAn.data.atmosphere", "magrittr"),
.packages = c("PEcAn.data.atmosphere", "dplyr"),
.export = c("get_narr_url", "robustly")
),
PEcAn.utils::robustly(get_narr_url)(url, xy = xy, flx = flx)
Expand Down
2 changes: 1 addition & 1 deletion modules/data.atmosphere/R/download.NEONmet.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
##'
##' @export
##' @param sitename the NEON ID of the site to be downloaded, used as file name prefix.
##' The 4-letter SITE code in \href{http://www.neonscience.org/science-design/field-sites/list}{list of NEON sites}
##' The 4-letter SITE code in \href{https://www.neonscience.org/science-design/field-sites/list}{list of NEON sites}
##' @param outfolder location on disk where outputs will be stored
##' @param start_date the start date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year and month of the date)
##' @param end_date the end date of the data to be downloaded. Format is YYYY-MM-DD (will only use the year and month part of the date)
Expand Down
2 changes: 1 addition & 1 deletion modules/data.atmosphere/R/download.US_Wlef.R
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ download.US_Wlef <- function(start_date, end_date, timestep = 1) {
url <- paste0(base_url, year,"/flux_", year, ".txt") #Build proper url
PEcAn.logger::logger.info(paste0("Reading data for year ", year))
print(url)
influx <- tryCatch(read.table(url, header = T, sep = ""), error=function(e) {NULL}, warning=function(e) {NULL})
influx <- tryCatch(utils::read.table(url, header = T, sep = ""), error=function(e) {NULL}, warning=function(e) {NULL})
if (is.null(influx)) { #Error encountered in data fetching.
PEcAn.logger::logger.warn(paste0("Data not avaliable for year ", year, ". All values for ", year, " will be NA."))
# Determine the number of days in the year
Expand Down
4 changes: 2 additions & 2 deletions modules/data.atmosphere/R/download_noaa_gefs_efi.R
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ download_NOAA_GEFS_EFI <- function(sitename, outfolder, start_date, site.lat, si

noaa_data[v] <- NULL
#filter for met variable
curr_var <- filter(weather, .data$variable == cf_var_names[v])
curr_var <- dplyr::filter(weather, .data$variable == cf_var_names[v])
#remove ensemble member 31 does not cover full timeseries
#this is a HACK should add a generalized method for ensemble member outlier detection
curr_var <- filter(curr_var, .data$parameter <= 30)
curr_var <- dplyr::filter(curr_var, .data$parameter <= 30)
noaa_data[[v]] <- list(value = curr_var$prediction,
ensembles = curr_var$parameter,
forecast.date = curr_var$datetime)
Expand Down
2 changes: 1 addition & 1 deletion modules/data.atmosphere/R/downscaling_helper_functions.R
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ downscale_repeat_6hr_to_hrly <- function(df, varName, hr = 1){
t0 <- min(df$time)

df <- df %>%
dplyr::select("time", all_of(varName)) %>%
dplyr::select("time", tidyselect::all_of(varName)) %>%
#Calculate time difference
dplyr::mutate(days_since_t0 = difftime(.data$time, t0, units = "days")) %>%
#Shift valued back because the 6hr value represents the average over the
Expand Down
1 change: 0 additions & 1 deletion modules/data.atmosphere/R/extract.nc.module.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
##' @export
##' @import dplyr
.extract.nc.module <- function(cf.id, register, dir, met, str_ns, site, new.site, con,
start_date, end_date, host, overwrite = FALSE) {
PEcAn.logger::logger.info("Site Extraction")
Expand Down
4 changes: 2 additions & 2 deletions modules/data.atmosphere/R/extract_ERA5.R
Original file line number Diff line number Diff line change
Expand Up @@ -122,13 +122,13 @@ extract.nc.ERA5 <-
# send out as xts object
xts::xts(all.data.point, order.by = timestamp)
}) %>%
setNames(paste0("ERA_ensemble_", ensemblesN))
stats::setNames(paste0("ERA_ensemble_", ensemblesN))

#Merge mean and the speard
return(point.data)

}) %>%
setNames(years)
stats::setNames(years)


# The order of one.year.out is year and then Ens - Mainly because of the spead / I wanted to touch each file just once.
Expand Down
4 changes: 2 additions & 2 deletions modules/data.atmosphere/R/half_hour_downscale.R
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ downscale_ShortWave_to_half_hrly <- function(df,lat, lon, hr = 0.5){
}

#ShortWave.ds <- dplyr::select(data.hrly, time, surface_downwelling_shortwave_flux_in_air)
ShortWave.ds <- data.hrly %>% select("time", "surface_downwelling_shortwave_flux_in_air")
ShortWave.ds <- data.hrly %>% dplyr::select("time", "surface_downwelling_shortwave_flux_in_air")
# data.hrly$group_6hr <- NA
#
# group <- 0
Expand Down Expand Up @@ -277,7 +277,7 @@ downscale_repeat_6hr_to_half_hrly <- function(df, varName, hr = 0.5){
t0 <- min(df$time)

df <- df %>%
dplyr::select("time", all_of(varName)) %>%
dplyr::select("time", tidyselect::all_of(varName)) %>%
#Calculate time difference
dplyr::mutate(days_since_t0 = difftime(.data$time, t0, units = "days")) %>%
#Shift valued back because the 6hr value represents the average over the
Expand Down
Original file line number Diff line number Diff line change
@@ -1,34 +1,4 @@

get.weather <- function(lat, lon, met.nc = met.nc, start.date, end.date, output.dt = 1) {
# if(!is.land(lat, lon)) stop('point is in ocean')
result <- load.cfmet(lat = lat, lon = lon, met.nc = met.nc, start.date, end.date)
downscaled.result <- cfmet.downscale.time(cfmet = result, output.dt = output.dt, lat = lat)
weather <- cruncep_dt2weather(downscaled.result)
} # get.weather


is.land <- function(lat, lon) {
Lat <- ncdf4::ncvar_get(nc = met.nc, varid = "lat")
Lon <- ncdf4::ncvar_get(nc = met.nc, varid = "lon")
lati <- which.min(abs(Lat - lat))
loni <- which.min(abs(Lon - lon))
mask <- ncdf4::ncvar_get(nc = met.nc, varid = "mask", start = c(loni, lati), count = c(1, 1))
return(mask >= 0)
} # is.land

get.latlonbox <- function(lati, loni, Lat = Lat, Lon = Lon) {
lat <- c(mean(Lat[lati:(lati - 1)]), mean(Lat[lati:(lati + 1)]))
lon <- c(mean(Lon[loni:(loni - 1)]), mean(Lon[loni:(loni + 1)]))
return(c(sort(lat), sort(lon)))
} # get.latlonbox

get.cruncep <- function(lat, lon, start.date, end.date) {
result <- load.cfmet(lat, lon)
hourly.result <- cruncep_hourly(result, lat = Lat[lati])
weather <- cruncep_dt2weather(hourly.result)
return(weather)
} # get.cruncep

##' Simulates the light macro environment
##'
##' Simulates light macro environment based on latitude, day of the year.
Expand Down
40 changes: 19 additions & 21 deletions modules/data.atmosphere/R/load.cfmet.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
## ensures data.table objects treated as such http://stackoverflow.com/q/24501245/513006
.datatable.aware <- TRUE

##' Load met data from PEcAn formatted met driver
##'
##' subsets a PEcAn formatted met driver file and converts to a data.table / data.frame object
##' @title load CF met
##' subsets a PEcAn formatted met driver file and converts to a data.frame object
##'
##' @param met.nc object of class ncdf4 representing an open CF compliant, PEcAn standard netcdf file with met data
##' @param lat numeric value of latutude
##' @param lat numeric value of latitude
##' @param lon numeric value of longitude
##' @param start.date format is 'YYYY-MM-DD'
##' @param end.date format is 'YYYY-MM-DD'
##' @return data.table of met data
##' @return data frame of met data
##' @importFrom rlang .data
##' @importFrom dplyr %>%
##' @export
##' @author David LeBauer
load.cfmet <- function(met.nc, lat, lon, start.date, end.date) {
Expand Down Expand Up @@ -43,15 +43,13 @@ load.cfmet <- function(met.nc, lat, lon, start.date, end.date) {
base.units <- strsplit(basetime.string, " since ")[[1]][1]

## convert to days
if (!base.units == "days") {
if (base.units != "days") {
time.idx <- PEcAn.utils::ud_convert(time.idx, basetime.string, paste("days since ", base.date))
}
time.idx <- PEcAn.utils::ud_convert(time.idx, "days", "seconds")
date <- as.POSIXct.numeric(time.idx, origin = base.date, tz = "UTC")
date <- as.POSIXct.numeric(round(time.idx), origin = base.date, tz = "UTC")

## data table warns not to use POSIXlt, which is induced by round()
## but POSIXlt moves times off by a second
suppressWarnings(all.dates <- data.table::data.table(index = seq(time.idx), date = round(date)))
all.dates <- data.frame(index = seq_along(time.idx), date = date)

if (start.date + lubridate::days(1) < min(all.dates$date)) {
PEcAn.logger::logger.severe("run start date", start.date, "before met data starts", min(all.dates$date))
Expand All @@ -60,15 +58,15 @@ load.cfmet <- function(met.nc, lat, lon, start.date, end.date) {
PEcAn.logger::logger.severe("run end date", end.date, "after met data ends", max(all.dates$date))
}

run.dates <- all.dates[date >= start.date & date <= end.date,
list(index,
date = date,
doy = lubridate::yday(date),
year = lubridate::year(date),
month = lubridate::month(date),
day = lubridate::day(date),
hour = lubridate::hour(date) + lubridate::minute(date) / 60)]
run.dates <- all.dates %>%
dplyr::filter(.data$date >= start.date & .data$date <= end.date) %>%
dplyr::mutate(
doy = lubridate::yday(.data$date),
year = lubridate::year(.data$date),
month = lubridate::month(.data$date),
day = lubridate::day(.data$date),
hour = lubridate::hour(.data$date) + lubridate::minute(.data$date) / 60)

results <- list()


Expand All @@ -83,5 +81,5 @@ load.cfmet <- function(met.nc, lat, lon, start.date, end.date) {

names(vars) <- gsub("surface_pressure", "air_pressure", variables)

return(cbind(run.dates, data.table::as.data.table(vars[!sapply(vars, is.null)])))
return(cbind(run.dates, vars[!sapply(vars, is.null)]))
} # load.cfmet
29 changes: 15 additions & 14 deletions modules/data.atmosphere/R/met.process.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
##' *except* raw met downloads. I.e., it corresponds to:
##'
##' list(download = FALSE, met2cf = TRUE, standardize = TRUE, met2model = TRUE)
##' @importFrom rlang .data .env
##' @export
##' @author Elizabeth Cowdery, Michael Dietze, Ankur Desai, James Simkins, Ryan Kelly
met.process <- function(site, input_met, start_date, end_date, model,
Expand Down Expand Up @@ -227,25 +228,25 @@ met.process <- function(site, input_met, start_date, end_date, model,
cf.id <- raw.id <- db.file
}else{
# I did this bc dbfile.input.check does not cover the between two time periods situation
mimetypeid <- get.id(table = "mimetypes", colnames = "type_string",
mimetypeid <- PEcAn.DB::get.id(table = "mimetypes", colnames = "type_string",
values = "application/x-netcdf", con = con)

formatid <- get.id(table = "formats", colnames = c("mimetype_id", "name"),
formatid <- PEcAn.DB::get.id(table = "formats", colnames = c("mimetype_id", "name"),
values = c(mimetypeid, "CF Meteorology"), con = con)

machine.id <- get.id(table = "machines", "hostname", PEcAn.remote::fqdn(), con)
# Fidning the tiles.
raw.tiles <- tbl(con, "inputs") %>%
filter(
site_id == register$ParentSite,
start_date >= start_date,
end_date <= end_date,
format_id == formatid
machine.id <- PEcAn.DB::get.id(table = "machines", "hostname", PEcAn.remote::fqdn(), con)
# Finding the tiles.
raw.tiles <- dplyr::tbl(con, "inputs") %>%
dplyr::filter(
.data$site_id == register$ParentSite,
.data$start_date <= .env$start_date,
.data$end_date >= .env$end_date,
.data$format_id == formatid
) %>%
filter(grepl(met, "name")) %>%
inner_join(tbl(con, "dbfiles"), by = c('id' = 'container_id')) %>%
filter(machine_id == machine.id) %>%
collect()
dplyr::filter(grepl(met, "name")) %>%
dplyr::inner_join(dplyr::tbl(con, "dbfiles"), by = c('id' = 'container_id')) %>%
dplyr::filter(.data$machine_id == machine.id) %>%
dplyr::collect()

cf.id <- raw.id <- list(input.id = raw.tiles$id.x, dbfile.id = raw.tiles$id.y)
}
Expand Down
Loading

0 comments on commit e4e14ae

Please sign in to comment.