From 2c55c22806d61665575236b458fc4a75d8415a7c Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 29 Nov 2023 12:22:51 -0500 Subject: [PATCH 01/48] Improve the reading NC file function. --- modules/data.land/R/pool_ic_netcdf2list.R | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/data.land/R/pool_ic_netcdf2list.R b/modules/data.land/R/pool_ic_netcdf2list.R index 871d3132e78..d488b004d45 100644 --- a/modules/data.land/R/pool_ic_netcdf2list.R +++ b/modules/data.land/R/pool_ic_netcdf2list.R @@ -8,19 +8,18 @@ ##' @author Anne Thomas pool_ic_netcdf2list <- function(nc.path){ IC.nc <- try(ncdf4::nc_open(nc.path)) + on.exit(ncdf4::nc_close(IC.nc), add = FALSE) if(!inherits(IC.nc, "try-error")) { dims <- vector(mode = "list", length = length(IC.nc$dim)) names(dims) <- names(IC.nc$dim) for(i in seq(IC.nc$dim)){ dims[[i]] <- IC.nc$dim[[i]]$vals } - vals <- vector(mode = "list", length = length(IC.nc$var)) names(vals) <- names(IC.nc$var) for(varname in names(vals)){ vals[[varname]] <- ncdf4::ncvar_get(IC.nc,varname) } - on.exit(ncdf4::nc_close(IC.nc), add = FALSE) return(list(dims = dims, vals = vals)) } else{ From b22aa0487d8d79a21297fbe06b088ba3bdce5f81 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 29 Nov 2023 12:23:08 -0500 Subject: [PATCH 02/48] Improve qsub_parallel function. --- base/remote/R/qsub_parallel.R | 77 ++++++++++++++++++++++++++------ base/remote/man/qsub_parallel.Rd | 10 ++++- 2 files changed, 73 insertions(+), 14 deletions(-) diff --git a/base/remote/R/qsub_parallel.R b/base/remote/R/qsub_parallel.R index 6b9826033fb..15a661ce88b 100644 --- a/base/remote/R/qsub_parallel.R +++ b/base/remote/R/qsub_parallel.R @@ -4,6 +4,7 @@ #' @param files allow submit jobs based on job.sh file paths. #' @param prefix used for detecting if jobs are completed or not. #' @param sleep time (in second) that we wait each time for the jobs to be completed. +#' @param hybrid Decide if we want to detect the job completion by both files and job ids on the server or just by the job ids on the server.s #' @export #' @examples #' \dontrun{ @@ -12,7 +13,7 @@ #' @author Dongchen Zhang #' #' @importFrom foreach %dopar% -qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out", sleep = 10) { +qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out", sleep = 10, hybrid = TRUE) { if("try-error" %in% class(try(find.package("doSNOW"), silent = T))){ PEcAn.logger::logger.info("Package doSNOW is not installed! Please install it and rerun the function!") return(0) @@ -89,21 +90,71 @@ qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out", sleep = PEcAn.logger::logger.info("Checking the qsub jobs status!") PEcAn.logger::logger.info(paste0("Checking the file ", prefix)) ## setup progressbar - pb <- utils::txtProgressBar(min = 0, max = length(unlist(run_list)), style = 3) - pbi <- 0 folders <- file.path(settings$host$outdir, run_list) - completed_folders <- c() - while (length(completed_folders) < length(folders)) { - Sys.sleep(sleep) - completed_folders <- foreach::foreach(folder = folders) %dopar% { - if(file.exists(file.path(folder, prefix))){ - return(folder) - } + L_folder <- length(folders) + L_jobid <- length(jobids) + + pb <- utils::txtProgressBar(min = 0, max = L_folder, style = 3) + pb1 <- utils::txtProgressBar(min = 0, max = L_jobid, style = 3) + pbi <- pbi1 <- 0 + #here we not only detect if the target files are generated. + #we also detect if the jobs are still existed on the server. 
+ if (hybrid) { + while ((L_folder - length(folders)) < L_folder & + (L_jobid - length(jobids)) < L_jobid) { + Sys.sleep(sleep) + completed_folders <- foreach::foreach(folder = folders) %dopar% { + if(file.exists(file.path(folder, prefix))){ + return(folder) + } + } %>% unlist() + folders <- folders[which(!folders %in% completed_folders)] + + #or we can try detect if the jobs are still on the server. + #specify the host and qstat arguments for the future_map function. + host <- settings$host + qstat <- host$qstat + completed_jobs <- jobids %>% furrr::future_map(function(id) { + if (PEcAn.remote::qsub_run_finished( + run = id, + host = host, + qstat = qstat)) { + return(id) + } + }) %>% unlist() + jobids <- jobids[which(!jobids %in% completed_jobs)] + + #compare two progresses and set the maximum progress for the progress bar. + pbi <- L_folder - length(folders) + utils::setTxtProgressBar(pb, pbi) + + pbi1 <- L_jobid - length(jobids) + utils::setTxtProgressBar(pb1, pbi1) + } + } else { + #special case that only detect the job ids on the server. + while ((L_jobid - length(jobids)) < L_jobid) { + #detect if the jobs are still on the server. + #specify the host and qstat arguments for the future_map function. + Sys.sleep(sleep) + host <- settings$host + qstat <- host$qstat + completed_jobs <- jobids %>% furrr::future_map(function(id) { + if (PEcAn.remote::qsub_run_finished( + run = id, + host = host, + qstat = qstat)) { + return(id) + } + }) %>% unlist() + jobids <- jobids[which(!jobids %in% completed_jobs)] + + #compare two progresses and set the maximum progress for the progress bar. + pbi1 <- L_jobid - length(jobids) + utils::setTxtProgressBar(pb1, pbi1) } - completed_folders <- unlist(completed_folders) - pbi <- length(completed_folders) - utils::setTxtProgressBar(pb, pbi) } + close(pb) parallel::stopCluster(cl) PEcAn.logger::logger.info("Completed!") diff --git a/base/remote/man/qsub_parallel.Rd b/base/remote/man/qsub_parallel.Rd index a7d2f8bd751..d608ad30771 100644 --- a/base/remote/man/qsub_parallel.Rd +++ b/base/remote/man/qsub_parallel.Rd @@ -4,7 +4,13 @@ \alias{qsub_parallel} \title{qsub_parallel} \usage{ -qsub_parallel(settings, files = NULL, prefix = "sipnet.out", sleep = 10) +qsub_parallel( + settings, + files = NULL, + prefix = "sipnet.out", + sleep = 10, + hybrid = TRUE +) } \arguments{ \item{settings}{pecan settings object} @@ -14,6 +20,8 @@ qsub_parallel(settings, files = NULL, prefix = "sipnet.out", sleep = 10) \item{prefix}{used for detecting if jobs are completed or not.} \item{sleep}{time (in second) that we wait each time for the jobs to be completed.} + +\item{hybrid}{Decide if we want to detect the job completion by both files and job ids on the server or just by the job ids on the server.s} } \description{ qsub_parallel From 86c7246e60c86de7f5f496ebf3ded7ad994d9799 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 29 Nov 2023 12:23:21 -0500 Subject: [PATCH 03/48] Add verbose to the write restart sipnet function. 
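
A minimal usage sketch for the new flag (the run id, dates, and state objects
below are placeholders, not values from this patch set). With the default
verbose = FALSE the per-run IC matrix is no longer printed on every SDA cycle:

    write_restart.SIPNET(outdir = settings$modeloutdir,
                         runid = "ENS-00001-1000004945",  # hypothetical run id
                         start.time = start.time,
                         stop.time = stop.time,
                         settings = settings,
                         new.state = new.state,
                         new.params = new.params,
                         inputs = inputs,
                         verbose = FALSE)
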
--- models/sipnet/R/write_restart.SIPNET.R | 25 +++++++++++++---------- models/sipnet/man/write_restart.SIPNET.Rd | 5 ++++- 2 files changed, 18 insertions(+), 12 deletions(-) diff --git a/models/sipnet/R/write_restart.SIPNET.R b/models/sipnet/R/write_restart.SIPNET.R index 18387a9f893..de8f14a67f4 100755 --- a/models/sipnet/R/write_restart.SIPNET.R +++ b/models/sipnet/R/write_restart.SIPNET.R @@ -20,13 +20,14 @@ ##' @param RENAME flag to either rename output file or not ##' @param new.params list of parameters to convert between different states ##' @param inputs list of model inputs to use in write.configs.SIPNET +##' @param verbose decide if we want to print the outputs. ##' ##' @description Write restart files for SIPNET. WARNING: Some variables produce illegal values < 0 and have been hardcoded to correct these values!! ##' ##' @return NONE ##' @export write_restart.SIPNET <- function(outdir, runid, start.time, stop.time, settings, new.state, - RENAME = TRUE, new.params = FALSE, inputs) { + RENAME = TRUE, new.params = FALSE, inputs, verbose = FALSE) { rundir <- settings$host$rundir variables <- colnames(new.state) @@ -58,16 +59,16 @@ write_restart.SIPNET <- function(outdir, runid, start.time, stop.time, settings, analysis.save[[length(analysis.save) + 1]] <- PEcAn.utils::ud_convert(new.state$NPP, "kg/m^2/s", "Mg/ha/yr") #*unit.conv -> Mg/ha/yr names(analysis.save[[length(analysis.save)]]) <- c("NPP") } - + if ("NEE" %in% variables) { analysis.save[[length(analysis.save) + 1]] <- new.state$NEE names(analysis.save[[length(analysis.save)]]) <- c("NEE") } - if ("AbvGrndWood" %in% variables) { - AbvGrndWood <- PEcAn.utils::ud_convert(new.state$AbvGrndWood, "Mg/ha", "g/m^2") - analysis.save[[length(analysis.save) + 1]] <- AbvGrndWood - names(analysis.save[[length(analysis.save)]]) <- c("AbvGrndWood") + if ("AbvGrndWood" %in% variables) { + AbvGrndWood <- PEcAn.utils::ud_convert(new.state$AbvGrndWood, "Mg/ha", "g/m^2") + analysis.save[[length(analysis.save) + 1]] <- AbvGrndWood + names(analysis.save[[length(analysis.save)]]) <- c("AbvGrndWood") } if ("LeafC" %in% variables) { @@ -105,7 +106,7 @@ write_restart.SIPNET <- function(outdir, runid, start.time, stop.time, settings, if (analysis.save[[length(analysis.save)]] < 0) analysis.save[[length(analysis.save)]] <- 0 names(analysis.save[[length(analysis.save)]]) <- c("SWE") } - + if ("LAI" %in% variables) { analysis.save[[length(analysis.save) + 1]] <- new.state$LAI if (new.state$LAI < 0) analysis.save[[length(analysis.save)]] <- 0 @@ -119,9 +120,11 @@ write_restart.SIPNET <- function(outdir, runid, start.time, stop.time, settings, }else{ analysis.save.mat <- NULL } - - print(runid %>% as.character()) - print(analysis.save.mat) + + if (verbose) { + print(runid %>% as.character()) + print(analysis.save.mat) + } do.call(write.config.SIPNET, args = list(defaults = NULL, trait.values = new.params, settings = settings, @@ -129,4 +132,4 @@ write_restart.SIPNET <- function(outdir, runid, start.time, stop.time, settings, inputs = inputs, IC = analysis.save.mat)) print(runid) -} # write_restart.SIPNET +} # write_restart.SIPNET \ No newline at end of file diff --git a/models/sipnet/man/write_restart.SIPNET.Rd b/models/sipnet/man/write_restart.SIPNET.Rd index d080faabd93..22d7f885d7b 100644 --- a/models/sipnet/man/write_restart.SIPNET.Rd +++ b/models/sipnet/man/write_restart.SIPNET.Rd @@ -13,7 +13,8 @@ write_restart.SIPNET( new.state, RENAME = TRUE, new.params = FALSE, - inputs + inputs, + verbose = FALSE ) } \arguments{ @@ -34,6 +35,8 @@ 
write_restart.SIPNET( \item{new.params}{list of parameters to convert between different states} \item{inputs}{list of model inputs to use in write.configs.SIPNET} + +\item{verbose}{decide if we want to print the outputs.} } \value{ NONE From dceaf9418a4a0681c41a48e3d1cdf837f77f2461 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 29 Nov 2023 12:23:40 -0500 Subject: [PATCH 04/48] Bug fixes for less than 12 sites number. --- modules/assim.sequential/R/SDA_OBS_Assembler.R | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/modules/assim.sequential/R/SDA_OBS_Assembler.R b/modules/assim.sequential/R/SDA_OBS_Assembler.R index e9e778e9af8..a1d6f2bc109 100644 --- a/modules/assim.sequential/R/SDA_OBS_Assembler.R +++ b/modules/assim.sequential/R/SDA_OBS_Assembler.R @@ -35,8 +35,7 @@ SDA_OBS_Assembler <- function(settings){ } #prepare site_info offline, because we need to submit this to server remotely, which might not support the Bety connection. - site_info <- settings %>% - purrr::map(~.x[['run']] ) %>% + site_info <- settings$run %>% purrr::map('site')%>% purrr::map(function(site.list){ #conversion from string to number @@ -215,8 +214,8 @@ SDA_OBS_Assembler <- function(settings){ Obs_Prep[var_ind] %>% purrr::map(~.x$start.date), Obs_Prep[var_ind] %>% purrr::map(~.x$end.date)), function(var_timestep, var_start_date, var_end_date){ - obs_timestep2timepoint(var_start_date, var_end_date, var_timestep) - }) %>% + obs_timestep2timepoint(var_start_date, var_end_date, var_timestep) + }) %>% purrr::map(function(all_timepoints){ all_timepoints[which(!all_timepoints %in% time_points)] }) %>% @@ -258,4 +257,4 @@ SDA_OBS_Assembler <- function(settings){ save(obs.mean, file = file.path(Obs_Prep$outdir, "Rdata", "obs.mean.Rdata")) save(obs.cov, file = file.path(Obs_Prep$outdir, "Rdata", "obs.cov.Rdata")) list(obs.mean = obs.mean, obs.cov = obs.cov) -} +} \ No newline at end of file From d6ba99f3f380a8701165df3d4e4c65307ff3548b Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 29 Nov 2023 12:23:59 -0500 Subject: [PATCH 05/48] Improve the ERA5 met process function. --- modules/data.atmosphere/R/ERA5_met_process.R | 155 ++++++++++++------- 1 file changed, 100 insertions(+), 55 deletions(-) diff --git a/modules/data.atmosphere/R/ERA5_met_process.R b/modules/data.atmosphere/R/ERA5_met_process.R index 2018951ca19..75cb39cdeef 100644 --- a/modules/data.atmosphere/R/ERA5_met_process.R +++ b/modules/data.atmosphere/R/ERA5_met_process.R @@ -3,37 +3,60 @@ #' @param settings a multi-settings object #' @param in.path met input path #' @param out.path output path -#' @param Write if write into Bety database +#' @param write.db if write into Bety database +#' @param write if write the settings into pecan.xml file in the outdir of settings. #' -#' @return if Write is True then return input IDs with physical paths; if Write is False then return just physical paths of extracted ERA5 clim files. +#' @return if write.db is True then return input IDs with physical paths; if write.db is False then return just physical paths of extracted ERA5 clim files. 
#' @export +#' +#' @author Dongchen Zhang +#' @importFrom magrittr %>% #' #' @examples -ERA5_met_process <- function(settings, in.path, out.path, Write=FALSE){ - #getting site info - #getting site ID - observations <- c() - for (i in 1:length(settings)) { - obs <- settings[[i]]$run$site$id - observations <- c(observations,obs) +ERA5_met_process <- function(settings, in.path, out.path, write.db=FALSE, write = TRUE){ + #Initialize the multicore computation. + if (future::supportsMulticore()) { + future::plan(future::multicore) + } else { + future::plan(future::multisession) } - #query site info - bety <- dplyr::src_postgres(dbname = settings$database$bety$dbname, - host = settings$database$bety$host, - user = settings$database$bety$user, - password = settings$database$bety$password) - con <- bety$con - site_ID <- observations - suppressWarnings(site_qry <- glue::glue_sql("SELECT *, ST_X(ST_CENTROID(geometry)) AS lon, + #getting site info + #grab the site info from Bety DB if we can't get the site info directly from the settings object. + if ("try-error" %in% class(try(site_info <- settings$run %>% + purrr::map('site')%>% + purrr::map(function(site.list){ + #conversion from string to number + site.list$lat <- as.numeric(site.list$lat) + site.list$lon <- as.numeric(site.list$lon) + list(site_id=site.list$id, lat=site.list$lat, lon=site.list$lon, site_name=site.list$name) + }) %>% + dplyr::bind_rows() %>% + as.list()))) { + #getting site ID + observations <- c() + for (i in 1:length(settings)) { + obs <- settings[[i]]$run$site$id + observations <- c(observations,obs) + } + + #query site info + bety <- dplyr::src_postgres(dbname = settings$database$bety$dbname, + host = settings$database$bety$host, + user = settings$database$bety$user, + password = settings$database$bety$password) + con <- bety$con + site_ID <- observations + suppressWarnings(site_qry <- glue::glue_sql("SELECT *, ST_X(ST_CENTROID(geometry)) AS lon, ST_Y(ST_CENTROID(geometry)) AS lat FROM sites WHERE id IN ({ids*})", - ids = site_ID, .con = con)) - suppressWarnings(qry_results <- PEcAn.DB::db.query(con = con, query = site_qry))#use PEcAn.DB instead - site_info <- list(site_id=qry_results$id, site_name=qry_results$sitename, lat=qry_results$lat, - lon=qry_results$lon, time_zone=qry_results$time_zone) + ids = site_ID, .con = con)) + suppressWarnings(qry_results <- PEcAn.DB::db.query(con = con, query = site_qry))#use PEcAn.DB instead + site_info <- list(site_id=qry_results$id, site_name=qry_results$sitename, lat=qry_results$lat, + lon=qry_results$lon, time_zone=qry_results$time_zone) + } #initialize db query elements - if(Write){ + if(write.db){ mimetype <- "application/x-netcdf" formatname <- "CF Meteorology" hostname <- PEcAn.remote::fqdn() @@ -57,51 +80,54 @@ ERA5_met_process <- function(settings, in.path, out.path, Write=FALSE){ Input_IDs <- list() } - #initialize physical paths for each ERA5 file - Clim_paths <- list() - - #initializing start and end date from settings - start_date <- settings$state.data.assimilation$start.date - end_date <- settings$state.data.assimilation$end.date - - #setting up met2model function depending on model name from settings - met2model_method <- do.call("::", list(paste0("PEcAn.", settings$model$type), paste0("met2model.", settings$model$type))) - - #loop over each site - for (i in 1:length(site_info$site_id)) { + #restructure the site_info. 
+ new.site.info <- vector("list", length(settings)) + for (i in seq_along(new.site.info)) { + new.site.info[[i]] <- list(site.id = site_info$site_id[i], + lat = as.numeric(site_info$lat[i]), + lon = as.numeric(site_info$lon[i]), + start_date = settings$state.data.assimilation$start.date, + end_date = settings$state.data.assimilation$end.date, + out.path = out.path, + in.path = in.path, + model.type = settings$model$type) + } + #Extract ERA5 for each site. + PEcAn.logger::logger.info("Started extracting ERA5 data!\n") + Clim_paths <- furrr::future_map(new.site.info, function(site){ #check if sub-folder exists, if doesn't then create a new folder specific for each site - site_outFolder <- paste0(out.path,'/',as.character(site_info$site_id[i])) - + site_outFolder <- paste0(site$out.path,'/', site$site.id) #check if folder already exists, if it does, then jump to the next loop if(!file.exists(site_outFolder)){ dir.create(site_outFolder) }else{ - #export info - print(paste0("The output files for site ",as.character(site_info$site_id[i])," already exists jump to the next site")) - #grab physical paths of existing ERA5 files #need to be generalized when more models come in. - Clim_paths[i] <- list(in.path=list.files(path=site_outFolder, pattern = '*.clim', full.names = T)) - next + clim.paths <- list(in.path=list.files(path=site_outFolder, pattern = '*.clim', full.names = T)) + names(clim.paths) <- site$site.id + return(clim.paths) } #extract ERA5.nc files - PEcAn.data.atmosphere::extract.nc.ERA5(slat = site_info$lat[i], - slon = site_info$lon[i], - in.path = in.path, - start_date = start_date, - end_date = end_date, + PEcAn.data.atmosphere::extract.nc.ERA5(slat = site$lat, + slon = site$lon, + in.path = site$in.path, + start_date = site$start_date, + end_date = site$end_date, outfolder = site_outFolder, in.prefix = 'ERA5_', - newsite = as.character(site_info$site_id[i])) + newsite = as.character(site$site.id)) #starting working on met2model.model function over each ensemble + #setting up met2model function depending on model name from settings + met2model_method <- do.call("::", list(paste0("PEcAn.", site$model.type), paste0("met2model.", site$model.type))) + #grab the rbind.xts function + rbind.xts <- do.call("::", list("xts", "rbind.xts")) #find every path associated with each ensemble member ens_nc <- list.files(path = site_outFolder, full.names = T) - #loop over each ensemble member - for (j in 1:length(ens_nc)) { - nc_path <- ens_nc[j] + for (i in 1:length(ens_nc)) { + nc_path <- ens_nc[i] #find a proper in prefix for each ensemble member ens_num <- strsplit(basename(nc_path),"_")[[1]][3] @@ -111,15 +137,34 @@ ERA5_met_process <- function(settings, in.path, out.path, Write=FALSE){ met2model_method(in.path = nc_path, in.prefix = in_prefix, outfolder = site_outFolder, - start_date = start_date, - end_date = end_date) + start_date = site$start_date, + end_date = site$end_date) } # grab physical paths of ERA5 files - Clim_paths[i] <- list(in.path=list.files(path=site_outFolder, pattern = '*.clim', full.names = T)) + clim.paths <- list(in.path=list.files(path=site_outFolder, pattern = '*.clim', full.names = T)) + names(clim.paths) <- site$site.id + return(clim.paths) + }, .progress = TRUE) + PEcAn.logger::logger.info("\nFinished!") + + #write the paths into settings. + if (write) { + #write paths into settings. 
+    settings$run <- furrr::future_map2(settings$run, Clim_paths, function(site.list, paths){
+      met.list <- as.list(paths)
+      names(met.list) <- rep("path", length(paths))
+      site.list$run$inputs$met <- met.list
+      site.list
+    })
+
+    #write settings into xml file.
+    PEcAn.logger::logger.info(paste0("Write updated pecan.xml file into: ", file.path(settings$outdir, "pecan.xml")))
+    PEcAn.settings::write.settings(settings, outputfile = "pecan.xml")
+  }

   #write into bety
-  if(Write){
+  if(write.db){
+    PEcAn.logger::logger.info("Write into database!")
     #loop over each site
     for (i in 1:length(site_info$site_id)) {
       #loop over each ensemble
@@ -154,4 +199,4 @@ ERA5_met_process <- function(settings, in.path, out.path, Write=FALSE){
     save(Clim_paths, file=paste0(out.path, '/', 'Inputs.RData'))
     return(Clim_paths)
   }
-}
+}
\ No newline at end of file

From 7e4efbff0c8afa40460ea6e1c9fefe5ec2adc250 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 29 Nov 2023 12:24:09 -0500
Subject: [PATCH 06/48] Update doc

---
 modules/data.atmosphere/man/ERA5_met_process.Rd | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/modules/data.atmosphere/man/ERA5_met_process.Rd b/modules/data.atmosphere/man/ERA5_met_process.Rd
index 35a685ec3d1..29710098dd0 100644
--- a/modules/data.atmosphere/man/ERA5_met_process.Rd
+++ b/modules/data.atmosphere/man/ERA5_met_process.Rd
@@ -4,7 +4,7 @@
 \alias{ERA5_met_process}
 \title{Met Processes for ERA5 data}
 \usage{
-ERA5_met_process(settings, in.path, out.path, Write = FALSE)
+ERA5_met_process(settings, in.path, out.path, write.db = FALSE, write = TRUE)
 }
 \arguments{
 \item{settings}{a multi-settings object}
@@ -13,11 +13,16 @@ ERA5_met_process(settings, in.path, out.path, Write = FALSE)

 \item{out.path}{output path}

-\item{Write}{if write into Bety database}
+\item{write.db}{if write into Bety database}
+
+\item{write}{if write the settings into pecan.xml file in the outdir of settings.}
 }
 \value{
-if Write is True then return input IDs with physical paths; if Write is False then return just physical paths of extracted ERA5 clim files.
+if write.db is True then return input IDs with physical paths; if write.db is False then return just physical paths of extracted ERA5 clim files.
 }
 \description{
 Met Processes for ERA5 data
 }
+\author{
+Dongchen Zhang
+}

From cb95c52a1aa9852b1d59221950790c56ba5b29c1 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 29 Nov 2023 12:24:21 -0500
Subject: [PATCH 07/48] Add verbose to the function.

---
 modules/data.atmosphere/R/extract_ERA5.R   | 85 ++++++++++---------
 .../data.atmosphere/man/extract.nc.ERA5.Rd |  1 +
 2 files changed, 48 insertions(+), 38 deletions(-)

diff --git a/modules/data.atmosphere/R/extract_ERA5.R b/modules/data.atmosphere/R/extract_ERA5.R
index f85b43cdec8..7b093c49695 100644
--- a/modules/data.atmosphere/R/extract_ERA5.R
+++ b/modules/data.atmosphere/R/extract_ERA5.R
@@ -10,6 +10,7 @@
 #' @param newsite site name.
 #' @param vars variables to be extracted. If NULL all the variables will be returned.
 #' @param overwrite Logical if files needs to be overwritten.
+#' @param verbose Decide if we want to stop printing info.
 #' @details For the list of variables check out the documentation at \link{https://confluence.ecmwf.int/display/CKB/ERA5+data+documentation#ERA5datadocumentation-Spatialgrid}
 #'
 #' @return a list of xts objects with all the variables for the requested years
@@ -33,15 +34,16 @@ extract.nc.ERA5 <-
          newsite,
          vars = NULL,
          overwrite = FALSE,
+         verbose = FALSE,
          ...)
{ - + # library(xts) # Distributing the job between whatever core is available. years <- seq(lubridate::year(start_date), lubridate::year(end_date), 1 - ) + ) ensemblesN <- seq(1, 10) @@ -49,7 +51,7 @@ extract.nc.ERA5 <- #for each ensemble one.year.out <- years %>% purrr::map(function(year) { - + # for each year point.data <- ensemblesN %>% purrr::map(function(ens) { @@ -57,13 +59,17 @@ extract.nc.ERA5 <- ncfile <- file.path(in.path, paste0(in.prefix, year, ".nc")) - PEcAn.logger::logger.info(paste0("Trying to open :", ncfile, " ")) - - if (!file.exists(ncfile)) - PEcAn.logger::logger.severe("The nc file was not found.") + #printing out initial information. + if (verbose) { + PEcAn.logger::logger.info(paste0("Trying to open :", ncfile, " ")) + + if (!file.exists(ncfile)) + PEcAn.logger::logger.severe("The nc file was not found.") + + #msg + PEcAn.logger::logger.info(paste0(year, " is being processed ", "for ensemble #", ens, " ")) + } - #msg - PEcAn.logger::logger.info(paste0(year, " is being processed ", "for ensemble #", ens, " ")) #open the file nc_data <- ncdf4::nc_open(ncfile) # time stamp @@ -80,10 +86,12 @@ extract.nc.ERA5 <- if (is.null(vars)) vars <- names(nc_data$var) # for the variables extract the data - all.data.point <- vars %>% + set_names(vars) %>% purrr::map_dfc(function(vname) { - PEcAn.logger::logger.info(paste0(" \t ",vname, "is being extracted ! ")) + if (verbose) { + PEcAn.logger::logger.info(paste0(" \t ",vname, "is being extracted ! ")) + } brick.tmp <- raster::brick(ncfile, varname = vname, level = ens) @@ -91,23 +99,23 @@ extract.nc.ERA5 <- raster::extract(brick.tmp, sp::SpatialPoints(cbind(slon, slat)), method = 'simple') - - if (!is.numeric(nn)) { - PEcAn.logger::logger.severe(paste0( - "Expected raster object to be numeric, but it has type `", - paste0(typeof(nn), collapse = " "), - "`" - )) + if (verbose) { + if (!is.numeric(nn)) { + PEcAn.logger::logger.severe(paste0( + "Expected raster object to be numeric, but it has type `", + paste0(typeof(nn), collapse = " "), + "`" + )) + } } - # replacing the missing/filled values with NA nn[nn == nc_data$var[[vname]]$missval] <- NA # send out the extracted var as a new col t(nn) - }) %>% - `colnames<-`(vars) + }) + #close the connection # send out as xts object @@ -126,28 +134,29 @@ extract.nc.ERA5 <- # This now changes the order to ens - year point.data <- ensemblesN %>% purrr::map(function(Ensn) { + rbind.xts <- do.call("::", list("xts", "rbind.xts")) one.year.out %>% purrr::map( ~ .x [[Ensn]]) %>% do.call("rbind.xts", .) }) - - -# Calling the met2CF inside extract bc in met process met2CF comes before extract ! - out <-met2CF.ERA5( - slat, - slon, - start_date, - end_date, - sitename=newsite, - outfolder, - point.data, - overwrite = FALSE, - verbose = TRUE - ) + + + # Calling the met2CF inside extract bc in met process met2CF comes before extract ! + out <-met2CF.ERA5( + slat, + slon, + start_date, + end_date, + sitename=newsite, + outfolder, + point.data, + overwrite = FALSE, + verbose = verbose + ) return(out) - }, error = function(e) { + }, error = function(e) { PEcAn.logger::logger.severe(paste0(conditionMessage(e))) - }) + }) - } + } \ No newline at end of file diff --git a/modules/data.atmosphere/man/extract.nc.ERA5.Rd b/modules/data.atmosphere/man/extract.nc.ERA5.Rd index 8b82e4490ba..cf91b8ab287 100644 --- a/modules/data.atmosphere/man/extract.nc.ERA5.Rd +++ b/modules/data.atmosphere/man/extract.nc.ERA5.Rd @@ -15,6 +15,7 @@ extract.nc.ERA5( newsite, vars = NULL, overwrite = FALSE, + verbose = FALSE, ... 
) } From 936922319933c9a31e4a2ae9c5f8dfcad3063a98 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 29 Nov 2023 12:24:30 -0500 Subject: [PATCH 08/48] Improve agb prep function. --- modules/data.remote/R/Landtrendr_AGB_prep.R | 55 ++++++++++++++------- 1 file changed, 37 insertions(+), 18 deletions(-) diff --git a/modules/data.remote/R/Landtrendr_AGB_prep.R b/modules/data.remote/R/Landtrendr_AGB_prep.R index 8fabd4cd370..7dc18afabbd 100644 --- a/modules/data.remote/R/Landtrendr_AGB_prep.R +++ b/modules/data.remote/R/Landtrendr_AGB_prep.R @@ -18,8 +18,15 @@ #' @author Dongchen Zhang #' @importFrom magrittr %>% Landtrendr_AGB_prep <- function(site_info, start_date, end_date, time_points, - AGB_indir, outdir = NULL, export_csv = TRUE, - allow_download = FALSE, buffer = NULL, skip_buffer = TRUE){ + AGB_indir, outdir = NULL, export_csv = TRUE, + allow_download = FALSE, buffer = NULL, skip_buffer = TRUE){ + #Initialize the multicore computation. + if (future::supportsMulticore()) { + future::plan(future::multicore) + } else { + future::plan(future::multisession) + } + #if we export CSV but didn't provide any path if(as.logical(export_csv) && is.null(outdir)){ PEcAn.logger::logger.info("If you want to export CSV file, please ensure input the outdir!") @@ -79,21 +86,33 @@ Landtrendr_AGB_prep <- function(site_info, start_date, end_date, time_points, #if we have any site missing previously if(length(new_site_info$site_id) != 0){ if(is.null(buffer) | as.logical(skip_buffer)){ + #prepare lists for future::map parallelization. + l <- vector("list", length = length(new_site_info$site_id)) + for (i in seq_along(l)) { + l[[i]] <- list(site_info = list(site_id = new_site_info$site_id[i], + lat = new_site_info$lat[i], + lon = new_site_info$lon[i], + site_name = NA), + data_dir = AGB_indir, + product_dates = lubridate::year(time_points), + time_points = time_points) + } #extracting AGB data - med_agb_data <- PEcAn.data.remote::extract.LandTrendr.AGB(site_info = new_site_info, - dataset = "median", - fun = "mean", - data_dir = AGB_indir, - product_dates = lubridate::year(time_points))[[1]] %>% dplyr::select(-2) %>% - `colnames<-`(c("site_id", paste0(time_points, "_AbvGrndWood"))) - sdev_agb_data <- PEcAn.data.remote::extract.LandTrendr.AGB(site_info = new_site_info, - dataset = "stdv", - fun = "mean", - data_dir = AGB_indir, - product_dates = lubridate::year(time_points))[[1]] %>% dplyr::select(-c(1:2)) %>% - `colnames<-`(c(paste0(time_points, "_SD"))) - #Handle data - AGB_Output <- cbind(med_agb_data, sdev_agb_data) + AGB_Output <- l %>% furrr::future_map(function(ll) { + med_agb_data <- PEcAn.data.remote::extract.LandTrendr.AGB(site_info = ll$site_info, + dataset = "median", + fun = "mean", + data_dir = ll$data_dir, + product_dates = ll$product_dates)[[1]] %>% dplyr::select(-2) %>% + `colnames<-`(c("site_id", paste0(ll$time_points, "_AbvGrndWood"))) + sdev_agb_data <- PEcAn.data.remote::extract.LandTrendr.AGB(site_info = ll$site_info, + dataset = "stdv", + fun = "mean", + data_dir = ll$data_dir, + product_dates = ll$product_dates)[[1]] %>% dplyr::select(-c(1:2)) %>% + `colnames<-`(c(paste0(ll$time_points, "_SD"))) + cbind(med_agb_data, sdev_agb_data) + }, .progress = T) %>% bind_rows() }else{#buffer is not empty #extracting AGB data med <- PEcAn.data.remote::extract.LandTrendr.AGB(site_info = new_site_info, @@ -141,7 +160,7 @@ Landtrendr_AGB_prep <- function(site_info, start_date, end_date, time_points, Current_CSV <- rbind(Current_CSV, tibble::tibble(date, site_id, lat, lon, agb, sd))#in date, id, 
lat, lon, agb, sd } } - + #Compare with existing CSV file. (We name the CSV file as AGB.csv) if(export_csv){ if(exists("Previous_CSV")){#we already read the csv file previously. @@ -169,4 +188,4 @@ Landtrendr_AGB_prep <- function(site_info, start_date, end_date, time_points, } PEcAn.logger::logger.info("Landtrendr AGB Prep Completed!") list(AGB_Output = AGB_Output, time_points = time_points, var = "AbvGrndWood") -} +} \ No newline at end of file From 860f8fa5ff7d2487c4ee5a6b218bd7aaf752ac5b Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 29 Nov 2023 14:48:51 -0500 Subject: [PATCH 09/48] Add the rabbitmq configuration in the job file. --- models/sipnet/R/write.configs.SIPNET.R | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/models/sipnet/R/write.configs.SIPNET.R b/models/sipnet/R/write.configs.SIPNET.R index f26cb01cad7..e2059b5c259 100755 --- a/models/sipnet/R/write.configs.SIPNET.R +++ b/models/sipnet/R/write.configs.SIPNET.R @@ -61,7 +61,6 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs cdosetup <- paste(cdosetup, sep = "\n", paste(settings$host$cdosetup, collapse = "\n")) } - hostteardown <- "" if (!is.null(settings$model$postrun)) { hostteardown <- paste(hostteardown, sep = "\n", paste(settings$model$postrun, collapse = "\n")) @@ -69,6 +68,18 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs if (!is.null(settings$host$postrun)) { hostteardown <- paste(hostteardown, sep = "\n", paste(settings$host$postrun, collapse = "\n")) } + + # create rabbitmq specific setup. + cpcmd <- rmoutdircmd <- rmrundircmd <- "" + if (!is.null(settings$host$rabbitmq)) { + occmd <- gsub("@APPNAME@", settings$host$rabbitmq$appname, settings$host$rabbitmq$occmd) + #rsync cmd from remote to local host. + cpcmd <- paste(cpcmd, settings$host$rabbitmq$oc, "rsync", outdir, paste0("$(", settings$host$rabbitmq$oc, " ", occmd, "):", outdir)) + #delete files within rundir and outdir. + rmoutdircmd <- paste(rmoutdircmd, "rm", file.path(outdir, "*")) + rmrundircmd <- paste(rmrundircmd, "rm", file.path(rundir, "*")) + } + # create job.sh jobsh <- gsub("@HOST_SETUP@", hostsetup, jobsh) jobsh <- gsub("@CDO_SETUP@", cdosetup, jobsh) @@ -87,6 +98,10 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs jobsh <- gsub("@BINARY@", settings$model$binary, jobsh) jobsh <- gsub("@REVISION@", settings$model$revision, jobsh) + gsub("@CPCMD@", cpcmd, jobsh) + gsub("@RMOUTDIRCMD@", rmoutdircmd, jobsh) + gsub("@RMRUNDIRCMD@", rmrundircmd, jobsh) + if(is.null(settings$state.data.assimilation$NC.Prefix)){ settings$state.data.assimilation$NC.Prefix <- "sipnet.out" } From 7709e35ce8e2cce12d85c0c4e22aa881298e8159 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 29 Nov 2023 14:50:30 -0500 Subject: [PATCH 10/48] Add cmd for rabbitmq execution. --- models/template/inst/template_geo.job | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/models/template/inst/template_geo.job b/models/template/inst/template_geo.job index 026e7e29db9..b60222795e7 100644 --- a/models/template/inst/template_geo.job +++ b/models/template/inst/template_geo.job @@ -42,5 +42,12 @@ cp "@RUNDIR@/README.txt" "@OUTDIR@/README.txt" # host specific teardown @HOST_TEARDOWN@ +#copy files back. +@CPCMD@ + +#delete files in the run and out folder. 
+@RMRUNDIRCMD@ +@RMOUTDIRCMD@ + # all done -echo -e "MODEL FINISHED\nLogfile is located at '@OUTDIR@/logfile.txt'" >&3 +echo -e "MODEL FINISHED\nLogfile is located at '@OUTDIR@/logfile.txt'" >&3 \ No newline at end of file From f59166c79bb21fb8725498a6d1c3d00d24aa2cc6 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 29 Nov 2023 14:51:31 -0500 Subject: [PATCH 11/48] Add cmd to rsync the run folders from the local to the remote host before sending messages and after writing down the job files. --- modules/assim.sequential/R/sda.enkf_MultiSite.R | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index 569aea85880..6ae8c5f4ce6 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -425,6 +425,13 @@ sda.enkf.multisite <- function(settings, }) %>% stats::setNames(site.ids) + #if it's a rabbitmq job sumbmission, we will first copy and paste the whole run folder within the SDA to the remote host. + if (!is.null(settings$host$rabbitmq)) { + cmd <- paste0("oc rsync", settings$host$rundir, "$(", "oc" ) + sipnet.label <- "oc get pod -l app.kubernetes.io/name=pecan-model-sipnet-136 -o name):" + try(system(paste0("oc rsync ", settings$host$rundir, " $(", sipnet.label, settings$host$rundir), intern = TRUE)) + } + #I'm rewriting the runs because when I use the parallel approach for writing configs the run.txt will get messed up; because multiple cores want to write on it at the same time. runs.tmp <- list.dirs(rundir, full.names = F) runs.tmp <- runs.tmp[grepl("ENS-*|[0-9]", runs.tmp)] From 67f9b14c53f1d1b84b2cc19e6b5ec71518ab722b Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 29 Nov 2023 14:51:44 -0500 Subject: [PATCH 12/48] Updates. --- .../MultiSite-Exs/SDA/Create_Multi_settings.R | 84 ++++++++++--------- 1 file changed, 46 insertions(+), 38 deletions(-) diff --git a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R index eda487b4cf1..42c5ddacf32 100644 --- a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R +++ b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R @@ -47,6 +47,49 @@ obs_end_date <- "2021-07-15" obs_outdir <- "/projectnb/dietzelab/dongchen/All_NEON_SDA/test_OBS" timestep <- list(unit="year", num=1) +#specify model binary +model_binary <- "/usr2/postdoc/istfer/SIPNET/trunk//sipnet_if" + +#specify host section +host.flag <- "rabbitmq" +if (host.flag == "remote") { + #if we submit jobs through tunnel remotely. 
+ host = structure(list( + name = "geo.bu.edu", + usr = "zhangdc", + folder = SDA_out_dir, + prerun = "module load R/4.1.2", + cdosetup = "module load cdo/2.0.6", + qsub = "qsub -l h_rt=24:00:00 -q 'geo*' -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash", + qsub.jobid = "Your job ([0-9]+) .*", + qstat = "qstat -j @JOBID@ || echo DONE", + tunnel = "~/Tunnel/Tunnel", + outdir = SDA_out_dir, + rundir = SDA_run_dir + )) +} else if (host.flag == "local") { + host = structure(list( + name = "localhost", + folder = SDA_out_dir, + outdir = SDA_out_dir, + rundir = SDA_run_dir + )) +} else if (host.flag == "rabbitmq") { + host = structure(list( + name = "localhost", + rabbitmq = structure(list( + uri = "amqp://guest:guest@pecan-rabbitmq:15672/%2F", + queue = "SIPNET_r136", + oc = "/data/bin/oc", + occmd = "get pod -l app=@APPNAME@ -o name", + appname = "dongchen-sda" + )), + folder = SDA_out_dir, + outdir = SDA_out_dir, + rundir = SDA_run_dir + )) + model_binary <- "/usr/local/bin/sipnet.r136" +} #Start building template template <- PEcAn.settings::Settings(list( ############################################################################ @@ -127,7 +170,7 @@ template <- PEcAn.settings::Settings(list( ########################################################################### database = structure(list( bety = structure( - list(user = "bety", password = "bety", host = "128.197.168.114", + list(user = "bety", password = "bety", host = "10.241.76.27", dbname = "bety", driver = "PostgreSQL", write = "FALSE" )) )), @@ -197,7 +240,7 @@ template <- PEcAn.settings::Settings(list( type = "SIPNET", revision = "ssr", delete.raw = FALSE, - binary = "/usr2/postdoc/istfer/SIPNET/trunk//sipnet_if", + binary = model_binary, jobtemplate = "~/sipnet_geo.job" )), @@ -209,19 +252,7 @@ template <- PEcAn.settings::Settings(list( ########################################################################### ########################################################################### #be carefull of the host section, you need to specify the host of your own!!! - host = structure(list( - name = "geo.bu.edu", - usr = "zhangdc", - folder = "/projectnb/dietzelab/dongchen/All_NEON_SDA/NEON42/SDA/out", - prerun = "module load R/4.1.2", - cdosetup = "module load cdo/2.0.6", - qsub = "qsub -l h_rt=24:00:00 -q 'geo*' -N @NAME@ -o @STDOUT@ -e @STDERR@ -S /bin/bash", - qsub.jobid = "Your job ([0-9]+) .*", - qstat = "qstat -j @JOBID@ || echo DONE", - tunnel = "~/Tunnel/Tunnel", - outdir = "/projectnb/dietzelab/dongchen/All_NEON_SDA/NEON42/SDA/out", - rundir = "/projectnb/dietzelab/dongchen/All_NEON_SDA/NEON42/SDA/run" - )), + host = host, ############################################################################ ############################################################################ @@ -292,29 +323,6 @@ writeChar(tmp, XML_out_dir) settings <- PEcAn.settings::read.settings(XML_out_dir) -#iteratively grab ERA5 paths for each site -for (i in 1:nSite) { - temp_ERA5_path <- settings[[i]]$run$inputs$met$path - temp_site_id <- settings[[i]]$run$site$id - temp_full_paths <- list.files(path=paste0(temp_ERA5_path, temp_site_id), pattern = '*.clim', full.names = T) - - #need a better way to code it up - #test works!!!! 
- #populated IC file paths into settings - Create_mult_list <- function(list.names, paths){ - out <- as.list(paths) - names(out) <- list.names - out - } - settings[[i]]$run$inputs$met$path <- Create_mult_list(rep("path", length(temp_full_paths)), temp_full_paths) - - #code on met_start and met_end - settings[[i]]$run$site$met.start <- start_date - settings[[i]]$run$site$met.end <- end_date - settings[[i]]$run$start.date <- start_date - settings[[i]]$run$end.date <- end_date -} - #add Lat and Lon to each site #grab Site IDs from settings site_ID <- c() From bd9ebf9f9105720c27c2c35844d78deeffac35d9 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 30 Nov 2023 12:05:04 -0500 Subject: [PATCH 13/48] Update naming for the OC command. --- models/sipnet/R/write.configs.SIPNET.R | 8 ++++---- modules/assim.sequential/R/sda.enkf_MultiSite.R | 5 ++--- .../inst/MultiSite-Exs/SDA/Create_Multi_settings.R | 11 +++++------ 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/models/sipnet/R/write.configs.SIPNET.R b/models/sipnet/R/write.configs.SIPNET.R index e2059b5c259..02f33ca8c2e 100755 --- a/models/sipnet/R/write.configs.SIPNET.R +++ b/models/sipnet/R/write.configs.SIPNET.R @@ -72,12 +72,12 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs # create rabbitmq specific setup. cpcmd <- rmoutdircmd <- rmrundircmd <- "" if (!is.null(settings$host$rabbitmq)) { - occmd <- gsub("@APPNAME@", settings$host$rabbitmq$appname, settings$host$rabbitmq$occmd) #rsync cmd from remote to local host. - cpcmd <- paste(cpcmd, settings$host$rabbitmq$oc, "rsync", outdir, paste0("$(", settings$host$rabbitmq$oc, " ", occmd, "):", outdir)) + cpcmd <- gsub("@OUTDIR@", outdir, settings$host$rabbitmq$cpfcmd) + #delete files within rundir and outdir. - rmoutdircmd <- paste(rmoutdircmd, "rm", file.path(outdir, "*")) - rmrundircmd <- paste(rmrundircmd, "rm", file.path(rundir, "*")) + rmoutdircmd <- paste("rm", file.path(outdir, "*")) + rmrundircmd <- paste("rm", file.path(rundir, "*")) } # create job.sh diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index 6ae8c5f4ce6..3288f32fc5d 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -427,9 +427,8 @@ sda.enkf.multisite <- function(settings, #if it's a rabbitmq job sumbmission, we will first copy and paste the whole run folder within the SDA to the remote host. if (!is.null(settings$host$rabbitmq)) { - cmd <- paste0("oc rsync", settings$host$rundir, "$(", "oc" ) - sipnet.label <- "oc get pod -l app.kubernetes.io/name=pecan-model-sipnet-136 -o name):" - try(system(paste0("oc rsync ", settings$host$rundir, " $(", sipnet.label, settings$host$rundir), intern = TRUE)) + cp2cmd <- gsub("@RUNDIR@", settings$host$rundir, settings$host$rabbitmq$cp2cmd) + try(system(cp2cmd, intern = TRUE)) } #I'm rewriting the runs because when I use the parallel approach for writing configs the run.txt will get messed up; because multiple cores want to write on it at the same time. 
diff --git a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R index 42c5ddacf32..991d423686f 100644 --- a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R +++ b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R @@ -9,9 +9,9 @@ start_date <- "2012/01/01" end_date <- "2021/12/31" #setup working space -outdir <- "/projectnb/dietzelab/dongchen/All_NEON_SDA/NEON42/SDA" -SDA_run_dir <- "/projectnb/dietzelab/dongchen/All_NEON_SDA/NEON42/SDA/run" -SDA_out_dir <- "/projectnb/dietzelab/dongchen/All_NEON_SDA/NEON42/SDA/out" +outdir <- "/projectnb/dietzelab/dongchen/All_NEON_SDA/NEON42/SDA/" +SDA_run_dir <- "/projectnb/dietzelab/dongchen/All_NEON_SDA/NEON42/SDA/run/" +SDA_out_dir <- "/projectnb/dietzelab/dongchen/All_NEON_SDA/NEON42/SDA/out/" ERA5_dir <- "/projectnb/dietzelab/dongchen/All_NEON_SDA/NEON42/ERA5_2012_2021/" XML_out_dir <- "/projectnb/dietzelab/dongchen/All_NEON_SDA/NEON42/SDA/pecan.xml" @@ -80,9 +80,8 @@ if (host.flag == "remote") { rabbitmq = structure(list( uri = "amqp://guest:guest@pecan-rabbitmq:15672/%2F", queue = "SIPNET_r136", - oc = "/data/bin/oc", - occmd = "get pod -l app=@APPNAME@ -o name", - appname = "dongchen-sda" + cp2cmd = "oc rsync @RUNDIR@ $(oc get pod -l app.kubernetes.io/name=pecan-model-sipnet-136 -o name):@RUNDIR@", + cpfcmd = "/data/bin/oc rsync @OUTDIR@ $(/data/bin/oc get pod -l app=dongchen-sda -o name):@OUTDIR@" )), folder = SDA_out_dir, outdir = SDA_out_dir, From 8a599f3cb13a839373752f0480b587d8eb6c9e7e Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 30 Nov 2023 12:08:30 -0500 Subject: [PATCH 14/48] Update documentation. --- base/remote/R/qsub_parallel.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/base/remote/R/qsub_parallel.R b/base/remote/R/qsub_parallel.R index 15a661ce88b..14d765c3360 100644 --- a/base/remote/R/qsub_parallel.R +++ b/base/remote/R/qsub_parallel.R @@ -4,7 +4,7 @@ #' @param files allow submit jobs based on job.sh file paths. #' @param prefix used for detecting if jobs are completed or not. #' @param sleep time (in second) that we wait each time for the jobs to be completed. -#' @param hybrid Decide if we want to detect the job completion by both files and job ids on the server or just by the job ids on the server.s +#' @param hybrid A Boolean argument decide the way of detecting job completion. If it's TRUE then we will detect both the outputted files and job ids on the server. If it's FALSE then we will only detect the job ids on the server. #' @export #' @examples #' \dontrun{ From 8bb9b0217a48edecbb95f545f0476609a734d8bf Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 30 Nov 2023 12:15:04 -0500 Subject: [PATCH 15/48] Update dependencies and namespace. 
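
Context for the set_names() change (a minimal sketch, not code from this
patch): PEcAn.data.atmosphere does not attach purrr, so the bare set_names()
call in extract_ERA5.R only worked when purrr happened to be loaded; the
namespaced form works with purrr merely installed:

    vars <- c("t2m", "tp")
    # set_names(vars)        # "could not find function" if purrr is not attached
    purrr::set_names(vars)   # names each element after itself
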
--- modules/data.atmosphere/DESCRIPTION | 3 +++ modules/data.atmosphere/R/extract_ERA5.R | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/data.atmosphere/DESCRIPTION b/modules/data.atmosphere/DESCRIPTION index 097a134349e..e1a4dd75f41 100644 --- a/modules/data.atmosphere/DESCRIPTION +++ b/modules/data.atmosphere/DESCRIPTION @@ -66,7 +66,10 @@ Imports: Suggests: doParallel, foreach, + furrr, + future, parallel, + PEcAn.settings, progress, reticulate Remotes: diff --git a/modules/data.atmosphere/R/extract_ERA5.R b/modules/data.atmosphere/R/extract_ERA5.R index 7b093c49695..e6e956aa1a0 100644 --- a/modules/data.atmosphere/R/extract_ERA5.R +++ b/modules/data.atmosphere/R/extract_ERA5.R @@ -87,7 +87,7 @@ extract.nc.ERA5 <- vars <- names(nc_data$var) # for the variables extract the data all.data.point <- vars %>% - set_names(vars) %>% + purrr::set_names(vars) %>% purrr::map_dfc(function(vname) { if (verbose) { PEcAn.logger::logger.info(paste0(" \t ",vname, "is being extracted ! ")) From 0a2b30242a64849d55dcfeb93228fffbb163c87a Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 30 Nov 2023 12:21:30 -0500 Subject: [PATCH 16/48] Update files. --- Makefile.depends | 2 +- base/remote/man/qsub_parallel.Rd | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile.depends b/Makefile.depends index 652db3c287a..05570adf085 100644 --- a/Makefile.depends +++ b/Makefile.depends @@ -12,7 +12,7 @@ $(call depends,modules/allometry): | .install/base/db $(call depends,modules/assim.batch): | .install/modules/benchmark .install/base/db .install/modules/emulator .install/base/logger .install/modules/meta.analysis .install/base/remote .install/base/settings .install/modules/uncertainty .install/base/utils .install/base/workflow $(call depends,modules/assim.sequential): | .install/base/db .install/base/logger .install/base/remote .install/base/settings .install/modules/uncertainty .install/base/workflow .install/modules/benchmark .install/modules/data.remote $(call depends,modules/benchmark): | .install/base/db .install/base/logger .install/base/remote .install/base/settings .install/base/utils .install/modules/data.land -$(call depends,modules/data.atmosphere): | .install/base/db .install/base/logger .install/base/remote .install/base/utils +$(call depends,modules/data.atmosphere): | .install/base/db .install/base/logger .install/base/remote .install/base/utils .install/base/settings $(call depends,modules/data.hydrology): | .install/base/logger .install/base/utils $(call depends,modules/data.land): | .install/modules/benchmark .install/modules/data.atmosphere .install/base/db .install/base/logger .install/base/remote .install/base/utils .install/base/visualization .install/base/settings $(call depends,modules/data.remote): | .install/base/db .install/base/utils .install/base/logger .install/base/remote diff --git a/base/remote/man/qsub_parallel.Rd b/base/remote/man/qsub_parallel.Rd index d608ad30771..274104b8139 100644 --- a/base/remote/man/qsub_parallel.Rd +++ b/base/remote/man/qsub_parallel.Rd @@ -21,7 +21,7 @@ qsub_parallel( \item{sleep}{time (in second) that we wait each time for the jobs to be completed.} -\item{hybrid}{Decide if we want to detect the job completion by both files and job ids on the server or just by the job ids on the server.s} +\item{hybrid}{A Boolean argument decide the way of detecting job completion. If it's TRUE then we will detect both the outputted files and job ids on the server. 
If it's FALSE then we will only detect the job ids on the server.} } \description{ qsub_parallel From 5488a49d974d5fb486a497f9b2e25d108cb95cac Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Wed, 6 Dec 2023 12:24:03 -0500 Subject: [PATCH 17/48] bug fixes for write config sipnet function. --- models/sipnet/R/write.configs.SIPNET.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/models/sipnet/R/write.configs.SIPNET.R b/models/sipnet/R/write.configs.SIPNET.R index 02f33ca8c2e..8acd27986e8 100755 --- a/models/sipnet/R/write.configs.SIPNET.R +++ b/models/sipnet/R/write.configs.SIPNET.R @@ -98,9 +98,9 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs jobsh <- gsub("@BINARY@", settings$model$binary, jobsh) jobsh <- gsub("@REVISION@", settings$model$revision, jobsh) - gsub("@CPCMD@", cpcmd, jobsh) - gsub("@RMOUTDIRCMD@", rmoutdircmd, jobsh) - gsub("@RMRUNDIRCMD@", rmrundircmd, jobsh) + jobsh <- gsub("@CPCMD@", cpcmd, jobsh) + jobsh <- gsub("@RMOUTDIRCMD@", rmoutdircmd, jobsh) + jobsh <- gsub("@RMRUNDIRCMD@", rmrundircmd, jobsh) if(is.null(settings$state.data.assimilation$NC.Prefix)){ settings$state.data.assimilation$NC.Prefix <- "sipnet.out" From af80377e4f49acbcf5df73024ef3c90b19c7fd86 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 7 Dec 2023 13:54:12 -0500 Subject: [PATCH 18/48] Fix path issue. --- models/sipnet/R/write.configs.SIPNET.R | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/models/sipnet/R/write.configs.SIPNET.R b/models/sipnet/R/write.configs.SIPNET.R index 8acd27986e8..3fd4684dfcc 100755 --- a/models/sipnet/R/write.configs.SIPNET.R +++ b/models/sipnet/R/write.configs.SIPNET.R @@ -70,10 +70,14 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs } # create rabbitmq specific setup. - cpcmd <- rmoutdircmd <- rmrundircmd <- "" + cpruncmd <- cpoutcmd <- rmoutdircmd <- rmrundircmd <- "" if (!is.null(settings$host$rabbitmq)) { #rsync cmd from remote to local host. - cpcmd <- gsub("@OUTDIR@", outdir, settings$host$rabbitmq$cpfcmd) + cpruncmd <- gsub("@OUTDIR@", settings$host$rundir, settings$host$rabbitmq$cpfcmd) + cpruncmd <- gsub("@OUTFOLDER@", rundir, cpruncmd) + + cpoutcmd <- gsub("@OUTDIR@", settings$host$outdir, settings$host$rabbitmq$cpfcmd) + cpoutcmd <- gsub("@OUTFOLDER@", outdir, cpoutcmd) #delete files within rundir and outdir. rmoutdircmd <- paste("rm", file.path(outdir, "*")) @@ -98,7 +102,8 @@ write.config.SIPNET <- function(defaults, trait.values, settings, run.id, inputs jobsh <- gsub("@BINARY@", settings$model$binary, jobsh) jobsh <- gsub("@REVISION@", settings$model$revision, jobsh) - jobsh <- gsub("@CPCMD@", cpcmd, jobsh) + jobsh <- gsub("@CPRUNCMD@", cpruncmd, jobsh) + jobsh <- gsub("@CPOUTCMD@", cpoutcmd, jobsh) jobsh <- gsub("@RMOUTDIRCMD@", rmoutdircmd, jobsh) jobsh <- gsub("@RMRUNDIRCMD@", rmrundircmd, jobsh) From 4dcad989f3dc10d5f89501c46ba6063ba5b26bf7 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 7 Dec 2023 13:54:25 -0500 Subject: [PATCH 19/48] Copy run and out folder back. --- models/template/inst/template_geo.job | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/models/template/inst/template_geo.job b/models/template/inst/template_geo.job index b60222795e7..19dcb7d8bdd 100644 --- a/models/template/inst/template_geo.job +++ b/models/template/inst/template_geo.job @@ -43,7 +43,8 @@ cp "@RUNDIR@/README.txt" "@OUTDIR@/README.txt" @HOST_TEARDOWN@ #copy files back. 
-@CPCMD@ +@CPRUNCMD@ +@CPOUTCMD@ #delete files in the run and out folder. @RMRUNDIRCMD@ From f23f64e7015868b1bc22f1d54ccb5dad454c1069 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 7 Dec 2023 13:54:42 -0500 Subject: [PATCH 20/48] Fix path issue. --- .../inst/MultiSite-Exs/SDA/Create_Multi_settings.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R index 991d423686f..d8aba751ea4 100644 --- a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R +++ b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R @@ -81,7 +81,7 @@ if (host.flag == "remote") { uri = "amqp://guest:guest@pecan-rabbitmq:15672/%2F", queue = "SIPNET_r136", cp2cmd = "oc rsync @RUNDIR@ $(oc get pod -l app.kubernetes.io/name=pecan-model-sipnet-136 -o name):@RUNDIR@", - cpfcmd = "/data/bin/oc rsync @OUTDIR@ $(/data/bin/oc get pod -l app=dongchen-sda -o name):@OUTDIR@" + cpfcmd = "/data/bin/oc rsync @OUTFOLDER@ $(/data/bin/oc get pod -l app=dongchen-sda -o name):@OUTDIR@" )), folder = SDA_out_dir, outdir = SDA_out_dir, From 81e98a6fdf656d96027632b56c5612e591d198d0 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 7 Dec 2023 14:11:48 -0500 Subject: [PATCH 21/48] Add job completion detection by file name. --- base/workflow/R/start_model_runs.R | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/base/workflow/R/start_model_runs.R b/base/workflow/R/start_model_runs.R index 6350dbd5934..1e324db58ee 100644 --- a/base/workflow/R/start_model_runs.R +++ b/base/workflow/R/start_model_runs.R @@ -119,7 +119,14 @@ start_model_runs <- function(settings, write = TRUE, stop.on.error = TRUE) { out <- PEcAn.remote::start_rabbitmq( folder, settings$host$rabbitmq$uri, settings$host$rabbitmq$queue) PEcAn.logger::logger.debug("JOB.SH submit status:", out) - jobids[run] <- folder + + #if we want to detect the job completion by nc files. + if (!is.null(settings$host$rabbitmq$prefix)) { + jobids[run] <- out + } else { + #if we want to detect the job completion by rabbitmq.out files. + jobids[run] <- folder + } } else if (is_modellauncher) { # set up launcher script if we use modellauncher @@ -291,8 +298,13 @@ start_model_runs <- function(settings, write = TRUE, stop.on.error = TRUE) { # check to see if job is done job_finished <- FALSE if (is_rabbitmq) { - job_finished <- - file.exists(file.path(jobids[run], "rabbitmq.out")) + if (!is.null(settings$host$rabbitmq$prefix)) { + job_finished <- + file.exists(file.path(jobids[run], settings$host$rabbitmq$prefix)) + } else { + job_finished <- + file.exists(file.path(jobids[run], "rabbitmq.out")) + } } else if (is_qsub) { job_finished <- PEcAn.remote::qsub_run_finished( run = jobids[run], From ac8dd904559050c01105b4e0ba4406e26528b077 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 7 Dec 2023 14:12:10 -0500 Subject: [PATCH 22/48] add prefix to the settings for the NC file name each year. 
--- modules/assim.sequential/R/sda.enkf_MultiSite.R | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index 3288f32fc5d..323f2ad88c8 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -427,6 +427,7 @@ sda.enkf.multisite <- function(settings, #if it's a rabbitmq job sumbmission, we will first copy and paste the whole run folder within the SDA to the remote host. if (!is.null(settings$host$rabbitmq)) { + settings$host$rabbitmq$prefix <- paste0(obs.year, ".nc") cp2cmd <- gsub("@RUNDIR@", settings$host$rundir, settings$host$rabbitmq$cp2cmd) try(system(cp2cmd, intern = TRUE)) } From ca435b64e9534df367d38158a9aa33a8e8cde805 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Thu, 7 Dec 2023 14:12:25 -0500 Subject: [PATCH 23/48] add prefix arg in the settings. --- .../inst/MultiSite-Exs/SDA/Create_Multi_settings.R | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R index d8aba751ea4..826abdf2e6f 100644 --- a/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R +++ b/modules/assim.sequential/inst/MultiSite-Exs/SDA/Create_Multi_settings.R @@ -78,6 +78,7 @@ if (host.flag == "remote") { host = structure(list( name = "localhost", rabbitmq = structure(list( + prefix = NULL, uri = "amqp://guest:guest@pecan-rabbitmq:15672/%2F", queue = "SIPNET_r136", cp2cmd = "oc rsync @RUNDIR@ $(oc get pod -l app.kubernetes.io/name=pecan-model-sipnet-136 -o name):@RUNDIR@", From 1341adae31c620e80d9281d14367cd272d0bbad4 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Sat, 9 Dec 2023 12:11:18 -0500 Subject: [PATCH 24/48] Update qsub_parallel to include rabbitmq submission. --- base/remote/R/qsub_parallel.R | 152 +++++++++++++++++++--------------- 1 file changed, 87 insertions(+), 65 deletions(-) diff --git a/base/remote/R/qsub_parallel.R b/base/remote/R/qsub_parallel.R index 14d765c3360..8dc304f7ad4 100644 --- a/base/remote/R/qsub_parallel.R +++ b/base/remote/R/qsub_parallel.R @@ -23,6 +23,8 @@ qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out", sleep = folder <- NULL run_list <- readLines(con = file.path(settings$rundir, "runs.txt")) is_local <- PEcAn.remote::is.localhost(settings$host) + is_qsub <- !is.null(settings$host$qsub) + is_rabbitmq <- !is.null(settings$host$rabbitmq) # loop through runs and either call start run, or launch job on remote machine # parallel submit jobs cores <- parallel::detectCores() @@ -35,25 +37,32 @@ qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out", sleep = PEcAn.logger::logger.info("Submitting jobs!") # if we want to submit jobs separately. 
if(is.null(files)){ - jobids <- foreach::foreach(run = run_list, .packages="Kendall", .options.snow=opts, settings = rep(settings, length(files))) %dopar% { - run_id_string <- format(run, scientific = FALSE) - qsub <- settings$host$qsub - qsub <- gsub("@NAME@", paste0("PEcAn-", run_id_string), qsub) - qsub <- gsub("@STDOUT@", file.path(settings$host$outdir, run_id_string, "stdout.log"), qsub) - qsub <- gsub("@STDERR@", file.path(settings$host$outdir, run_id_string, "stderr.log"), qsub) - qsub <- strsplit(qsub, " (?=([^\"']*\"[^\"']*\")*[^\"']*$)", perl = TRUE) - # start the actual model run - cmd <- qsub[[1]] - if(PEcAn.remote::is.localhost(settings$host)){ - out <- system2(cmd, file.path(settings$host$rundir, run_id_string, "job.sh"), stdout = TRUE, stderr = TRUE) - }else{ - out <- PEcAn.remote::remote.execute.cmd(settings$host, cmd, file.path(settings$host$rundir, run_id_string, "job.sh"), stderr = TRUE) + if (is_qsub) { + jobids <- foreach::foreach(run = run_list, .packages="Kendall", .options.snow=opts, settings = rep(settings, length(run_list))) %dopar% { + run_id_string <- format(run, scientific = FALSE) + qsub <- settings$host$qsub + qsub <- gsub("@NAME@", paste0("PEcAn-", run_id_string), qsub) + qsub <- gsub("@STDOUT@", file.path(settings$host$outdir, run_id_string, "stdout.log"), qsub) + qsub <- gsub("@STDERR@", file.path(settings$host$outdir, run_id_string, "stderr.log"), qsub) + qsub <- strsplit(qsub, " (?=([^\"']*\"[^\"']*\")*[^\"']*$)", perl = TRUE) + # start the actual model run + cmd <- qsub[[1]] + if(PEcAn.remote::is.localhost(settings$host)){ + out <- system2(cmd, file.path(settings$host$rundir, run_id_string, "job.sh"), stdout = TRUE, stderr = TRUE) + }else{ + out <- PEcAn.remote::remote.execute.cmd(settings$host, cmd, file.path(settings$host$rundir, run_id_string, "job.sh"), stderr = TRUE) + } + jobid <- PEcAn.remote::qsub_get_jobid( + out = out[length(out)], + qsub.jobid = settings$host$qsub.jobid, + stop.on.error = TRUE) + return(jobid) + } + } else if (is_rabbitmq) { + out <- foreach::foreach(run = run_list, .packages="Kendall", .options.snow=opts, settings = rep(settings, length(run_list))) %dopar% { + run_id_string <- format(run, scientific = FALSE) + PEcAn.remote::start_rabbitmq(file.path(settings$host$rundir, run_id_string), settings$host$rabbitmq$uri, settings$host$rabbitmq$queue) } - jobid <- PEcAn.remote::qsub_get_jobid( - out = out[length(out)], - qsub.jobid = settings$host$qsub.jobid, - stop.on.error = TRUE) - return(jobid) } }else{ # if we want to submit merged job files. @@ -92,16 +101,12 @@ qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out", sleep = ## setup progressbar folders <- file.path(settings$host$outdir, run_list) L_folder <- length(folders) - L_jobid <- length(jobids) - pb <- utils::txtProgressBar(min = 0, max = L_folder, style = 3) - pb1 <- utils::txtProgressBar(min = 0, max = L_jobid, style = 3) - pbi <- pbi1 <- 0 + pbi <- 0 #here we not only detect if the target files are generated. #we also detect if the jobs are still existed on the server. 
- if (hybrid) { - while ((L_folder - length(folders)) < L_folder & - (L_jobid - length(jobids)) < L_jobid) { + if (is_rabbitmq) { + while ((L_folder - length(folders)) < L_folder) { Sys.sleep(sleep) completed_folders <- foreach::foreach(folder = folders) %dopar% { if(file.exists(file.path(folder, prefix))){ @@ -109,52 +114,69 @@ qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out", sleep = } } %>% unlist() folders <- folders[which(!folders %in% completed_folders)] - - #or we can try detect if the jobs are still on the server. - #specify the host and qstat arguments for the future_map function. - host <- settings$host - qstat <- host$qstat - completed_jobs <- jobids %>% furrr::future_map(function(id) { - if (PEcAn.remote::qsub_run_finished( - run = id, - host = host, - qstat = qstat)) { - return(id) - } - }) %>% unlist() - jobids <- jobids[which(!jobids %in% completed_jobs)] - - #compare two progresses and set the maximum progress for the progress bar. pbi <- L_folder - length(folders) utils::setTxtProgressBar(pb, pbi) - - pbi1 <- L_jobid - length(jobids) - utils::setTxtProgressBar(pb1, pbi1) } } else { - #special case that only detect the job ids on the server. - while ((L_jobid - length(jobids)) < L_jobid) { - #detect if the jobs are still on the server. - #specify the host and qstat arguments for the future_map function. - Sys.sleep(sleep) - host <- settings$host - qstat <- host$qstat - completed_jobs <- jobids %>% furrr::future_map(function(id) { - if (PEcAn.remote::qsub_run_finished( - run = id, - host = host, - qstat = qstat)) { - return(id) - } - }) %>% unlist() - jobids <- jobids[which(!jobids %in% completed_jobs)] - - #compare two progresses and set the maximum progress for the progress bar. - pbi1 <- L_jobid - length(jobids) - utils::setTxtProgressBar(pb1, pbi1) + L_jobid <- length(jobids) + pb1 <- utils::txtProgressBar(min = 0, max = L_jobid, style = 3) + pb1 <- 0 + if (hybrid) { + while ((L_folder - length(folders)) < L_folder & + (L_jobid - length(jobids)) < L_jobid) { + Sys.sleep(sleep) + completed_folders <- foreach::foreach(folder = folders) %dopar% { + if(file.exists(file.path(folder, prefix))){ + return(folder) + } + } %>% unlist() + folders <- folders[which(!folders %in% completed_folders)] + + #or we can try detect if the jobs are still on the server. + #specify the host and qstat arguments for the future_map function. + host <- settings$host + qstat <- host$qstat + completed_jobs <- jobids %>% furrr::future_map(function(id) { + if (PEcAn.remote::qsub_run_finished( + run = id, + host = host, + qstat = qstat)) { + return(id) + } + }) %>% unlist() + jobids <- jobids[which(!jobids %in% completed_jobs)] + + #compare two progresses and set the maximum progress for the progress bar. + pbi <- L_folder - length(folders) + utils::setTxtProgressBar(pb, pbi) + + pbi1 <- L_jobid - length(jobids) + utils::setTxtProgressBar(pb1, pbi1) + } + } else { + #special case that only detect the job ids on the server. + while ((L_jobid - length(jobids)) < L_jobid) { + #detect if the jobs are still on the server. + #specify the host and qstat arguments for the future_map function. + Sys.sleep(sleep) + host <- settings$host + qstat <- host$qstat + completed_jobs <- jobids %>% furrr::future_map(function(id) { + if (PEcAn.remote::qsub_run_finished( + run = id, + host = host, + qstat = qstat)) { + return(id) + } + }) %>% unlist() + jobids <- jobids[which(!jobids %in% completed_jobs)] + + #compare two progresses and set the maximum progress for the progress bar. 
+ pbi1 <- L_jobid - length(jobids) + utils::setTxtProgressBar(pb1, pbi1) + } } } - close(pb) parallel::stopCluster(cl) PEcAn.logger::logger.info("Completed!") From 739c02480a0d2f2229e8e4e8fe68330a08162104 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Sat, 9 Dec 2023 12:11:23 -0500 Subject: [PATCH 25/48] revert back. --- base/workflow/R/start_model_runs.R | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/base/workflow/R/start_model_runs.R b/base/workflow/R/start_model_runs.R index 1e324db58ee..37771d9bf2b 100644 --- a/base/workflow/R/start_model_runs.R +++ b/base/workflow/R/start_model_runs.R @@ -119,14 +119,7 @@ start_model_runs <- function(settings, write = TRUE, stop.on.error = TRUE) { out <- PEcAn.remote::start_rabbitmq( folder, settings$host$rabbitmq$uri, settings$host$rabbitmq$queue) PEcAn.logger::logger.debug("JOB.SH submit status:", out) - - #if we want to detect the job completion by nc files. - if (!is.null(settings$host$rabbitmq$prefix)) { - jobids[run] <- out - } else { - #if we want to detect the job completion by rabbitmq.out files. - jobids[run] <- folder - } + jobids[run] <- folder } else if (is_modellauncher) { # set up launcher script if we use modellauncher @@ -298,13 +291,8 @@ start_model_runs <- function(settings, write = TRUE, stop.on.error = TRUE) { # check to see if job is done job_finished <- FALSE if (is_rabbitmq) { - if (!is.null(settings$host$rabbitmq$prefix)) { - job_finished <- - file.exists(file.path(jobids[run], settings$host$rabbitmq$prefix)) - } else { - job_finished <- - file.exists(file.path(jobids[run], "rabbitmq.out")) - } + job_finished <- + file.exists(file.path(jobids[run], "rabbitmq.out")) } else if (is_qsub) { job_finished <- PEcAn.remote::qsub_run_finished( run = jobids[run], @@ -313,7 +301,7 @@ start_model_runs <- function(settings, write = TRUE, stop.on.error = TRUE) { } if (job_finished) { - + # TODO check output log if (is_rabbitmq) { data <- readLines(file.path(jobids[run], "rabbitmq.out")) @@ -384,4 +372,4 @@ runModule_start_model_runs <- function(settings, stop.on.error=TRUE) { PEcAn.logger::logger.severe( "runModule_start_model_runs only works with Settings or MultiSettings") } -} # runModule_start_model_runs +} # runModule_start_model_runs \ No newline at end of file From 2031e542085fab34f80cc151ee1b62353f305e45 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Sat, 9 Dec 2023 12:11:33 -0500 Subject: [PATCH 26/48] improve function. 
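The SDA call site now branches on whether run folders should be merged into
larger job scripts before submission. A minimal sketch of the two control
configurations, assuming control is the list passed to sda.enkf.multisite()
(only parallel_qsub and jobs.per.file are taken from the existing code; any
other fields are omitted here):

    # One qsub job per ensemble run:
    control <- list(parallel_qsub = TRUE, jobs.per.file = NULL)
    # Bundle ten runs into each submitted job script:
    control <- list(parallel_qsub = TRUE, jobs.per.file = 10)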
--- modules/assim.sequential/R/sda.enkf_MultiSite.R | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/modules/assim.sequential/R/sda.enkf_MultiSite.R b/modules/assim.sequential/R/sda.enkf_MultiSite.R index 323f2ad88c8..c46fe720f8c 100644 --- a/modules/assim.sequential/R/sda.enkf_MultiSite.R +++ b/modules/assim.sequential/R/sda.enkf_MultiSite.R @@ -439,7 +439,11 @@ sda.enkf.multisite <- function(settings, paste(file.path(rundir, 'runs.txt')) ## testing Sys.sleep(0.01) ## testing if(control$parallel_qsub){ - PEcAn.remote::qsub_parallel(settings, files=PEcAn.remote::merge_job_files(settings, control$jobs.per.file), prefix = paste0(obs.year, ".nc")) + if (is.null(control$jobs.per.file)) { + PEcAn.remote::qsub_parallel(settings, prefix = paste0(obs.year, ".nc")) + } else { + PEcAn.remote::qsub_parallel(settings, files=PEcAn.remote::merge_job_files(settings, control$jobs.per.file), prefix = paste0(obs.year, ".nc")) + } }else{ PEcAn.workflow::start_model_runs(settings, write=settings$database$bety$write) } From 8e0546e98f667f7e5784d90890c579d700f74656 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Mon, 22 Jan 2024 14:11:20 -0500 Subject: [PATCH 27/48] Debug for anchor sites data prep. --- .../assim.sequential/R/SDA_OBS_Assembler.R | 2 +- modules/data.atmosphere/R/ERA5_met_process.R | 38 +++++++------- modules/data.remote/R/Landtrendr_AGB_prep.R | 51 ++++++++++--------- 3 files changed, 48 insertions(+), 43 deletions(-) diff --git a/modules/assim.sequential/R/SDA_OBS_Assembler.R b/modules/assim.sequential/R/SDA_OBS_Assembler.R index a1d6f2bc109..f6d99b5d623 100644 --- a/modules/assim.sequential/R/SDA_OBS_Assembler.R +++ b/modules/assim.sequential/R/SDA_OBS_Assembler.R @@ -188,7 +188,7 @@ SDA_OBS_Assembler <- function(settings){ if (sum(is.na(obs.mean[[i]][[j]]))){ na_ind <- which(is.na(obs.mean[[i]][[j]])) obs.mean[[i]][[j]] <- obs.mean[[i]][[j]][-na_ind] - if(length(obs.mean[[i]][[j]]) == 1){ + if(length(new_diag(obs.cov[[i]][[j]])) == 1){ obs.cov[[i]][[j]] <- obs.cov[[i]][[j]][-na_ind] }else{ obs.cov[[i]][[j]] <- obs.cov[[i]][[j]][-na_ind, -na_ind] diff --git a/modules/data.atmosphere/R/ERA5_met_process.R b/modules/data.atmosphere/R/ERA5_met_process.R index 75cb39cdeef..a3255909a2c 100644 --- a/modules/data.atmosphere/R/ERA5_met_process.R +++ b/modules/data.atmosphere/R/ERA5_met_process.R @@ -29,7 +29,7 @@ ERA5_met_process <- function(settings, in.path, out.path, write.db=FALSE, write #conversion from string to number site.list$lat <- as.numeric(site.list$lat) site.list$lon <- as.numeric(site.list$lon) - list(site_id=site.list$id, lat=site.list$lat, lon=site.list$lon, site_name=site.list$name) + list(site.id=site.list$id, lat=site.list$lat, lon=site.list$lon, site_name=site.list$name) }) %>% dplyr::bind_rows() %>% as.list()))) { @@ -80,18 +80,14 @@ ERA5_met_process <- function(settings, in.path, out.path, write.db=FALSE, write Input_IDs <- list() } - #restructure the site_info. - new.site.info <- vector("list", length(settings)) - for (i in seq_along(new.site.info)) { - new.site.info[[i]] <- list(site.id = site_info$site_id[i], - lat = as.numeric(site_info$lat[i]), - lon = as.numeric(site_info$lon[i]), - start_date = settings$state.data.assimilation$start.date, - end_date = settings$state.data.assimilation$end.date, - out.path = out.path, - in.path = in.path, - model.type = settings$model$type) - } + #restructure the site_info into list. 
+ site_info$start_date <- rep(settings$state.data.assimilation$start.date, length(settings)) + site_info$end_date <- rep(settings$state.data.assimilation$end.date, length(settings)) + site_info$out.path <- rep(out.path, length(settings)) + site_info$in.path <- rep(in.path, length(settings)) + site_info$model.type <- rep(settings$model$type, length(settings)) + new.site.info <- split(as.data.frame(site_info), seq(nrow(as.data.frame(site_info)))) + #Extract ERA5 for each site. PEcAn.logger::logger.info("Started extracting ERA5 data!\n") Clim_paths <- furrr::future_map(new.site.info, function(site){ @@ -150,12 +146,16 @@ ERA5_met_process <- function(settings, in.path, out.path, write.db=FALSE, write #write the paths into settings. if (write) { #write paths into settings. - settings$run <- furrr::future_map2(settings$run, Clim_paths, function(site.list, paths){ - met.list <- as.list(paths) - names(met.list) <- rep("path", length(paths)) - site.list$run$inputs$met <- met.list - site.list - }) + for (i in seq_along(settings)) { + #fill in dates related to met files. + settings[[i]]$run$site$met.start <- + settings[[i]]$run$start.date <- + settings[[i]]$state.data.assimilation$start.date + settings[[i]]$run$site$met.end <- + settings[[i]]$run$end.date <- + settings[[i]]$state.data.assimilation$end.date + settings[[i]]$run$inputs$met <- as.list(unlist(Clim_paths[[i]])) %>% set_names(rep("path", length(Clim_paths[[i]]))) + } #write settings into xml file. PEcAn.logger::logger.info(paste0("Write updated pecan.xml file into: ", file.path(settings$outdir, "pecan.xml"))) diff --git a/modules/data.remote/R/Landtrendr_AGB_prep.R b/modules/data.remote/R/Landtrendr_AGB_prep.R index 7dc18afabbd..a5839af34ca 100644 --- a/modules/data.remote/R/Landtrendr_AGB_prep.R +++ b/modules/data.remote/R/Landtrendr_AGB_prep.R @@ -80,39 +80,44 @@ Landtrendr_AGB_prep <- function(site_info, start_date, end_date, time_points, AGB_Output <- matrix(NA, length(site_info$site_id), 2*length(time_points)+1) %>% `colnames<-`(c("site_id", paste0(time_points, "_AbvGrndWood"), paste0(time_points, "_SD"))) %>% as.data.frame()#we need: site_id, AGB, std, target time point. AGB_Output$site_id <- site_info$site_id - AGB_Output_temp <- AGB_Output } + AGB_Output_temp <- AGB_Output + new_site_info <- site_info %>% purrr::map(function(x)x[!stats::complete.cases(AGB_Output)]) #if we have any site missing previously if(length(new_site_info$site_id) != 0){ if(is.null(buffer) | as.logical(skip_buffer)){ #prepare lists for future::map parallelization. - l <- vector("list", length = length(new_site_info$site_id)) - for (i in seq_along(l)) { - l[[i]] <- list(site_info = list(site_id = new_site_info$site_id[i], - lat = new_site_info$lat[i], - lon = new_site_info$lon[i], - site_name = NA), - data_dir = AGB_indir, - product_dates = lubridate::year(time_points), - time_points = time_points) - } + new_site_info$AGB_indir <- rep(AGB_indir, length(new_site_info$site_id)) + new_site_info$start_date <- rep(start_date, length(new_site_info$site_id)) + new_site_info$end_date <- rep(end_date, length(new_site_info$site_id)) + l <- split(as.data.frame(new_site_info), seq(nrow(as.data.frame(new_site_info)))) + #extracting AGB data AGB_Output <- l %>% furrr::future_map(function(ll) { - med_agb_data <- PEcAn.data.remote::extract.LandTrendr.AGB(site_info = ll$site_info, + time_points <- seq(as.Date(ll$start_date), as.Date(ll$end_date), "1 year") + #Landtrendr AGB doesn't provide data after 2017. 
+ time_points <- time_points[which(lubridate::year(time_points) < 2018)] + product_dates <- lubridate::year(time_points) + site_info <- list(site_id = ll$site_id, + lat = ll$lat, + lon = ll$lon, + site_name = NA) + + med_agb_data <- PEcAn.data.remote::extract.LandTrendr.AGB(site_info = site_info, dataset = "median", fun = "mean", - data_dir = ll$data_dir, - product_dates = ll$product_dates)[[1]] %>% dplyr::select(-2) %>% - `colnames<-`(c("site_id", paste0(ll$time_points, "_AbvGrndWood"))) - sdev_agb_data <- PEcAn.data.remote::extract.LandTrendr.AGB(site_info = ll$site_info, + data_dir = ll$AGB_indir, + product_dates = product_dates)[[1]] %>% dplyr::select(-2) %>% + `colnames<-`(c("site_id", paste0(time_points, "_AbvGrndWood"))) + sdev_agb_data <- PEcAn.data.remote::extract.LandTrendr.AGB(site_info = site_info, dataset = "stdv", fun = "mean", - data_dir = ll$data_dir, - product_dates = ll$product_dates)[[1]] %>% dplyr::select(-c(1:2)) %>% - `colnames<-`(c(paste0(ll$time_points, "_SD"))) + data_dir = ll$AGB_indir, + product_dates = product_dates)[[1]] %>% dplyr::select(-c(1:2)) %>% + `colnames<-`(c(paste0(time_points, "_SD"))) cbind(med_agb_data, sdev_agb_data) - }, .progress = T) %>% bind_rows() + }, .progress = T) %>% dplyr::bind_rows() }else{#buffer is not empty #extracting AGB data med <- PEcAn.data.remote::extract.LandTrendr.AGB(site_info = new_site_info, @@ -149,7 +154,7 @@ Landtrendr_AGB_prep <- function(site_info, start_date, end_date, time_points, `colnames<-`(c("date", "site_id", "lat", "lon", "agb", "sd")) for (id in AGB_Output$site_id) { - site_AGB <- AGB_Output[which(AGB_Output$site_id==id),] + site_AGB <- unlist(AGB_Output[which(AGB_Output$site_id==id),]) for (i in seq_along(time_points)) { date <- lubridate::year(time_points[i]) site_id <- id @@ -181,8 +186,8 @@ Landtrendr_AGB_prep <- function(site_info, start_date, end_date, time_points, t <- time_points[i]#otherwise the t will be number instead of date. for (id in site_info$site_id) { site_AGB <- Current_CSV[which(Current_CSV$site_id == id),] - AGB_Output[which(AGB_Output$site_id==id), paste0(t, "_AbvGrndWood")] <- site_AGB[which(site_AGB$date == lubridate::year(t)), "agb"] - AGB_Output[which(AGB_Output$site_id==id), paste0(t, "_SD")] <- site_AGB[which(site_AGB$date == lubridate::year(t)), "sd"] + AGB_Output[which(AGB_Output$site_id==id), paste0(t, "_AbvGrndWood")] <- as.numeric(site_AGB[which(site_AGB$date == lubridate::year(t)), "agb"]) + AGB_Output[which(AGB_Output$site_id==id), paste0(t, "_SD")] <- as.numeric(site_AGB[which(site_AGB$date == lubridate::year(t)), "sd"]) } } } From af261e994e2e710a00c739994fa3aea28e83917e Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 2 Feb 2024 14:29:30 -0500 Subject: [PATCH 28/48] Added logger info to this function. --- modules/assim.sequential/R/SDA_OBS_Assembler.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/assim.sequential/R/SDA_OBS_Assembler.R b/modules/assim.sequential/R/SDA_OBS_Assembler.R index f6d99b5d623..9826ba14a64 100644 --- a/modules/assim.sequential/R/SDA_OBS_Assembler.R +++ b/modules/assim.sequential/R/SDA_OBS_Assembler.R @@ -21,6 +21,7 @@ SDA_OBS_Assembler <- function(settings){ #check if we want to proceed the free run without any observations. if (as.logical(settings$state.data.assimilation$free.run)) { + PEcAn.logger::logger.info("Create obs for free run!") #calculate time points. 
time_points <- obs_timestep2timepoint(Obs_Prep$start.date, Obs_Prep$end.date, Obs_Prep$timestep) @@ -62,6 +63,7 @@ SDA_OBS_Assembler <- function(settings){ if (names(Obs_Prep)[i] %in% c("timestep", "start.date", "end.date", "outdir")){ next }else{ + PEcAn.logger::logger.info(paste("Entering", names(Obs_Prep)[i])) fun_name <- names(Obs_Prep)[i] var_ind <- c(var_ind, i) } From 776f8c20d45c2dc590cc19df039f7593d9de4dad Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 2 Feb 2024 14:30:06 -0500 Subject: [PATCH 29/48] Optimized SoilGrids SOC prep functions. --- modules/data.land/R/Soilgrids_SoilC_prep.R | 4 +- .../data.land/R/soilgrids_soc_extraction.R | 103 ++++++++---------- 2 files changed, 47 insertions(+), 60 deletions(-) diff --git a/modules/data.land/R/Soilgrids_SoilC_prep.R b/modules/data.land/R/Soilgrids_SoilC_prep.R index 516a8842e90..c2d65ad0647 100644 --- a/modules/data.land/R/Soilgrids_SoilC_prep.R +++ b/modules/data.land/R/Soilgrids_SoilC_prep.R @@ -31,12 +31,14 @@ Soilgrids_SoilC_prep <- function(site_info, start_date, end_date, time_points, SoilC_Output <- matrix(NA, length(site_info$site_id), 2*length(time_points)+1) %>% `colnames<-`(c("site_id", paste0(time_points, "_TotSoilCarb"), paste0(time_points, "_SD"))) %>% as.data.frame()#we need: site_id, agb, sd, target time point. SoilC_Output$site_id <- site_info$site_id - #loop over time and site for (i in seq_along(time_points)) { t <- time_points[i] for (id in site_info$site_id) { site_SoilC <- Previous_CSV[which(Previous_CSV$Site_ID == id),] + if (dim(site_SoilC)[1] == 0) { + next + } SoilC_Output[which(SoilC_Output$site_id==id), paste0(t, "_TotSoilCarb")] <- site_SoilC$Total_soilC_0.200cm SoilC_Output[which(SoilC_Output$site_id==id), paste0(t, "_SD")] <- site_SoilC$Std_soilC_0.200cm } diff --git a/modules/data.land/R/soilgrids_soc_extraction.R b/modules/data.land/R/soilgrids_soc_extraction.R index 3e1bc5c55d0..2131832ca50 100644 --- a/modules/data.land/R/soilgrids_soc_extraction.R +++ b/modules/data.land/R/soilgrids_soc_extraction.R @@ -54,14 +54,18 @@ ##' @author Qianyu Li, Shawn P. Serbin ##' soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) { - - if (is.null(site_info)) { - PEcAn.logger::logger.error("No site information found. Please provide a BETY DB site list containing at least the site id and PostGIS geometry\ + if (future::supportsMulticore()) { + future::plan(future::multicore) + } else { + future::plan(future::multisession) + } + if (is.null(site_info)) { + PEcAn.logger::logger.error("No site information found. 
Please provide a BETY DB site list containing at least the site id and PostGIS geometry\ as lon and lat") } # prepare site info for extraction - internal_site_info <- data.frame(site_info$id, site_info$sitename, site_info$lat,site_info$lon) + internal_site_info <- data.frame(site_info$site_id, site_info$site_name, site_info$lat,site_info$lon) #create a variable to store mean and quantile of organic carbon density (ocd) for each soil depth ocdquant <- matrix(NA, nrow = 6, ncol = length(internal_site_info$site_info.lon) * 4) #row represents soil depth, col represents mean, 5%, 50% and 95%-quantile of ocd for all sites lonlat <- cbind(internal_site_info$site_info.lon, internal_site_info$site_info.lat) @@ -74,55 +78,31 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) { p <- terra::vect(lonlat, crs = "+proj=longlat +datum=WGS84") # Users need to provide lon/lat newcrs <- "+proj=igh +datum=WGS84 +no_defs +towgs84=0,0,0" p_reproj <- terra::project(p, newcrs) # Transform the point vector to data with Homolosine projection - - # setup progress bar - if (verbose) { - j <- 1 - pb <- utils::txtProgressBar(min = 0, max = length(depths), char="*", width=70, style = 3) - } - + data_tag <- c("_mean.vrt", "_Q0.05.vrt", "_Q0.5.vrt", "_Q0.95.vrt") + name_tag <- expand.grid(depths, data_tag, stringsAsFactors = F)#find the combinations between data and depth tags. + L <- split(as.data.frame(name_tag), seq(nrow(as.data.frame(name_tag))))#convert tags into lists. + ocd_real <- L %>% furrr::future_map(function(l){ + ocd_url <- paste0(base_data_url, l[[1]], l[[2]]) + ocd_map <- terra::extract(terra::rast(ocd_url), p_reproj) + unlist(ocd_map[, -1])/10 + }, .progress = T) for (dep in seq_along(depths)) { - # setup virtual raster URLs for each layer - ocd_mean.url <- paste0(base_data_url,depths[dep],"_mean.vrt") - ocd_Q0.05.url <- paste0(base_data_url, depths[dep], "_Q0.05.vrt") - ocd_Q0.50.url <- paste0(base_data_url, depths[dep], "_Q0.5.vrt") - ocd_Q0.95.url <- paste0(base_data_url, depths[dep], "_Q0.95.vrt") - - # create virtual rasters && extract SOC values - the original unit is hg/m3 - ocd_mean <- terra::extract(terra::rast(ocd_mean.url), p_reproj) - ocd_Q0.05_map <- terra::extract(terra::rast(ocd_Q0.05.url), p_reproj) - ocd_Q0.50_map <- terra::extract(terra::rast(ocd_Q0.50.url), p_reproj) - ocd_Q0.95_map <- terra::extract(terra::rast(ocd_Q0.95.url), p_reproj) - - #change the unit to more common kg/m3 - ocd_mean_real <- ocd_mean[, -1] / 10 - ocd_Q0.05_real <- ocd_Q0.05_map[, -1] / 10 - ocd_Q0.50_real <- ocd_Q0.50_map[, -1] / 10 - ocd_Q0.95_real <- ocd_Q0.95_map[, -1] / 10 - - ocdquant[dep, ] <-c(ocd_mean_real,ocd_Q0.05_real,ocd_Q0.50_real,ocd_Q0.95_real) - ### Display progress to console - if (verbose) { - utils::setTxtProgressBar(pb, j) - j <- j+1 - utils::flush.console()} - - # cleanup interim results - rm(ocd_mean.url, ocd_Q0.05.url, ocd_Q0.50.url, ocd_Q0.95.url, - ocd_mean, ocd_Q0.05_map, ocd_Q0.50_map, ocd_Q0.95_map, - ocd_mean_real, ocd_Q0.05_real, ocd_Q0.50_real, ocd_Q0.95_real) - } - - - if (verbose) { - close(pb) + dep.ind <- which(grepl(depths[dep], name_tag[, 1])) + ocdquant[dep, ] <- ocd_real[dep.ind] %>% unlist } + na.ind <- which(is.na(ocdquant[1, 1:length(site_info$site_id)])) + internal_site_info <- data.frame(site_info$site_id[-na.ind], + site_info$site_name[-na.ind], + site_info$lat[-na.ind], + site_info$lon[-na.ind]) %>% `colnames<-`(names(site_info)) # parse extracted data and prepare for output - quantile_name 
<-c(paste("Mean_",site_info$id,sep=""),paste("0.05_",site_info$id,sep=""),paste("0.5_",site_info$id,sep=""),paste("0.95_",site_info$id,sep="")) + quantile_name <-c(paste("Mean_",site_info$site_id,sep=""),paste("0.05_",site_info$site_id,sep=""),paste("0.5_",site_info$site_id,sep=""),paste("0.95_",site_info$site_id,sep="")) colnames(ocdquant) <- quantile_name ocdquant_dep <- cbind(ocdquant,depths) ocd_df <- tidyr::pivot_longer(as.data.frame(ocdquant_dep),cols=tidyselect::all_of(quantile_name),names_to=c("Quantile", "Siteid"),names_sep = "_") + #remove NA from ocd_df + ocd_df <- na.omit(ocd_df) colnames(ocd_df) <- c("Depth","Quantile", "Siteid","Value") ocd_df$Value<-as.numeric(ocd_df$Value) f1<-factor(ocd_df$Siteid,levels=unique(ocd_df$Siteid)) @@ -161,21 +141,21 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) { bestPar <- sapply(score, function(f) { f$par }) mean <- bestPar[1,] / bestPar[2,] std <- sqrt(bestPar[1,] / bestPar[2,] ^ 2) - mean_site <- matrix(mean, length(internal_site_info$site_info.lon), 6) - rownames(mean_site) <- as.numeric(internal_site_info$site_info.id) + mean_site <- matrix(mean, length(internal_site_info$lon), 6) + rownames(mean_site) <- as.numeric(internal_site_info$site_id) colnames(mean_site) <- depths - mean_site.2 <- data.frame(site_id=internal_site_info$site_info.id, - lat=internal_site_info$site_info.lat, - lon=internal_site_info$site_info.lon, + mean_site.2 <- data.frame(site_id=internal_site_info$site_id, + lat=internal_site_info$lat, + lon=internal_site_info$lon, mean_site) colnames(mean_site.2)[4:9] <- depths - std_site <- matrix(std, length(internal_site_info$site_info.lon), 6) - rownames(std_site) <- as.numeric(internal_site_info$site_info.id) + std_site <- matrix(std, length(internal_site_info$lon), 6) + rownames(std_site) <- as.numeric(internal_site_info$site_id) colnames(std_site) <- depths - std_site.2 <- data.frame(site_id=internal_site_info$site_info.id, - lat=internal_site_info$site_info.lat, - lon=internal_site_info$site_info.lon, + std_site.2 <- data.frame(site_id=internal_site_info$site_id, + lat=internal_site_info$lat, + lon=internal_site_info$lon, std_site) colnames(std_site.2)[4:9] <- depths #calculate organic carbon stock (ocs) as the sum of organic carbon density multiplied by layer thickness, the unit of ocs is kg/m2, based on Eq. 
(6)in paper https://www.sciencedirect.com/science/article/pii/S2215016122000462 @@ -184,7 +164,13 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) { ocs_std <- sqrt((std_site[,1]*layer_thick[1])^2+(std_site[,2]*layer_thick[2])^2+(std_site[,3]*layer_thick[3])^2+(std_site[,4]*layer_thick[4])^2+(std_site[,5]*layer_thick[5])^2+(std_site[,6]*layer_thick[6])^2) ocs_sum_30cm <- mean_site[,1]*layer_thick[1]+mean_site[,2]*layer_thick[2]+mean_site[,3]*layer_thick[3] ocs_std_30cm <- sqrt((std_site[,1]*layer_thick[1])^2+(std_site[,2]*layer_thick[2])^2+(std_site[,3]*layer_thick[3])^2) - soilgrids_soilC_data <- data.frame(internal_site_info$site_info.id,internal_site_info$site_info.sitename,internal_site_info$site_info.lat,internal_site_info$site_info.lon,ocs_sum,ocs_std,ocs_sum_30cm,ocs_std_30cm) + soilgrids_soilC_data <- data.frame(internal_site_info$site_id, + internal_site_info$site_name, + internal_site_info$lat, + internal_site_info$lon, + ocs_sum,ocs_std, + ocs_sum_30cm, + ocs_std_30cm) colnames(soilgrids_soilC_data)<- c("Site_ID","Site_Name","Latitude","Longitude","Total_soilC_0-200cm","Std_soilC_0-200cm","Total_soilC_0-30cm","Std_soilC_0-30cm") rownames(soilgrids_soilC_data) <- NULL @@ -197,5 +183,4 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) { } # return the results to the terminal as well return(soilgrids_soilC_data) -} - +} \ No newline at end of file From df68373d2ba91afecba3fd93127a0ece1aee52f8 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 2 Feb 2024 14:30:25 -0500 Subject: [PATCH 30/48] Updated AGB prep function. --- modules/data.remote/R/Landtrendr_AGB_prep.R | 29 +++++++++------------ 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/modules/data.remote/R/Landtrendr_AGB_prep.R b/modules/data.remote/R/Landtrendr_AGB_prep.R index a5839af34ca..0f4216e7b1b 100644 --- a/modules/data.remote/R/Landtrendr_AGB_prep.R +++ b/modules/data.remote/R/Landtrendr_AGB_prep.R @@ -26,7 +26,6 @@ Landtrendr_AGB_prep <- function(site_info, start_date, end_date, time_points, } else { future::plan(future::multisession) } - #if we export CSV but didn't provide any path if(as.logical(export_csv) && is.null(outdir)){ PEcAn.logger::logger.info("If you want to export CSV file, please ensure input the outdir!") @@ -34,11 +33,9 @@ Landtrendr_AGB_prep <- function(site_info, start_date, end_date, time_points, } #Landtrendr AGB doesn't provide data after 2017. time_points <- time_points[which(lubridate::year(time_points) < 2018)] - #check the integrity of AGB files. AGB_median_years <- as.numeric(gsub(".*?([0-9]+).*", "\\1", list.files(AGB_indir, pattern = "*median.tif"))) missing_years_median <- lubridate::year((time_points[which(!lubridate::year(time_points)%in%AGB_median_years)])) #for landtrendr AGB data, we only have data before 2018. - #starting downloading if(length(missing_years_median)>0){ if(as.logical(allow_download)){ @@ -50,31 +47,37 @@ Landtrendr_AGB_prep <- function(site_info, start_date, end_date, time_points, return(0) } } - #grab previous data to see which site has incomplete observations, if so, download the site for the whole time period. 
#if we have previous downloaded CSV file if(!is.null(outdir)){ if(file.exists(file.path(outdir, "AGB.csv")) && length(buffer)==0 && as.logical(skip_buffer)){ + PEcAn.logger::logger.info("Extracting previous AGB file!") Previous_CSV <- as.data.frame(utils::read.csv(file.path(outdir, "AGB.csv"))) AGB_Output <- matrix(NA, length(site_info$site_id), 2*length(time_points)+1) %>% `colnames<-`(c("site_id", paste0(time_points, "_AbvGrndWood"), paste0(time_points, "_SD"))) %>% as.data.frame()#we need: site_id, agb, sd, target time point. AGB_Output$site_id <- site_info$site_id - #Calculate AGB for each time step and site. #loop over time and site - for (i in seq_along(time_points)) { - t <- time_points[i] + AGB.list <- time_points %>% furrr::future_map(function(t){ + out.t <- data.frame() for (id in site_info$site_id) { site_AGB <- Previous_CSV[which(Previous_CSV$site_id == id),] if(length(site_AGB$agb[which(site_AGB$date == lubridate::year(t))])==1){ - AGB_Output[which(AGB_Output$site_id==id), paste0(t, "_AbvGrndWood")] <- site_AGB$agb[which(site_AGB$date == lubridate::year(t))] - AGB_Output[which(AGB_Output$site_id==id), paste0(t, "_SD")] <- site_AGB$sd[which(site_AGB$date == lubridate::year(t))] + out.t <- rbind(out.t, list(mean = site_AGB$agb[which(site_AGB$date == lubridate::year(t))], + sd = site_AGB$sd[which(site_AGB$date == lubridate::year(t))])) + } else { + out.t <- rbind(out.t, list(mean = NA, sd = NA)) } } + out.t %>% purrr::set_names(c(paste0(t, "_AbvGrndWood"), paste0(t, "_SD"))) + }, .progress = T) + for (i in seq_along(time_points)) { + t <- time_points[i]#otherwise the t will be number instead of date. + AGB_Output[, paste0(t, "_AbvGrndWood")] <- AGB.list[[i]][,paste0(t, "_AbvGrndWood")] + AGB_Output[, paste0(t, "_SD")] <- AGB.list[[i]][,paste0(t, "_SD")] } } } - #only Site that has NA for any time points need to be downloaded. if(!exists("AGB_Output")){ AGB_Output <- matrix(NA, length(site_info$site_id), 2*length(time_points)+1) %>% @@ -82,7 +85,6 @@ Landtrendr_AGB_prep <- function(site_info, start_date, end_date, time_points, AGB_Output$site_id <- site_info$site_id } AGB_Output_temp <- AGB_Output - new_site_info <- site_info %>% purrr::map(function(x)x[!stats::complete.cases(AGB_Output)]) #if we have any site missing previously if(length(new_site_info$site_id) != 0){ @@ -148,11 +150,9 @@ Landtrendr_AGB_prep <- function(site_info, start_date, end_date, time_points, as.data.frame%>% `colnames<-`(c("site_id", paste0(time_points, "_AbvGrndWood"), paste0(time_points, "_SD"))) } - #prepare CSV from AGB_Output Current_CSV <- matrix(NA, 0, 6) %>% `colnames<-`(c("date", "site_id", "lat", "lon", "agb", "sd")) - for (id in AGB_Output$site_id) { site_AGB <- unlist(AGB_Output[which(AGB_Output$site_id==id),]) for (i in seq_along(time_points)) { @@ -165,7 +165,6 @@ Landtrendr_AGB_prep <- function(site_info, start_date, end_date, time_points, Current_CSV <- rbind(Current_CSV, tibble::tibble(date, site_id, lat, lon, agb, sd))#in date, id, lat, lon, agb, sd } } - #Compare with existing CSV file. (We name the CSV file as AGB.csv) if(export_csv){ if(exists("Previous_CSV")){#we already read the csv file previously. @@ -176,11 +175,9 @@ Landtrendr_AGB_prep <- function(site_info, start_date, end_date, time_points, utils::write.csv(Current_CSV, file = file.path(outdir, "AGB.csv"), row.names = FALSE) } } - #write current csv into AGB_Output data frame. 
#recreate the AGB_Output object AGB_Output <- AGB_Output_temp - #loop over time and site for (i in seq_along(time_points)) { t <- time_points[i]#otherwise the t will be number instead of date. From e248d6642d934dee2e7621be94f8f1650338a5e5 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 2 Feb 2024 14:30:38 -0500 Subject: [PATCH 31/48] Updated LAI prep functions. --- modules/data.remote/R/MODIS_LAI_prep.R | 190 ++++++++++++++-------- modules/data.remote/man/MODIS_LAI_prep.Rd | 12 -- 2 files changed, 122 insertions(+), 80 deletions(-) diff --git a/modules/data.remote/R/MODIS_LAI_prep.R b/modules/data.remote/R/MODIS_LAI_prep.R index 6f81a65060f..d5f070b5729 100644 --- a/modules/data.remote/R/MODIS_LAI_prep.R +++ b/modules/data.remote/R/MODIS_LAI_prep.R @@ -1,11 +1,7 @@ #' Prepare MODIS LAI data for the SDA workflow. #' #' @param site_info Bety list of site info including site_id, lon, and lat. -#' @param start_date Start date of SDA workflow. -#' @param end_date End date of SDA workflow. #' @param time_points A vector contains each time point within the start and end date. -#' @param NCore Number of CPU to be used for LAI extraction. -#' @param run_parallel Bool variable decide if you want to proceed parallelly. #' @param outdir Where the final CSV file will be stored. #' @param search_window search window for locate available LAI values. #' @param export_csv Decide if we want to export the CSV file. @@ -16,54 +12,55 @@ #' @examples #' @author Dongchen Zhang #' @importFrom magrittr %>% -MODIS_LAI_prep <- function(site_info, start_date, end_date, time_points, - NCore = NULL, run_parallel = FALSE, outdir = NULL, search_window = 30, export_csv = FALSE){ +MODIS_LAI_prep <- function(site_info, time_points, outdir = NULL, search_window = 30, export_csv = FALSE){ + #initialize future parallel computation. + if (future::supportsMulticore()) { + future::plan(future::multicore, workers = 10) + } else { + future::plan(future::multisession, workers = 10) #10 is the maximum number of requests permitted for the MODIS server. + } + #if we export CSV but didn't provide any path if(as.logical(export_csv) && is.null(outdir)){ PEcAn.logger::logger.info("If you want to export CSV file, please ensure input the outdir!") return(0) } - - #collect available dates for all sites. - time_exist <- c() - for (i in seq_along(site_info$site_id)) { - time_exist <- c(time_exist, MODISTools::mt_dates("MOD15A2H", site_info$lat[i], site_info$lon[i])["calendar_date"]) - } - PEcAn.logger::logger.info("MODIS LAI Dates Matches Completed!") - time_exist <- sort(unique(do.call("c", time_exist))) - - new_time_points <- as.Date(c()) + #convert time points into paired start and end dates. + start.end.dates <- data.frame() for (i in seq_along(time_points)) { - if(sum((abs(lubridate::days(lubridate::date(time_points[i]))@day- - lubridate::days(lubridate::date(time_exist))@day)<=search_window))>0){ - new_time_points <- c(new_time_points, time_points[i]) - } + start.end.dates <- rbind(start.end.dates, + list(start_date = as.character(time_points[i] - lubridate::days(search_window)), + end_date = as.character(time_points[i] + lubridate::days(search_window)))) + } - time_points <- new_time_points - #grab previous data to see which site has incomplete observations, if so, download the site for the whole time period. 
#if we have a previously downloaded CSV file
  if(file.exists(file.path(outdir, "LAI.csv"))){
+    PEcAn.logger::logger.info("Extracting previous LAI file!")
     Previous_CSV <- utils::read.csv(file.path(outdir, "LAI.csv"))
     LAI_Output <- matrix(NA, length(site_info$site_id), 2*length(time_points)+1) %>%
       `colnames<-`(c("site_id", paste0(time_points, "_LAI"), paste0(time_points, "_SD"))) %>%
       as.data.frame()#we need: site_id, LAI, std, target time point.
     LAI_Output$site_id <- site_info$site_id
     #Calculate LAI for each time step and site.
     #loop over time and site
-    for (i in seq_along(time_points)) {
-      t <- time_points[i]#otherwise the t will be number instead of date.
+    LAI.list <- time_points %>% furrr::future_map(function(t){
+      out.t <- data.frame()
       for (id in site_info$site_id) {
         site_LAI <- Previous_CSV[which(Previous_CSV$site_id == id),]
         site_LAI$sd[which(site_LAI$sd<=0.66)] <- 0.66
         diff_days <- abs(lubridate::days(lubridate::date(site_LAI$date)-lubridate::date(t))@day)
-        if(sum(diff_days <= as.numeric(search_window))){#data found
-          IND <- which((diff_days <= as.numeric(search_window)))
-          IND1 <- which(site_LAI$lai[IND] == max(site_LAI$lai[IND]))[1]
-          LAI_Output[which(LAI_Output$site_id==id), paste0(t, "_LAI")] <- max(site_LAI$lai[IND[IND1]])
-          LAI_Output[which(LAI_Output$site_id==id), paste0(t, "_SD")] <- site_LAI$sd[IND[IND1]]
+        if(any(diff_days <= search_window)){#data found
+          out.t <- rbind(out.t, list(mean = site_LAI$lai[which.min(diff_days)], sd = site_LAI$sd[which.min(diff_days)]))
+        } else {
+          out.t <- rbind(out.t, list(mean = NA, sd = NA))
         }
       }
+      out.t %>% purrr::set_names(c(paste0(t, "_LAI"), paste0(t, "_SD")))
+    }, .progress = T)
+    for (i in seq_along(time_points)) {
+      t <- time_points[i]#otherwise the t will be number instead of date.
+      LAI_Output[, paste0(t, "_LAI")] <- LAI.list[[i]][,paste0(t, "_LAI")]
+      LAI_Output[, paste0(t, "_SD")] <- LAI.list[[i]][,paste0(t, "_SD")]
     }
   }else{#we don't have a previously downloaded CSV file.
     LAI_Output <- matrix(NA, length(site_info$site_id), 2*length(time_points)+1) %>%
@@ -72,45 +69,101 @@ MODIS_LAI_prep <- function(site_info, start_date, end_date, time_points,
   }
   #only sites that have NA for any time points need to be downloaded.
   new_site_info <- site_info %>% purrr::map(function(x)x[!stats::complete.cases(LAI_Output)])
-
+  #filter out unreachable sites (reachable.ind is TRUE where the MODIS server responds for a site).
+  PEcAn.logger::logger.info("filter out unreachable sites!")
+  reachable.ind <- split(as.data.frame(new_site_info), seq(nrow(as.data.frame(new_site_info)))) %>%
+    furrr::future_map(function(s){
+      if (! "try-error" %in% class(try(mean <- MODISTools::mt_dates(product = "MOD11A2",
+                                                                    lat = s$lat,
+                                                                    lon = s$lon)))) {
+        return(TRUE)
+      } else {
+        return(FALSE)
+      }
+    }, .progress = T) %>% unlist
+  new_site_info <- new_site_info %>% purrr::map(function(x)x[which(reachable.ind)])
  #if we have any site missing previously
  #TODO: only download data for specific date when we have missing data.
  if(length(new_site_info$site_id) != 0){
-    #converting from date to YEAR-DOY(example: 2012-01-01 to 2012001)
-    start_YEARDOY <- paste0(lubridate::year(start_date),sprintf("%03d", 1))#using 1 and 365 DOY to avoid any possible missing data.
- end_YEARDOY <- paste0(lubridate::year(end_date),sprintf("%03d", 365)) - - #download LAI data and LAI std - lai_data <- PEcAn.data.remote::call_MODIS(outdir = NULL, - var = "LAI", - site_info = new_site_info, - product_dates = c(start_YEARDOY, end_YEARDOY), - run_parallel = as.logical(run_parallel), - ncores = NCore, - product = "MOD15A2H", - band = "Lai_500m", - package_method = "MODISTools", - QC_filter = TRUE, - progress = FALSE) - - lai_sd <- PEcAn.data.remote::call_MODIS(outdir = NULL, - var = "LAI", - site_info = new_site_info, - product_dates = c(start_YEARDOY, end_YEARDOY), - run_parallel = as.logical(run_parallel), - ncores = NCore, - product = "MOD15A2H", - band = "LaiStdDev_500m", - package_method = "MODISTools", - QC_filter = TRUE, progress = FALSE) - - #combine data together and pick what we want - LAI <- cbind(lai_data %>% as.data.frame %>% dplyr::select("calendar_date", "site_id", "lat", "lon", "data", "qc"), lai_sd$data) %>% - `colnames<-`(c("date", "site_id", "lat", "lon", "lai", "qc", "sd")) - - #QC filter - LAI <- LAI[-which(LAI$qc=="001"),] %>% dplyr::select(-6)#remove qc band - + product <- "MCD15A3H" + PEcAn.logger::logger.info("Extracting LAI mean products!") + lai_mean <- split(as.data.frame(new_site_info), seq(nrow(as.data.frame(new_site_info)))) %>% + furrr::future_map(function(s){ + split(as.data.frame(start.end.dates), seq(nrow(as.data.frame(start.end.dates)))) %>% + purrr::map(function(dates){ + if (! "try-error" %in% class(try(mean <- MODISTools::mt_subset(product = product, + lat = s$lat, + lon = s$lon, + band = "Lai_500m", + start = dates$start_date, + end = dates$end_date, + progress = FALSE)))) { + return(list(mean = mean$value, date = mean$calendar_date)) + } else { + return(NA) + } + }) %>% dplyr::bind_rows() + }, .progress = T) + PEcAn.logger::logger.info("Extracting LAI std products!") + lai_std <- split(as.data.frame(new_site_info), seq(nrow(as.data.frame(new_site_info)))) %>% + furrr::future_map(function(s){ + split(as.data.frame(start.end.dates), seq(nrow(as.data.frame(start.end.dates)))) %>% + purrr::map(function(dates){ + if (! "try-error" %in% class(try(std <- MODISTools::mt_subset(product = product, + lat = s$lat, + lon = s$lon, + band = "LaiStdDev_500m", + start = dates$start_date, + end = dates$end_date, + progress = FALSE)))) { + return(std$value) + } else { + return(NA) + } + }) %>% unlist %>% set_names(NULL) + }, .progress = T) + PEcAn.logger::logger.info("Extracting LAI qc products!") + lai_qc <- split(as.data.frame(new_site_info), seq(nrow(as.data.frame(new_site_info)))) %>% + furrr::future_map(function(s){ + split(as.data.frame(start.end.dates), seq(nrow(as.data.frame(start.end.dates)))) %>% + purrr::map(function(dates){ + if (! "try-error" %in% class(try(qc <- MODISTools::mt_subset(product = product, + lat = s$lat, + lon = s$lon, + band = "FparLai_QC", + start = dates$start_date, + end = dates$end_date, + progress = FALSE)))) { + qc$value %>% purrr::map(function(v){ + qc_flag <- intToBits(as.integer(v)) # NB big-endian (ones place first) + qc_flag <- as.integer(rev(utils::head(qc_flag, 3))) # now ones place last + paste(qc_flag, collapse = "") + }) + } else { + return(NA) + } + }) %>% unlist %>% set_names(NULL) + }, .progress = T) + # LAI <- data.frame(matrix(NA, 0, 6)) %>% `colnames<-`(c("date", "site_id", "lat", "lon", "lai", "sd")) + LAI <- data.frame() + for (i in seq_along(lai_std)) { + for (j in seq_along(lai_std[[i]])) { + # skip pixels with NA observation. 
+ if (is.na(lai_std[[i]][j])) { + next + } + # skip bad pixels based on qc band. + if (! lai_qc[[i]][j] %in% c("000")) { + next + } + LAI <- rbind(LAI, list(date = lai_mean[[i]]$date[j], + site_id = site_info$site_id[i], + lat = site_info$lat[i], + lon = site_info$lon[i], + lai = lai_mean[[i]]$mean[j]*0.1, + sd = lai_std[[i]][j]*0.1)) + } + } #Compare with existing CSV file. (We name the CSV file as LAI.csv) if(as.logical(export_csv)){ if(exists("Previous_CSV")){#we already read the csv file previously. @@ -121,8 +174,9 @@ MODIS_LAI_prep <- function(site_info, start_date, end_date, time_points, Current_CSV <- LAI utils::write.csv(Current_CSV, file = file.path(outdir, "LAI.csv"), row.names = FALSE) } + } else { + Current_CSV <- LAI } - #Calculate LAI for each time step and site. #loop over time and site for (i in seq_along(time_points)) { @@ -142,4 +196,4 @@ MODIS_LAI_prep <- function(site_info, start_date, end_date, time_points, } PEcAn.logger::logger.info("MODIS LAI Prep Completed!") list(LAI_Output = LAI_Output, time_points = time_points, var = "LAI") -} +} \ No newline at end of file diff --git a/modules/data.remote/man/MODIS_LAI_prep.Rd b/modules/data.remote/man/MODIS_LAI_prep.Rd index 47d427b1326..5e089472872 100644 --- a/modules/data.remote/man/MODIS_LAI_prep.Rd +++ b/modules/data.remote/man/MODIS_LAI_prep.Rd @@ -6,11 +6,7 @@ \usage{ MODIS_LAI_prep( site_info, - start_date, - end_date, time_points, - NCore = NULL, - run_parallel = FALSE, outdir = NULL, search_window = 30, export_csv = FALSE @@ -19,16 +15,8 @@ MODIS_LAI_prep( \arguments{ \item{site_info}{Bety list of site info including site_id, lon, and lat.} -\item{start_date}{Start date of SDA workflow.} - -\item{end_date}{End date of SDA workflow.} - \item{time_points}{A vector contains each time point within the start and end date.} -\item{NCore}{Number of CPU to be used for LAI extraction.} - -\item{run_parallel}{Bool variable decide if you want to proceed parallelly.} - \item{outdir}{Where the final CSV file will be stored.} \item{search_window}{search window for locate available LAI values.} From b1c8b43fa2a46e18ee211f91a64ae2f5b1477aca Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Fri, 2 Feb 2024 14:30:49 -0500 Subject: [PATCH 32/48] Updated SMAP prep function. --- modules/data.remote/R/SMAP_SMP_prep.R | 50 ++++++++++++++------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/modules/data.remote/R/SMAP_SMP_prep.R b/modules/data.remote/R/SMAP_SMP_prep.R index 06635f38a47..5eca6d16750 100644 --- a/modules/data.remote/R/SMAP_SMP_prep.R +++ b/modules/data.remote/R/SMAP_SMP_prep.R @@ -23,20 +23,21 @@ SMAP_SMP_prep <- function(site_info, start_date, end_date, time_points, #Feel free to contact Dongchen Zhang (zhangdc@bu.edu) who wrote this code. #The SMAP.csv file will be generated the first time when you use this code. #for the next time, it will save you lot of time if you can provide the SMAP.csv directly. - + #Initialize the multicore computation. + if (future::supportsMulticore()) { + future::plan(future::multicore) + } else { + future::plan(future::multisession) + } #check if SMAP.csv exists. 
if(!file.exists(file.path(outdir, "SMAP.csv")) | as.logical(update_csv)){ if(!file.exists(file.path(outdir, "SMAP_gee.csv"))){ PEcAn.logger::logger.info("Please Provide SMAP dir that contains at least the SMAP_gee.csv file!") return(0) }else{ - SMAP_gee <- utils::read.csv(file.path(outdir, "SMAP_gee.csv")) - - #prepare CSV from Current SMAP_gee file - SMAP_CSV <- matrix(NA, 0, 6) %>% `colnames<-`(c("date", "site_id", "lat", "lon", "smp", "sd")) - - for (i in 2 : dim(SMAP_gee)[1]) { - String <- strsplit(gsub(",", "", gsub("\\[|\\]", "", SMAP_gee[i,2])), " ")[[1]] + SMAP_CSV <- utils::read.csv(file.path(outdir, "SMAP_gee.csv"))[-1,2] %>% + furrr::future_map(function(string){ + String <- strsplit(gsub(",", "", gsub("\\[|\\]", "", string)), " ")[[1]] date <- as.Date(strsplit(String[1], "_")[[1]][5], "%Y%m%d") lon <- as.numeric(String[2]) lat <- as.numeric(String[3]) @@ -48,9 +49,9 @@ SMAP_SMP_prep <- function(site_info, start_date, end_date, time_points, Distance <- sp::spDistsN1(Longlat_matrix, Longlat_matrix[1,], longlat = TRUE)[-1] distloc <- match(min(Distance), Distance) site_id <- site_info$site_id[distloc] - - SMAP_CSV <- rbind(SMAP_CSV, tibble::tibble(date, site_id, lat, lon, smp, sd))#in date, id, lat, lon, smp, sd - } + list(date = date, site_id = site_id, lat = lat, lon = lon, smp = smp, sd = sd)#in date, id, lat, lon, smp, sd + }, .progress = T) %>% dplyr::bind_rows() + #write out csv file. if(as.logical((export_csv))){ utils::write.csv(SMAP_CSV, file = file.path(outdir, "SMAP.csv"), row.names = F) } @@ -67,31 +68,32 @@ SMAP_SMP_prep <- function(site_info, start_date, end_date, time_points, return(0) } } - time_points <- time_points[which(lubridate::year(time_points)>=2015)] #filter out any time points that are before 2015 - #initialize SMAP_Output SMAP_Output <- matrix(NA, length(site_info$site_id), 2*length(time_points)+1) %>% `colnames<-`(c("site_id", paste0(time_points, "_SoilMoistFrac"), paste0(time_points, "_SD"))) %>% as.data.frame()#we need: site_id, LAI, std, target time point. SMAP_Output$site_id <- site_info$site_id - #Calculate SMAP for each time step and site. #loop over time and site - for (i in seq_along(time_points)) { - t <- time_points[i]#otherwise the t will be number instead of date. + PEcAn.logger::logger.info("Extracting previous SMAP file!") + SMAP.list <- time_points %>% furrr::future_map(function(t){ + out.t <- data.frame() for (id in site_info$site_id) { site_SMP <- SMAP_CSV[which(SMAP_CSV$site_id == id),] diff_days <- abs(lubridate::days(lubridate::date(site_SMP$date)-lubridate::date(t))@day) - if(sum(diff_days <= search_window)){#data found - SMAP_Output[which(SMAP_Output$site_id==id), paste0(t, "_SoilMoistFrac")] <- site_SMP$smp[which.min(diff_days)] - SMAP_Output[which(SMAP_Output$site_id==id), paste0(t, "_SD")] <- site_SMP$sd[which.min(diff_days)] + if(any(diff_days <= search_window)){#data found + out.t <- rbind(out.t, list(mean = site_SMP$smp[which.min(diff_days)], sd = site_SMP$sd[which.min(diff_days)])) + } else { + out.t <- rbind(out.t, list(mean = NA, sd = NA)) } } + out.t %>% purrr::set_names(c(paste0(t, "_SoilMoistFrac"), paste0(t, "_SD"))) + }, .progress = T) + for (i in seq_along(time_points)) { + t <- time_points[i]#otherwise the t will be number instead of date. 
+ SMAP_Output[, paste0(t, "_SoilMoistFrac")] <- SMAP.list[[i]][,paste0(t, "_SoilMoistFrac")] + SMAP_Output[, paste0(t, "_SD")] <- SMAP.list[[i]][,paste0(t, "_SD")] } PEcAn.logger::logger.info("SMAP SMP Prep Completed!") list(SMP_Output = SMAP_Output, time_points = time_points, var = "SoilMoistFrac") -} - - - - +} \ No newline at end of file From 194ad76c701f5ba95e5fe9be0fac5fbc36c51ab7 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 13 Feb 2024 14:20:16 -0500 Subject: [PATCH 33/48] Add furrr to the dependencies of remote. --- docker/depends/pecan_package_dependencies.csv | 1 + modules/data.remote/DESCRIPTION | 1 + 2 files changed, 2 insertions(+) diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv index e5b0618d941..bf8aa1bc253 100644 --- a/docker/depends/pecan_package_dependencies.csv +++ b/docker/depends/pecan_package_dependencies.csv @@ -71,6 +71,7 @@ "fs","*","modules/data.land","Imports",FALSE "furrr","*","modules/assim.sequential","Imports",FALSE "furrr","*","modules/data.atmosphere","Suggests",FALSE +"furrr","*","modules/data.remote","Imports",FALSE "future","*","modules/assim.sequential","Imports",FALSE "future","*","modules/data.atmosphere","Suggests",FALSE "geonames","> 0.998","modules/data.atmosphere","Imports",FALSE diff --git a/modules/data.remote/DESCRIPTION b/modules/data.remote/DESCRIPTION index 617f49b1338..a51a2b97d03 100644 --- a/modules/data.remote/DESCRIPTION +++ b/modules/data.remote/DESCRIPTION @@ -14,6 +14,7 @@ Description: PEcAn module for processing remote data. Python module requirements Imports: curl, DBI, + furrr, glue, ncdf4, PEcAn.DB, From 03ef6ead877f8b3e674ca88112ff3d3f51364c72 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 13 Feb 2024 16:28:15 -0500 Subject: [PATCH 34/48] add namespace for %>% --- base/remote/NAMESPACE | 1 + base/remote/R/qsub_parallel.R | 1 + 2 files changed, 2 insertions(+) diff --git a/base/remote/NAMESPACE b/base/remote/NAMESPACE index 7bbfc743fcc..0f8751522cf 100644 --- a/base/remote/NAMESPACE +++ b/base/remote/NAMESPACE @@ -22,4 +22,5 @@ export(start_qsub) export(start_rabbitmq) export(start_serial) export(test_remote) +importFrom(dplyr,"%>%") importFrom(foreach,"%dopar%") diff --git a/base/remote/R/qsub_parallel.R b/base/remote/R/qsub_parallel.R index 8dc304f7ad4..396e959c152 100644 --- a/base/remote/R/qsub_parallel.R +++ b/base/remote/R/qsub_parallel.R @@ -13,6 +13,7 @@ #' @author Dongchen Zhang #' #' @importFrom foreach %dopar% +#' @importFrom dplyr %>% qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out", sleep = 10, hybrid = TRUE) { if("try-error" %in% class(try(find.package("doSNOW"), silent = T))){ PEcAn.logger::logger.info("Package doSNOW is not installed! Please install it and rerun the function!") From 970a701bb5e443d18b98244ec22d818bb69fe2e7 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 13 Feb 2024 16:40:38 -0500 Subject: [PATCH 35/48] Change to purrr. 
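Background for the pipe import churn in this and the next patch: %>% is not
part of base R, so a package using it internally must import it from a package
that re-exports it (magrittr is the origin; dplyr and purrr both re-export
it), and that same package must also be declared under Imports: in DESCRIPTION
or the namespace fails to load. A minimal sketch of the matching pair:

    #' @importFrom purrr %>%
    # ...with the corresponding DESCRIPTION entry:
    # Imports:
    #     purrr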
--- base/remote/NAMESPACE | 2 +- base/remote/R/qsub_parallel.R | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/base/remote/NAMESPACE b/base/remote/NAMESPACE index 0f8751522cf..27cce2707d5 100644 --- a/base/remote/NAMESPACE +++ b/base/remote/NAMESPACE @@ -22,5 +22,5 @@ export(start_qsub) export(start_rabbitmq) export(start_serial) export(test_remote) -importFrom(dplyr,"%>%") importFrom(foreach,"%dopar%") +importFrom(purrr,"%>%") diff --git a/base/remote/R/qsub_parallel.R b/base/remote/R/qsub_parallel.R index 396e959c152..e0cd3ef2d6f 100644 --- a/base/remote/R/qsub_parallel.R +++ b/base/remote/R/qsub_parallel.R @@ -13,7 +13,7 @@ #' @author Dongchen Zhang #' #' @importFrom foreach %dopar% -#' @importFrom dplyr %>% +#' @importFrom purrr %>% qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out", sleep = 10, hybrid = TRUE) { if("try-error" %in% class(try(find.package("doSNOW"), silent = T))){ PEcAn.logger::logger.info("Package doSNOW is not installed! Please install it and rerun the function!") From 0b0899da26a00a567728c3a22b8390a2e62b28a9 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 13 Feb 2024 16:44:49 -0500 Subject: [PATCH 36/48] add namespace --- base/remote/DESCRIPTION | 1 + base/remote/NAMESPACE | 2 +- base/remote/R/qsub_parallel.R | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/base/remote/DESCRIPTION b/base/remote/DESCRIPTION index e55509e8e1c..f32ddb03a59 100644 --- a/base/remote/DESCRIPTION +++ b/base/remote/DESCRIPTION @@ -17,6 +17,7 @@ Authors@R: c(person("David", "LeBauer", role = c("aut"), Description: This package contains utilities for communicating with and executing code on local and remote hosts. In particular, it has PEcAn-specific utilities for starting ecosystem model runs. Imports: + dplyr, foreach, PEcAn.logger, httr, diff --git a/base/remote/NAMESPACE b/base/remote/NAMESPACE index 27cce2707d5..0f8751522cf 100644 --- a/base/remote/NAMESPACE +++ b/base/remote/NAMESPACE @@ -22,5 +22,5 @@ export(start_qsub) export(start_rabbitmq) export(start_serial) export(test_remote) +importFrom(dplyr,"%>%") importFrom(foreach,"%dopar%") -importFrom(purrr,"%>%") diff --git a/base/remote/R/qsub_parallel.R b/base/remote/R/qsub_parallel.R index e0cd3ef2d6f..396e959c152 100644 --- a/base/remote/R/qsub_parallel.R +++ b/base/remote/R/qsub_parallel.R @@ -13,7 +13,7 @@ #' @author Dongchen Zhang #' #' @importFrom foreach %dopar% -#' @importFrom purrr %>% +#' @importFrom dplyr %>% qsub_parallel <- function(settings, files = NULL, prefix = "sipnet.out", sleep = 10, hybrid = TRUE) { if("try-error" %in% class(try(find.package("doSNOW"), silent = T))){ PEcAn.logger::logger.info("Package doSNOW is not installed! 
Please install it and rerun the function!") From 629cdb3a1091ca477e8abf66566ac23079d41594 Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 13 Feb 2024 16:54:21 -0500 Subject: [PATCH 37/48] update documentation --- docker/depends/pecan_package_dependencies.csv | 1 + 1 file changed, 1 insertion(+) diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv index 19178d2aaf6..a52e9c9d641 100644 --- a/docker/depends/pecan_package_dependencies.csv +++ b/docker/depends/pecan_package_dependencies.csv @@ -49,6 +49,7 @@ "dplR","*","modules/data.land","Imports",FALSE "dplyr","*","base/db","Imports",FALSE "dplyr","*","base/qaqc","Imports",FALSE +"dplyr","*","base/remote","Imports",FALSE "dplyr","*","base/utils","Imports",FALSE "dplyr","*","base/workflow","Imports",FALSE "dplyr","*","models/biocro","Imports",FALSE From fb6a7eb571438646b21568742a950e94200e404e Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 13 Feb 2024 17:07:23 -0500 Subject: [PATCH 38/48] Add dependency. --- base/remote/DESCRIPTION | 1 + docker/depends/pecan_package_dependencies.csv | 1 + 2 files changed, 2 insertions(+) diff --git a/base/remote/DESCRIPTION b/base/remote/DESCRIPTION index f32ddb03a59..190e2816bf5 100644 --- a/base/remote/DESCRIPTION +++ b/base/remote/DESCRIPTION @@ -19,6 +19,7 @@ Description: This package contains utilities for communicating with and executin Imports: dplyr, foreach, + furrr, PEcAn.logger, httr, jsonlite, diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv index a52e9c9d641..4ddd1f64dc1 100644 --- a/docker/depends/pecan_package_dependencies.csv +++ b/docker/depends/pecan_package_dependencies.csv @@ -70,6 +70,7 @@ "foreach","*","modules/data.remote","Imports",FALSE "fs","*","base/db","Imports",FALSE "fs","*","modules/data.land","Imports",FALSE +"furrr","*","base/remote","Imports",FALSE "furrr","*","modules/assim.sequential","Imports",FALSE "furrr","*","modules/data.atmosphere","Suggests",FALSE "furrr","*","modules/data.remote","Imports",FALSE From 05f500671274399ac6c9b1a67e878c5fea14e6ce Mon Sep 17 00:00:00 2001 From: Dongchen Zhang Date: Tue, 13 Feb 2024 17:48:52 -0500 Subject: [PATCH 39/48] GitHub bug fixes. --- modules/data.atmosphere/R/ERA5_met_process.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/data.atmosphere/R/ERA5_met_process.R b/modules/data.atmosphere/R/ERA5_met_process.R index a3255909a2c..2f3bd7ddcee 100644 --- a/modules/data.atmosphere/R/ERA5_met_process.R +++ b/modules/data.atmosphere/R/ERA5_met_process.R @@ -81,8 +81,8 @@ ERA5_met_process <- function(settings, in.path, out.path, write.db=FALSE, write } #restructure the site_info into list. 
From 05f500671274399ac6c9b1a67e878c5fea14e6ce Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Tue, 13 Feb 2024 17:48:52 -0500
Subject: [PATCH 39/48] GitHub bug fixes.

---
 modules/data.atmosphere/R/ERA5_met_process.R | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/modules/data.atmosphere/R/ERA5_met_process.R b/modules/data.atmosphere/R/ERA5_met_process.R
index a3255909a2c..2f3bd7ddcee 100644
--- a/modules/data.atmosphere/R/ERA5_met_process.R
+++ b/modules/data.atmosphere/R/ERA5_met_process.R
@@ -81,8 +81,8 @@ ERA5_met_process <- function(settings, in.path, out.path, write.db=FALSE, write
   }
   #restructure the site_info into list.
-  site_info$start_date <- rep(settings$state.data.assimilation$start.date, length(settings))
-  site_info$end_date <- rep(settings$state.data.assimilation$end.date, length(settings))
+  site_info$start_date <- start_date <- rep(settings$state.data.assimilation$start.date, length(settings))
+  site_info$end_date <- end_date <- rep(settings$state.data.assimilation$end.date, length(settings))
   site_info$out.path <- rep(out.path, length(settings))
   site_info$in.path <- rep(in.path, length(settings))
   site_info$model.type <- rep(settings$model$type, length(settings))
@@ -154,7 +154,7 @@ ERA5_met_process <- function(settings, in.path, out.path, write.db=FALSE, write
     settings[[i]]$run$site$met.end <- settings[[i]]$run$end.date <- settings[[i]]$state.data.assimilation$end.date
-    settings[[i]]$run$inputs$met <- as.list(unlist(Clim_paths[[i]])) %>% set_names(rep("path", length(Clim_paths[[i]])))
+    settings[[i]]$run$inputs$met <- as.list(unlist(Clim_paths[[i]])) %>% purrr::set_names(rep("path", length(Clim_paths[[i]])))
   }
   #write settings into xml file.

From 295b8e7e6ce33c6fd5c7327a6d20ec1552a32b12 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Tue, 13 Feb 2024 18:15:08 -0500
Subject: [PATCH 40/48] GitHub bug fix.

---
 modules/data.atmosphere/R/extract_ERA5.R       | 3 ++-
 modules/data.atmosphere/man/extract.nc.ERA5.Rd | 4 ++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/modules/data.atmosphere/R/extract_ERA5.R b/modules/data.atmosphere/R/extract_ERA5.R
index e6e956aa1a0..ddbcb78d4af 100644
--- a/modules/data.atmosphere/R/extract_ERA5.R
+++ b/modules/data.atmosphere/R/extract_ERA5.R
@@ -10,7 +10,8 @@
 #' @param newsite site name.
 #' @param vars variables to be extracted. If NULL all the variables will be returned.
 #' @param overwrite Logical if files needs to be overwritten.
-#' @verbose Decide if we want to stop printing info.
+#' @param verbose Decide if we want to stop printing info.
+#' @param ... other inputs.
 #' @details For the list of variables check out the documentation at \link{https://confluence.ecmwf.int/display/CKB/ERA5+data+documentation#ERA5datadocumentation-Spatialgrid}
 #'
 #' @return a list of xts objects with all the variables for the requested years
diff --git a/modules/data.atmosphere/man/extract.nc.ERA5.Rd b/modules/data.atmosphere/man/extract.nc.ERA5.Rd
index cf91b8ab287..7dd24354abd 100644
--- a/modules/data.atmosphere/man/extract.nc.ERA5.Rd
+++ b/modules/data.atmosphere/man/extract.nc.ERA5.Rd
@@ -39,6 +39,10 @@ extract.nc.ERA5(
 \item{vars}{variables to be extracted. If NULL all the variables will be returned.}

 \item{overwrite}{Logical if files needs to be overwritten.}
+
+\item{verbose}{Decide if we want to stop printing info.}
+
+\item{...}{other inputs.}
 }
 \value{
 a list of xts objects with all the variables for the requested years
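The R-file change in patch 40 replaces a bare @verbose, which roxygen2 does not recognize as a tag and which therefore left the argument undocumented for R CMD check, with proper @param entries; the .Rd hunk is the regenerated result. The general shape of the tag, shown on a hypothetical function for illustration:

    #' Example extractor
    #'
    #' @param verbose Decide if we want to stop printing info.
    #' @param ... other inputs.
    #' @return NULL, invisibly.
    example_extract <- function(verbose = TRUE, ...) {
      if (verbose) message("extracting ...")
      invisible(NULL)
    }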
From 81441989786e2d702d665e334d043a7bae53fb84 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Tue, 13 Feb 2024 18:33:33 -0500
Subject: [PATCH 41/48] Update namespace and dependencies.

---
 docker/depends/pecan_package_dependencies.csv  | 3 ++-
 modules/assim.batch/DESCRIPTION                | 1 -
 modules/data.land/DESCRIPTION                  | 2 ++
 modules/data.land/R/soilgrids_soc_extraction.R | 2 +-
 4 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index 4ddd1f64dc1..4f15952dc31 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -56,7 +56,6 @@
 "dplyr","*","models/ed","Imports",FALSE
 "dplyr","*","models/ldndc","Imports",FALSE
 "dplyr","*","models/stics","Imports",FALSE
-"dplyr","*","modules/assim.batch","Imports",FALSE
 "dplyr","*","modules/assim.sequential","Imports",FALSE
 "dplyr","*","modules/benchmark","Imports",FALSE
 "dplyr","*","modules/data.atmosphere","Imports",FALSE
@@ -73,9 +72,11 @@
 "furrr","*","base/remote","Imports",FALSE
 "furrr","*","modules/assim.sequential","Imports",FALSE
 "furrr","*","modules/data.atmosphere","Suggests",FALSE
+"furrr","*","modules/data.land","Imports",FALSE
 "furrr","*","modules/data.remote","Imports",FALSE
 "future","*","modules/assim.sequential","Imports",FALSE
 "future","*","modules/data.atmosphere","Suggests",FALSE
+"future","*","modules/data.land","Imports",FALSE
 "geonames","> 0.998","modules/data.atmosphere","Imports",FALSE
 "getPass","*","base/remote","Suggests",FALSE
 "ggmcmc","*","modules/meta.analysis","Suggests",FALSE
diff --git a/modules/assim.batch/DESCRIPTION b/modules/assim.batch/DESCRIPTION
index 7452bc62393..f56a0c91ccb 100644
--- a/modules/assim.batch/DESCRIPTION
+++ b/modules/assim.batch/DESCRIPTION
@@ -23,7 +23,6 @@ Imports:
   MASS,
   methods,
   mlegp,
-  dplyr,
   ellipse,
   graphics,
   grDevices,
diff --git a/modules/data.land/DESCRIPTION b/modules/data.land/DESCRIPTION
index eb187ac1667..bb2be86c649 100644
--- a/modules/data.land/DESCRIPTION
+++ b/modules/data.land/DESCRIPTION
@@ -29,6 +29,8 @@ Imports:
   dplyr,
   dplR,
   fs,
+  future,
+  furrr,
   httr,
   lubridate,
   magrittr,
diff --git a/modules/data.land/R/soilgrids_soc_extraction.R b/modules/data.land/R/soilgrids_soc_extraction.R
index 2131832ca50..fd20e42093a 100644
--- a/modules/data.land/R/soilgrids_soc_extraction.R
+++ b/modules/data.land/R/soilgrids_soc_extraction.R
@@ -52,7 +52,7 @@
 ##'
 ##' @export
 ##' @author Qianyu Li, Shawn P. Serbin
-##'
+##' @importFrom magrittr %>%
 soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) {
   if (future::supportsMulticore()) {
     future::plan(future::multicore)

From 9e258103c5ee49d301d1fb106051c238070f04b2 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Tue, 13 Feb 2024 20:06:17 -0500
Subject: [PATCH 42/48] update documentation.

---
 modules/assim.batch/tests/Rcheck_reference.log | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/assim.batch/tests/Rcheck_reference.log b/modules/assim.batch/tests/Rcheck_reference.log
index 5e4b5185f07..ceaf7cc3ef1 100644
--- a/modules/assim.batch/tests/Rcheck_reference.log
+++ b/modules/assim.batch/tests/Rcheck_reference.log
@@ -9,7 +9,7 @@
 * package encoding: UTF-8
 * checking package namespace information ... OK
 * checking package dependencies ... NOTE
-Imports includes 27 non-default packages.
+Imports includes 26 non-default packages.
 Importing from so many packages makes the package vulnerable to any of
 them becoming unavailable. Move as many as possible to Suggests and
 use conditionally.
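The soilgrids function touched in patch 41 opens with the backend selection visible in the hunk context: pick a forked-process plan where the platform supports it. A standalone sketch of that pattern (the multisession fallback here is an assumption, not shown in the hunk):

    library(furrr)

    if (future::supportsMulticore()) {
      future::plan(future::multicore)     # forked workers (Linux/macOS)
    } else {
      future::plan(future::multisession)  # background R sessions elsewhere
    }

    # furrr mirrors the purrr map interface but runs on the chosen plan
    furrr::future_map_dbl(c(4, 9, 16), sqrt)
    #> [1] 2 3 4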
From 0573ef33e491513875bfdee9efeb6625bfaa45e3 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Tue, 13 Feb 2024 20:18:40 -0500
Subject: [PATCH 43/48] Update namespace and dependencies.

---
 Makefile.depends                               | 2 +-
 base/visualization/DESCRIPTION                 | 3 ---
 docker/depends/pecan_package_dependencies.csv  | 3 ---
 modules/data.land/R/soilgrids_soc_extraction.R | 2 +-
 4 files changed, 2 insertions(+), 8 deletions(-)

diff --git a/Makefile.depends b/Makefile.depends
index fafd9065418..10e0eb373ed 100644
--- a/Makefile.depends
+++ b/Makefile.depends
@@ -5,7 +5,7 @@ $(call depends,base/qaqc): | .install/base/db .install/base/logger .install/base
 $(call depends,base/remote): | .install/base/logger
 $(call depends,base/settings): | .install/base/db .install/base/logger .install/base/remote .install/base/utils
 $(call depends,base/utils): | .install/base/logger
-$(call depends,base/visualization): | .install/base/db .install/base/logger .install/base/utils
+$(call depends,base/visualization): | .install/base/logger
 $(call depends,base/workflow): | .install/base/db .install/base/logger .install/base/remote .install/base/settings .install/base/utils .install/modules/data.atmosphere .install/modules/data.land .install/modules/uncertainty
 $(call depends,models/basgra): | .install/base/logger .install/base/utils .install/modules/data.atmosphere
 $(call depends,models/biocro): | .install/base/db .install/base/logger .install/base/remote .install/base/settings .install/base/utils .install/modules/data.atmosphere .install/modules/data.land
diff --git a/base/visualization/DESCRIPTION b/base/visualization/DESCRIPTION
index 2aece439bdd..9d210b6fa0f 100644
--- a/base/visualization/DESCRIPTION
+++ b/base/visualization/DESCRIPTION
@@ -31,11 +31,8 @@ Description: The Predictive Ecosystem Carbon Analyzer (PEcAn) is a scientific
 Imports:
   data.table,
   ggplot2,
-  maps,
   ncdf4 (>= 1.15),
-  PEcAn.DB,
   PEcAn.logger,
-  PEcAn.utils,
   plyr (>= 1.8.4),
   reshape2,
   rlang,
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index 4f15952dc31..ecd833b9dde 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -161,7 +161,6 @@
 "magrittr","*","modules/data.atmosphere","Imports",FALSE
 "magrittr","*","modules/data.land","Imports",FALSE
 "magrittr","*","modules/data.remote","Imports",FALSE
-"maps","*","base/visualization","Imports",FALSE
 "markdown","*","modules/allometry","Suggests",FALSE
 "markdown","*","modules/photosynthesis","Suggests",FALSE
 "MASS","*","base/utils","Suggests",FALSE
@@ -268,7 +267,6 @@
 "PEcAn.DB","*","base/all","Depends",TRUE
 "PEcAn.DB","*","base/qaqc","Imports",TRUE
 "PEcAn.DB","*","base/settings","Imports",TRUE
-"PEcAn.DB","*","base/visualization","Imports",TRUE
 "PEcAn.DB","*","base/workflow","Imports",TRUE
 "PEcAn.DB","*","models/biocro","Suggests",TRUE
 "PEcAn.DB","*","models/ldndc","Imports",TRUE
@@ -381,7 +379,6 @@
 "PEcAn.utils","*","base/db","Imports",TRUE
 "PEcAn.utils","*","base/qaqc","Suggests",TRUE
 "PEcAn.utils","*","base/settings","Imports",TRUE
-"PEcAn.utils","*","base/visualization","Imports",TRUE
 "PEcAn.utils","*","base/workflow","Imports",TRUE
 "PEcAn.utils","*","models/biocro","Imports",TRUE
 "PEcAn.utils","*","models/clm45","Depends",TRUE
diff --git a/modules/data.land/R/soilgrids_soc_extraction.R b/modules/data.land/R/soilgrids_soc_extraction.R
index fd20e42093a..a9ffbba433c 100644
--- a/modules/data.land/R/soilgrids_soc_extraction.R
+++ b/modules/data.land/R/soilgrids_soc_extraction.R
@@ -102,7 +102,7 @@ soilgrids_soilC_extract <- function (site_info, outdir=NULL, verbose=TRUE) {
   ocdquant_dep <- cbind(ocdquant,depths)
   ocd_df <- tidyr::pivot_longer(as.data.frame(ocdquant_dep),cols=tidyselect::all_of(quantile_name),names_to=c("Quantile", "Siteid"),names_sep = "_")
   #remove NA from ocd_df
-  ocd_df <- na.omit(ocd_df)
+  ocd_df <- stats::na.omit(ocd_df)
   colnames(ocd_df) <- c("Depth","Quantile", "Siteid","Value")
   ocd_df$Value<-as.numeric(ocd_df$Value)
   f1<-factor(ocd_df$Siteid,levels=unique(ocd_df$Siteid))

From 0af5c7b9da51400dca776bf0576ef06957e471a4 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Tue, 13 Feb 2024 20:30:34 -0500
Subject: [PATCH 44/48] Update dependencies.

---
 Makefile.depends                              | 2 +-
 docker/depends/pecan_package_dependencies.csv | 1 -
 models/ldndc/DESCRIPTION                      | 1 -
 3 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/Makefile.depends b/Makefile.depends
index 10e0eb373ed..8c517a6b45c 100644
--- a/Makefile.depends
+++ b/Makefile.depends
@@ -17,7 +17,7 @@ $(call depends,models/ed): | .install/base/logger .install/base/remote .install/
 $(call depends,models/fates): | .install/base/logger .install/base/remote .install/base/utils
 $(call depends,models/gday): | .install/base/logger .install/base/remote .install/base/utils
 $(call depends,models/jules): | .install/base/logger .install/base/remote .install/base/utils .install/modules/data.atmosphere
-$(call depends,models/ldndc): | .install/base/db .install/base/logger .install/base/remote .install/base/utils .install/modules/data.atmosphere
+$(call depends,models/ldndc): | .install/base/logger .install/base/remote .install/base/utils .install/modules/data.atmosphere
 $(call depends,models/linkages): | .install/base/logger .install/base/remote .install/base/utils .install/modules/data.atmosphere
 $(call depends,models/lpjguess): | .install/base/logger .install/base/remote .install/base/utils
 $(call depends,models/maat): | .install/base/logger .install/base/remote .install/base/settings .install/base/utils .install/modules/data.atmosphere
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index ecd833b9dde..52f7ce3a00f 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -269,7 +269,6 @@
 "PEcAn.DB","*","base/settings","Imports",TRUE
 "PEcAn.DB","*","base/workflow","Imports",TRUE
 "PEcAn.DB","*","models/biocro","Suggests",TRUE
-"PEcAn.DB","*","models/ldndc","Imports",TRUE
 "PEcAn.DB","*","models/stics","Imports",TRUE
 "PEcAn.DB","*","models/template","Imports",TRUE
 "PEcAn.DB","*","modules/allometry","Imports",TRUE
diff --git a/models/ldndc/DESCRIPTION b/models/ldndc/DESCRIPTION
index a6b59412583..3f08af471a8 100644
--- a/models/ldndc/DESCRIPTION
+++ b/models/ldndc/DESCRIPTION
@@ -7,7 +7,6 @@ Authors@R: c(person("Henri", "Kajasilta", role = c("aut", "cre"),
              email = "henri.kajasilta@fmi.fi"))
 Description: This module provides functions to link the (LDNDC) to PEcAn.
 Imports:
-  PEcAn.DB,
   PEcAn.logger,
   PEcAn.utils (>= 1.4.8),
   dplyr,
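The stats::na.omit change in patch 43 is the standard cure for R CMD check's "no visible global function definition" NOTE: helpers that live in stats, utils, or graphics must either be fully qualified or declared with @importFrom. A toy illustration:

    x <- c(1.2, NA, 3.4)
    stats::na.omit(x)
    #> [1] 1.2 3.4
    #> attr(,"na.action")
    #> [1] 2
    #> attr(,"class")
    #> [1] "omit"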
From 81441989786e2d702d665e334d043a7bae53fb84 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Tue, 13 Feb 2024 20:32:54 -0500
Subject: [PATCH 45/48] Update documentation.

---
 modules/data.land/tests/Rcheck_reference.log | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/data.land/tests/Rcheck_reference.log b/modules/data.land/tests/Rcheck_reference.log
index beeca4c7797..291c81162de 100644
--- a/modules/data.land/tests/Rcheck_reference.log
+++ b/modules/data.land/tests/Rcheck_reference.log
@@ -10,7 +10,7 @@
 * checking package namespace information ... OK
 * checking package dependencies ... WARNING

-Imports includes 33 non-default packages.
+Imports includes 35 non-default packages.
 Importing from so many packages makes the package vulnerable to any of
 them becoming unavailable. Move as many as possible to Suggests and
 use conditionally.

From 3dc5f257d6f9fe8f345c8f57ded1016dce6189e4 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 14 Feb 2024 10:35:45 -0500
Subject: [PATCH 46/48] Update log file.

---
 modules/data.land/tests/Rcheck_reference.log | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/modules/data.land/tests/Rcheck_reference.log b/modules/data.land/tests/Rcheck_reference.log
index 8c6553065f7..856f43fd98c 100644
--- a/modules/data.land/tests/Rcheck_reference.log
+++ b/modules/data.land/tests/Rcheck_reference.log
@@ -8,8 +8,7 @@
 * this is package ‘PEcAn.data.land’ version ‘1.7.2.9000’
 * package encoding: UTF-8
 * checking package namespace information ... OK
-* checking package dependencies ... WARNING
-
+* checking package dependencies ... NOTE
 Imports includes 35 non-default packages.
 Importing from so many packages makes the package vulnerable to any of
 them becoming unavailable. Move as many as possible to Suggests and
@@ -1078,6 +1077,4 @@ Please remove it.

 Status: 7 WARNINGs, 4 NOTEs
 See ‘/tmp/RtmpBGazJV/PEcAn.data.land.Rcheck/00check.log’
-for details.
-
-
+for details.
\ No newline at end of file

From 999bb0d3e45ca35a306b861ad67601fa6de7d9b8 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 14 Feb 2024 10:48:27 -0500
Subject: [PATCH 47/48] Update dependency.

---
 docker/depends/pecan_package_dependencies.csv | 1 +
 modules/data.remote/DESCRIPTION               | 1 +
 2 files changed, 2 insertions(+)

diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
index 29b4c3453f8..f83a09de0ef 100644
--- a/docker/depends/pecan_package_dependencies.csv
+++ b/docker/depends/pecan_package_dependencies.csv
@@ -77,6 +77,7 @@
 "future","*","modules/assim.sequential","Imports",FALSE
 "future","*","modules/data.atmosphere","Suggests",FALSE
 "future","*","modules/data.land","Imports",FALSE
+"future","*","modules/data.remote","Imports",FALSE
 "geonames","> 0.998","modules/data.atmosphere","Imports",FALSE
 "getPass","*","base/remote","Suggests",FALSE
 "ggmcmc","*","modules/meta.analysis","Suggests",FALSE
diff --git a/modules/data.remote/DESCRIPTION b/modules/data.remote/DESCRIPTION
index a51a2b97d03..420b9681e8b 100644
--- a/modules/data.remote/DESCRIPTION
+++ b/modules/data.remote/DESCRIPTION
@@ -15,6 +15,7 @@ Imports:
   curl,
   DBI,
   furrr,
+  future,
   glue,
   ncdf4,
   PEcAn.DB,
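Patch 47 pairs future with furrr in data.remote's Imports. The split reflects the division of labor: furrr supplies the future_map() family, while future supplies plan() and backend selection, so a package that calls future::plan() directly (as the soilgrids extractor in patch 41 does) must declare both. A short sketch with toy inputs:

    future::plan(future::multisession, workers = 2)
    furrr::future_map_chr(c("a", "b"), toupper)
    #> [1] "A" "B"
    future::plan(future::sequential)  # restore the default backend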
From 79d7e28e9e0345ed5cb5f6121c450bcef60e3683 Mon Sep 17 00:00:00 2001
From: Dongchen Zhang
Date: Wed, 14 Feb 2024 11:00:07 -0500
Subject: [PATCH 48/48] Add namespace.

---
 modules/data.remote/R/MODIS_LAI_prep.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/data.remote/R/MODIS_LAI_prep.R b/modules/data.remote/R/MODIS_LAI_prep.R
index d5f070b5729..36de23f4d98 100644
--- a/modules/data.remote/R/MODIS_LAI_prep.R
+++ b/modules/data.remote/R/MODIS_LAI_prep.R
@@ -120,7 +120,7 @@ MODIS_LAI_prep <- function(site_info, time_points, outdir = NULL, search_window
       } else {
         return(NA)
       }
-    }) %>% unlist %>% set_names(NULL)
+    }) %>% unlist %>% purrr::set_names(NULL)
   }, .progress = T)
   PEcAn.logger::logger.info("Extracting LAI qc products!")
   lai_qc <- split(as.data.frame(new_site_info), seq(nrow(as.data.frame(new_site_info)))) %>%
@@ -142,7 +142,7 @@ MODIS_LAI_prep <- function(site_info, time_points, outdir = NULL, search_window
       } else {
         return(NA)
       }
-    }) %>% unlist %>% set_names(NULL)
+    }) %>% unlist %>% purrr::set_names(NULL)
   }, .progress = T)
   # LAI <- data.frame(matrix(NA, 0, 6)) %>% `colnames<-`(c("date", "site_id", "lat", "lon", "lai", "sd"))
   LAI <- data.frame()
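As with stats::na.omit earlier, qualifying set_names in this final patch resolves bare calls that R CMD check would otherwise flag as having no visible global function definition inside the package namespace. A minimal sketch of set_names(), including the NULL form used in the diff:

    purrr::set_names(1:3, c("a", "b", "c"))    # attach names
    #> a b c
    #> 1 2 3
    purrr::set_names(c(a = 1, b = 2), NULL)    # NULL strips names, as above
    #> [1] 1 2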