Skip to content

Commit

Permalink
Merge pull request #3211 from JoshuaPloshay/develop
Browse files Browse the repository at this point in the history
Add the down_scale function to pecan.
  • Loading branch information
mdietze authored Mar 7, 2024
2 parents fc65daf + beb21fd commit 4564337
Show file tree
Hide file tree
Showing 5 changed files with 155 additions and 26 deletions.
51 changes: 25 additions & 26 deletions docker/depends/pecan.depends.R
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ condense_version_requirements <- function(specs) {
specs <- unique(specs[specs != "*"])
versions <- package_version(
gsub("[^[:digit:].-]+", "", specs))

if ((length(unique(versions)) > 1) && any(!grepl(">", specs))) {
# Can't assume the latest version works for all, so give up.
# We *could* write more to handle this case if needed, but it seems very rare:
Expand All @@ -39,30 +39,30 @@ condense_version_requirements <- function(specs) {
# Install <version> or newer,
# upgrading dependencies only if needed to satisfy stated version requirements
ensure_version <- function(pkg, version) {
vers <- gsub('[^[:digit:].-]+', '', version)
cmp <- get(gsub('[^<>=]+', '', version))
ok <- requireNamespace(pkg, quietly = TRUE) &&
cmp(packageVersion(pkg), vers)
if (!ok) {
# install pkg and any *missing* dependencies
remotes::install_version(pkg, version, dependencies = TRUE, upgrade = FALSE)
# Now check for installed but *incompatible* dependencies
# (install_version doesn't resolve these when upgrade=FALSE)
dep <- desc::desc_get_deps(system.file("DESCRIPTION", package = pkg))
dep <- dep[
dep$type %in% c("Depends", "Imports", "LinkingTo")
& dep$version != "*"
& dep$package != "R",]
invisible(Map(ensure_version, dep$package, dep$version))
}

vers <- gsub('[^[:digit:].-]+', '', version)
cmp <- get(gsub('[^<>=]+', '', version))
ok <- requireNamespace(pkg, quietly = TRUE) &&
cmp(packageVersion(pkg), vers)
if (!ok) {
# install pkg and any *missing* dependencies
remotes::install_version(pkg, version, dependencies = TRUE, upgrade = FALSE)
# Now check for installed but *incompatible* dependencies
# (install_version doesn't resolve these when upgrade=FALSE)
dep <- desc::desc_get_deps(system.file("DESCRIPTION", package = pkg))
dep <- dep[
dep$type %in% c("Depends", "Imports", "LinkingTo")
& dep$version != "*"
& dep$package != "R",]
invisible(Map(ensure_version, dep$package, dep$version))
}
}

# Read list of dependencies.
# NOTE: These files are autogenerated --
# use scripts/generate_dependencies.R to edit them.
all_deps <- read.csv("pecan_package_dependencies.csv") |>
subset(!is_pecan)
subset(!is_pecan)
gh_repos <- readLines("pecan_deps_from_github.txt")


Expand All @@ -74,9 +74,9 @@ remotes::install_github(gh_repos, lib = rlib)

# For deps used by multiple packages, find a version that works for all
uniq_deps <- tapply(
all_deps$version,
INDEX = all_deps$package,
FUN = condense_version_requirements)
all_deps$version,
INDEX = all_deps$package,
FUN = condense_version_requirements)


# Install deps that declare no version restriction.
Expand All @@ -92,9 +92,8 @@ install.packages(missing, lib = rlib)
# it can't fill the version req from snapshot versions.
# (Assumes our CRAN uses the same URL scheme as Posit package manager)
options(repos = c(
getOption('repos'),
sub(r'(\d{4}-\d{2}-\d{2})', 'latest', getOption('repos'))
getOption('repos'),
sub(r'(\d{4}-\d{2}-\d{2})', 'latest', getOption('repos'))
))
versioned <- uniq_deps[uniq_deps != "*"]
invisible(Map(ensure_version, names(versioned), versioned))

invisible(Map(ensure_version, names(versioned), versioned))
3 changes: 3 additions & 0 deletions docker/depends/pecan_package_dependencies.csv
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,7 @@
"purrr",">= 0.2.3","modules/data.atmosphere","Imports",FALSE
"pwr","*","modules/rtm","Suggests",FALSE
"R.utils","*","base/db","Imports",FALSE
"randomForest","*","modules/assim.sequential","Suggests",FALSE
"randtoolbox","*","base/utils","Suggests",FALSE
"randtoolbox","*","modules/uncertainty","Imports",FALSE
"raster","*","base/visualization","Suggests",FALSE
Expand All @@ -445,6 +446,7 @@
"raster","*","modules/data.land","Suggests",FALSE
"raster","*","modules/data.remote","Suggests",FALSE
"rcrossref","*","base/db","Suggests",FALSE
"readr","*","modules/assim.sequential","Suggests",FALSE
"REddyProc","*","modules/data.atmosphere","Imports",FALSE
"redland","*","modules/data.land","Suggests",FALSE
"reshape","*","modules/data.remote","Suggests",FALSE
Expand Down Expand Up @@ -554,6 +556,7 @@
"stringr",">= 1.1.0","modules/data.atmosphere","Imports",FALSE
"suntools","*","modules/data.atmosphere","Imports",FALSE
"swfscMisc","*","modules/data.land","Imports",FALSE
"terra","*","modules/assim.sequential","Suggests",FALSE
"terra","*","modules/data.atmosphere","Imports",FALSE
"terra","*","modules/data.land","Imports",FALSE
"terra","*","modules/data.remote","Imports",FALSE
Expand Down
3 changes: 3 additions & 0 deletions modules/assim.sequential/DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -47,11 +47,14 @@ Suggests:
PEcAn.visualization,
plotrix,
plyr (>= 1.8.4),
randomForest,
raster,
readr,
reshape2 (>= 1.4.2),
rlist,
sf,
stats,
terra,
testthat,
tictoc,
tidyr,
Expand Down
93 changes: 93 additions & 0 deletions modules/assim.sequential/R/downscale_function.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
##' @title North America Downscale Function
##' @name NA_downscale
##' @author Joshua Ploshay
##'
##' @param data In quotes, file path for .rds containing ensemble data.
##' @param focus_year In quotes, if SDA site run, format is yyyy/mm/dd, if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
##' @param C_pool In quotes, carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
##' @param covariates In quotes, file path of SpatRaster stack, used as predictors in randomForest. Layers within stack should be named.
##' @param cords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
##' @details This function will downscale forecast data to unmodeled locations using covariates and site locations
##'
##' @description This function uses the randomForest model.
##'
##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.


NA_downscale <- function(data, cords, covariates, focus_year, C_pool){

# Read in the covariates and set CRS to EPSG:4326
covariates <- terra::rast(covariates) # ADD package to every function
terra::crs(covariates) <- "EPSG:4326"

# Read the input data and site coordinates
input_data <- readRDS(data)
site_coordinates <- terra::vect(readr::read_csv(cords), geom=c("lon", "lat"), crs="EPSG:4326")

# Extract the carbon data for the specified focus year
index <- which(names(input_data) == focus_year)
data <- input_data[[index]]
carbon_data <- as.data.frame(t(data[which(names(data) == C_pool)]))
names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data)))

# Extract predictors from covariates raster using site coordinates
predictors <- as.data.frame(terra::extract(covariates, site_coordinates))
predictors <- dplyr::select(predictors, -1)

# Combine each ensemble member with all predictors
ensembles <- list()
for (i in seq_along(carbon_data)) {
ensembles[[i]] <- cbind(carbon_data[[i]], predictors)
}

# Rename the carbon_data column for each ensemble member
for (i in 1:length(ensembles)) {
ensembles[[i]] <- dplyr::rename(ensembles[[i]], "carbon_data" = "carbon_data[[i]]")
}

# Split the observations in each data frame into two data frames based on the proportion of 3/4
ensembles <- lapply(ensembles, function(df) {
sample <- sample(1:nrow(df), size = round(0.75*nrow(df)))
train <- df[sample, ]
test <- df[-sample, ]
split_list <- list(train, test)
return(split_list)
})

# Rename the training and testing data frames for each ensemble member
for (i in 1:length(ensembles)) {
# names(ensembles) <- paste0("ensemble",seq(1:length(ensembles)))
names(ensembles[[i]]) <- c("training", "testing")
}

# Train a random forest model for each ensemble member using the training data
output <- list()
for (i in 1:length(ensembles)) {
output[[i]] <- randomForest::randomForest(ensembles[[i]][[1]][["carbon_data"]] ~ land_cover+tavg+prec+srad+vapr+nitrogen+phh2o+soc+sand,
data = ensembles[[i]][[1]],
ntree = 1000,
na.action = stats::na.omit,
keep.forest = T,
importance = T)
}

# Generate predictions (maps) for each ensemble member using the trained models
maps <- list(ncol(output))
for (i in 1:length(output)) {
maps[[i]] <- terra::predict(object = covariates,
model = output[[i]],na.rm = T)
}

# Organize the results into a single output list
downscale_output <- list(ensembles, output, maps)

# Rename each element of the output list with appropriate ensemble numbers
for (i in 1:length(downscale_output)) {
names(downscale_output[[i]]) <- paste0("ensemble",seq(1:length(downscale_output[[i]])))
}

# Rename the main components of the output list
names(downscale_output) <- c("data", "models", "maps")

return(downscale_output)
}
31 changes: 31 additions & 0 deletions modules/assim.sequential/man/NA_downscale.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 4564337

Please sign in to comment.