Merge pull request #3211 from JoshuaPloshay/develop

Add the down_scale function to pecan.
PecanProject · Mar 7, 2024 · 4564337 · 4564337
2 parents fc65daf + beb21fd
commit 4564337
Show file tree

Hide file tree

Showing 5 changed files with 155 additions and 26 deletions.
diff --git a/docker/depends/pecan.depends.R b/docker/depends/pecan.depends.R
@@ -15,7 +15,7 @@ condense_version_requirements <- function(specs) {
   specs <- unique(specs[specs != "*"])
   versions <- package_version(
     gsub("[^[:digit:].-]+", "", specs))
-
+  
   if ((length(unique(versions)) > 1) && any(!grepl(">", specs))) {
     # Can't assume the latest version works for all, so give up.
     # We *could* write more to handle this case if needed, but it seems very rare:
@@ -39,30 +39,30 @@ condense_version_requirements <- function(specs) {
 # Install <version> or newer,
 # upgrading dependencies only if needed to satisfy stated version requirements
 ensure_version <- function(pkg, version) {
-    vers <- gsub('[^[:digit:].-]+', '', version)
-    cmp <- get(gsub('[^<>=]+', '', version))
-    ok <- requireNamespace(pkg, quietly = TRUE) &&
-        cmp(packageVersion(pkg), vers)
-    if (!ok) {
-        # install pkg and any *missing* dependencies
-        remotes::install_version(pkg, version, dependencies = TRUE, upgrade = FALSE)
-        # Now check for installed but *incompatible* dependencies
-        # (install_version doesn't resolve these when upgrade=FALSE)
-        dep <- desc::desc_get_deps(system.file("DESCRIPTION", package = pkg))
-        dep <- dep[
-            dep$type %in% c("Depends", "Imports", "LinkingTo")
-            & dep$version != "*"
-            & dep$package != "R",]
-        invisible(Map(ensure_version, dep$package, dep$version))
-    }
-
+  vers <- gsub('[^[:digit:].-]+', '', version)
+  cmp <- get(gsub('[^<>=]+', '', version))
+  ok <- requireNamespace(pkg, quietly = TRUE) &&
+    cmp(packageVersion(pkg), vers)
+  if (!ok) {
+    # install pkg and any *missing* dependencies
+    remotes::install_version(pkg, version, dependencies = TRUE, upgrade = FALSE)
+    # Now check for installed but *incompatible* dependencies
+    # (install_version doesn't resolve these when upgrade=FALSE)
+    dep <- desc::desc_get_deps(system.file("DESCRIPTION", package = pkg))
+    dep <- dep[
+      dep$type %in% c("Depends", "Imports", "LinkingTo")
+      & dep$version != "*"
+      & dep$package != "R",]
+    invisible(Map(ensure_version, dep$package, dep$version))
+  }
+  
 }
 
 # Read list of dependencies.
 # NOTE: These files are autogenerated --
 # use scripts/generate_dependencies.R to edit them.
 all_deps <- read.csv("pecan_package_dependencies.csv") |>
-    subset(!is_pecan)
+  subset(!is_pecan)
 gh_repos <- readLines("pecan_deps_from_github.txt")
 
 
@@ -74,9 +74,9 @@ remotes::install_github(gh_repos, lib = rlib)
 
 # For deps used by multiple packages, find a version that works for all
 uniq_deps <- tapply(
-        all_deps$version,
-        INDEX = all_deps$package,
-        FUN = condense_version_requirements)
+  all_deps$version,
+  INDEX = all_deps$package,
+  FUN = condense_version_requirements)
 
 
 # Install deps that declare no version restriction.
@@ -92,9 +92,8 @@ install.packages(missing, lib = rlib)
 # it can't fill the version req from snapshot versions.
 # (Assumes our CRAN uses the same URL scheme as Posit package manager)
 options(repos = c(
-    getOption('repos'),
-    sub(r'(\d{4}-\d{2}-\d{2})', 'latest', getOption('repos'))
+  getOption('repos'),
+  sub(r'(\d{4}-\d{2}-\d{2})', 'latest', getOption('repos'))
 ))
 versioned <- uniq_deps[uniq_deps != "*"]
-invisible(Map(ensure_version, names(versioned), versioned))
-
+invisible(Map(ensure_version, names(versioned), versioned))
diff --git a/docker/depends/pecan_package_dependencies.csv b/docker/depends/pecan_package_dependencies.csv
@@ -437,6 +437,7 @@
 "purrr",">= 0.2.3","modules/data.atmosphere","Imports",FALSE
 "pwr","*","modules/rtm","Suggests",FALSE
 "R.utils","*","base/db","Imports",FALSE
+"randomForest","*","modules/assim.sequential","Suggests",FALSE
 "randtoolbox","*","base/utils","Suggests",FALSE
 "randtoolbox","*","modules/uncertainty","Imports",FALSE
 "raster","*","base/visualization","Suggests",FALSE
@@ -445,6 +446,7 @@
 "raster","*","modules/data.land","Suggests",FALSE
 "raster","*","modules/data.remote","Suggests",FALSE
 "rcrossref","*","base/db","Suggests",FALSE
+"readr","*","modules/assim.sequential","Suggests",FALSE
 "REddyProc","*","modules/data.atmosphere","Imports",FALSE
 "redland","*","modules/data.land","Suggests",FALSE
 "reshape","*","modules/data.remote","Suggests",FALSE
@@ -554,6 +556,7 @@
 "stringr",">= 1.1.0","modules/data.atmosphere","Imports",FALSE
 "suntools","*","modules/data.atmosphere","Imports",FALSE
 "swfscMisc","*","modules/data.land","Imports",FALSE
+"terra","*","modules/assim.sequential","Suggests",FALSE
 "terra","*","modules/data.atmosphere","Imports",FALSE
 "terra","*","modules/data.land","Imports",FALSE
 "terra","*","modules/data.remote","Imports",FALSE

diff --git a/modules/assim.sequential/DESCRIPTION b/modules/assim.sequential/DESCRIPTION
@@ -47,11 +47,14 @@ Suggests:
     PEcAn.visualization,
     plotrix,
     plyr (>= 1.8.4),
+    randomForest,
     raster,
+    readr,
     reshape2 (>= 1.4.2),
     rlist,
     sf,
     stats,
+    terra,
     testthat,
     tictoc,
     tidyr,

diff --git a/modules/assim.sequential/R/downscale_function.R b/modules/assim.sequential/R/downscale_function.R
@@ -0,0 +1,93 @@
+##' @title North America Downscale Function
+##' @name NA_downscale
+##' @author Joshua Ploshay
+##'
+##' @param data  In quotes, file path for .rds containing ensemble data.
+##' @param focus_year In quotes, if SDA site run, format is yyyy/mm/dd, if NEON, yyyy-mm-dd. Restricted to years within file supplied to 'data'.
+##' @param C_pool In quotes, carbon pool of interest. Name must match carbon pool name found within file supplied to 'data'.
+##' @param covariates In quotes, file path of SpatRaster stack, used as predictors in randomForest. Layers within stack should be named.
+##' @param cords In quotes, file path for .csv file containing the site coordinates, columns named "lon" and "lat".
+##' @details This function will downscale forecast data to unmodeled locations using covariates and site locations
+##'
+##' @description This function uses the randomForest model.
+##'
+##' @return It returns the `downscale_output` list containing lists for the training and testing data sets, models, and predicted maps for each ensemble member.
+
+
+NA_downscale <- function(data, cords, covariates, focus_year, C_pool){
+
+  # Read in the covariates and set CRS to EPSG:4326
+  covariates <- terra::rast(covariates) # ADD package to every function
+  terra::crs(covariates) <- "EPSG:4326"
+
+  # Read the input data and site coordinates
+  input_data <- readRDS(data)
+  site_coordinates <- terra::vect(readr::read_csv(cords), geom=c("lon", "lat"), crs="EPSG:4326")
+
+  # Extract the carbon data for the specified focus year
+  index <- which(names(input_data) == focus_year)
+  data <- input_data[[index]]
+  carbon_data <- as.data.frame(t(data[which(names(data) == C_pool)]))
+  names(carbon_data) <- paste0("ensemble",seq(1:ncol(carbon_data)))
+
+  # Extract predictors from covariates raster using site coordinates
+  predictors <- as.data.frame(terra::extract(covariates, site_coordinates)) 
+  predictors <- dplyr::select(predictors, -1)
+
+  # Combine each ensemble member with all predictors
+  ensembles <- list()
+  for (i in seq_along(carbon_data)) {
+    ensembles[[i]] <- cbind(carbon_data[[i]], predictors)
+  }
+
+  # Rename the carbon_data column for each ensemble member
+  for (i in 1:length(ensembles)) {
+    ensembles[[i]] <- dplyr::rename(ensembles[[i]], "carbon_data" = "carbon_data[[i]]")
+  }
+
+  # Split the observations in each data frame into two data frames based on the proportion of 3/4
+  ensembles <- lapply(ensembles, function(df) {
+    sample <- sample(1:nrow(df), size = round(0.75*nrow(df)))
+    train  <- df[sample, ]
+    test   <- df[-sample, ]
+    split_list <- list(train, test)
+    return(split_list)
+  })
+
+  # Rename the training and testing data frames for each ensemble member
+  for (i in 1:length(ensembles)) {
+    # names(ensembles) <- paste0("ensemble",seq(1:length(ensembles)))
+    names(ensembles[[i]]) <- c("training", "testing")
+  }
+
+  # Train a random forest model for each ensemble member using the training data
+  output <- list()
+  for (i in 1:length(ensembles)) {
+    output[[i]] <- randomForest::randomForest(ensembles[[i]][[1]][["carbon_data"]] ~ land_cover+tavg+prec+srad+vapr+nitrogen+phh2o+soc+sand,
+                                data = ensembles[[i]][[1]],
+                                ntree = 1000,
+                                na.action = stats::na.omit,
+                                keep.forest = T,
+                                importance = T)
+  }
+
+  # Generate predictions (maps) for each ensemble member using the trained models
+  maps <- list(ncol(output))
+  for (i in 1:length(output)) {
+    maps[[i]] <- terra::predict(object = covariates,
+                         model = output[[i]],na.rm = T)
+  }
+
+  # Organize the results into a single output list
+  downscale_output <- list(ensembles, output, maps)
+
+  # Rename each element of the output list with appropriate ensemble numbers
+  for (i in 1:length(downscale_output)) {
+    names(downscale_output[[i]]) <- paste0("ensemble",seq(1:length(downscale_output[[i]])))
+  }
+
+  # Rename the main components of the output list
+  names(downscale_output) <- c("data", "models", "maps")
+
+  return(downscale_output)
+}
diff --git a/modules/assim.sequential/man/NA_downscale.Rd b/modules/assim.sequential/man/NA_downscale.Rd