Merge branch 'develop' into amfLBL-fix

PecanProject · Sep 23, 2024 · c5c52b7 · c5c52b7
2 parents 0e1544f + fbac460
commit c5c52b7
Show file tree

Hide file tree

Showing 6 changed files with 224 additions and 44 deletions.
diff --git a/.github/workflows/download-met-data.yml b/.github/workflows/download-met-data.yml
@@ -0,0 +1,57 @@
+name : Test Data Download
+on :
+  # allow manual triggering
+  workflow_dispatch:
+
+  schedule:
+    # run Thursday 4:30 AM UTC
+  - cron: '30 4 * * 4'
+
+env:
+  R_LIBS_USER: /usr/local/lib/R/site-library
+  LC_ALL: en_US.UTF-8
+  NCPUS: 2
+  PGHOST: postgres
+  CI: true
+
+jobs:
+  met-data-download:
+    runs-on: ubuntu-latest
+    env:
+      GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }}
+
+    services:
+      postgres:
+        image: mdillon/postgis:9.5
+        options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5
+
+    container: 
+      image: pecan/depends:R4.1
+
+    steps:
+    # checkout source code
+    - name: work around https://github.com/actions/checkout/issues/766
+      run: git config --global --add safe.directory "$GITHUB_WORKSPACE"
+    - uses: actions/checkout@v4
+      with:
+        set-safe-directory: false
+
+    # install additional tools needed
+    - name: install utils
+      run: apt-get update && apt-get install -y postgresql-client qpdf
+    - name: install new dependencies
+      run: Rscript scripts/generate_dependencies.R && cd docker/depends && Rscript pecan.depends.R
+
+    # initialize database
+    - name: db setup
+      uses: docker://pecan/db:ci
+    - name: add models to db
+      run: ./scripts/add.models.sh
+
+    # compile PEcAn code
+    - name: build
+      run: make -j1
+
+    - name: CRUNCEP
+      run: |
+        Rscript ./tests/test_met_downloads.R --settings ./tests/met_download_settings/docker.CRUNCEP.xml
diff --git a/base/db/R/dbfiles.R b/base/db/R/dbfiles.R
@@ -50,9 +50,8 @@ dbfile.input.insert <- function(in.path, in.prefix, siteid, startdate, enddate,
 
 
   # setup parent part of query if specified
-  if (is.na(parentid)) {
-    parent <- ""
-  } else {
+  parent <- ""
+  if (!is.na(parentid)) {
     parent <- paste0(" AND parent_id=", parentid)
   }
 
@@ -242,13 +241,13 @@ dbfile.input.check <- function(siteid, startdate = NULL, enddate = NULL, mimetyp
   formatid <- get.id(table = "formats", colnames = c("mimetype_id", "name"), values = c(mimetypeid, formatname), con = con)
 
   if (is.null(formatid)) {
-    invisible(data.frame())
+    return (invisible(data.frame()))
   }
 
   # setup parent part of query if specified
-  if (is.na(parentid)) {
-    parent <- ""
-  } else {
+  parent <- ""
+
+  if (!is.na(parentid)) {
     parent <- paste0(" AND parent_id=", parentid)
   }
 
@@ -450,7 +449,7 @@ dbfile.posterior.check <- function(pft, mimetype, formatname, con, hostname = PE
   # find appropriate pft
   pftid <- get.id(table = "pfts", values = "name", colnames = pft, con = con)
   if (is.null(pftid)) {
-    invisible(data.frame())
+    return (invisible(data.frame()))
   }
 
   # find appropriate format
@@ -461,7 +460,7 @@ dbfile.posterior.check <- function(pft, mimetype, formatname, con, hostname = PE
   formatid <- get.id(table = "formats", colnames = c("mimetype_id", "name"), values = c(mimetypeid, formatname), con = con)
 
   if (is.null(formatid)) {
-    invisible(data.frame())
+    return (invisible(data.frame()))
   }
 
   # find appropriate posterior
@@ -473,7 +472,7 @@ dbfile.posterior.check <- function(pft, mimetype, formatname, con, hostname = PE
     con = con
   )[["id"]]
   if (is.null(posteriorid)) {
-    invisible(data.frame())
+    return (invisible(data.frame()))
   }
 
   invisible(dbfile.check(type = "Posterior", container.id = posteriorid, con = con, hostname = hostname))
@@ -639,12 +638,12 @@ dbfile.file <- function(type, id, con, hostname = PEcAn.remote::fqdn()) {
 
   if (nrow(files) > 1) {
     PEcAn.logger::logger.warn("multiple files found for", id, "returned; using the first one found")
-    invisible(file.path(files[1, "file_path"], files[1, "file_name"]))
+    return(invisible(file.path(files[1, "file_path"], files[1, "file_name"])))
   } else if (nrow(files) == 1) {
-    invisible(file.path(files[1, "file_path"], files[1, "file_name"]))
+    return(invisible(file.path(files[1, "file_path"], files[1, "file_name"])))
   } else {
     PEcAn.logger::logger.warn("no files found for ", id, "in database")
-    invisible(NA)
+    return(invisible(NA))
   }
 }
 
@@ -662,7 +661,8 @@ dbfile.id <- function(type, file, con, hostname = PEcAn.remote::fqdn()) {
   # find appropriate host
   hostid <- db.query(query = paste0("SELECT id FROM machines WHERE hostname='", hostname, "'"), con = con)[["id"]]
   if (is.null(hostid)) {
-    invisible(NA)
+    PEcAn.logger::logger.warn("hostid not found in database")
+    return (invisible(NA))
   }
 
   # find file

diff --git a/book_source/03_topical_pages/93_installation/03_install_OS/04_Installing-PEcAn-OSX.Rmd b/book_source/03_topical_pages/93_installation/03_install_OS/04_Installing-PEcAn-OSX.Rmd
@@ -1,19 +1,20 @@
 ### Mac OSX {#macosx}
 
-These are specific notes for installing PEcAn on Mac OSX and will be referenced from the main [installing PEcAn](Installing-PEcAn) page. You will at least need to install the build environment and Postgres sections. If you want to access the database/PEcAn using a web browser you will need to install Apache. To access the database using the BETY interface, you will need to have Ruby installed.
+These are specific notes for installing PEcAn on Mac OSX and are referenced from the [installing PEcAn](Installing-PEcAn) page.
 
-This document also contains information on how to install the Rstudio server edition as well as any other packages that can be helpful.
+The build environment and Postgres sections below are required to install and use PEcAn.
 
+Optional software includes Apache, Rails, and Rstudio. Apache is required to run the BETYdb UI and the web-based version of PEcAn, both of which are optional. Rails is required to use the BETYdb web interface. Rstudio is a commonly used IDE for R.
 
 #### Install build environment
 
-```bash
-# install R
-# download from http://cran.r-project.org/bin/macosx/
+##### Option 1: Download and install
+
+R: download from http://cran.r-project.org/bin/macosx/
 
-# install gfortran 
-# download from http://cran.r-project.org/bin/macosx/tools/
+gfortran: download from http://cran.r-project.org/bin/macosx/tools/
 
+```bash
 # install OpenMPI
 curl -o openmpi-1.6.3.tar.gz http://www.open-mpi.org/software/ompi/v1.6/downloads/openmpi-1.6.3.tar.gz
 tar zxf openmpi-1.6.3.tar.gz
@@ -42,14 +43,63 @@ sudo make install
 cd ..
 ```
 
-#### Install Postgres
+##### Option 2: Homebrew
+
+```bash
+# R
+brew install --cask r
+# gfortran
+brew install gcc
+# OpenMPI
+brew install open-mpi
+# szip
+brew install szip
+# HDF5
+brew install hdf5 
+## homebrew should configure hdf5 with fortran and cxx, otherwise:
+## brew install hdf5 --with-fortran --with-cxx
+```
 
-For those on a Mac I use the following app for postgresql which has
-postgis already installed (http://postgresapp.com/)
 
-To get postgis run the following commands in psql:
+#### Install Postgres and PostGIS
+
+##### Option 1: Postgres.app
+
+For MacOS, the Postgres.app provides Postgres with PostGIS
+already installed (http://postgresapp.com/).
+
+To run Postgres:
+
+* Open Postgres.app.
+* In the menu bar, click the elephant icon and select “Open psql”.
+
+##### Option 2: install using homebrew:
 
 ```bash
+# optional: remove existing postgres installations with:
+# brew uninstall --force postgresql
+
+# install Postgres, fixed at v12 (officially supported by BETYdb):
+brew install postgres@12
+brew pin postgres@12
+
+# PostGIS
+brew install postgis
+
+# to run Postgres:
+brew services start postgresql
+```
+
+#### Enable PostGIS
+
+To enable PostGIS, you should start Postgres:
+
+```bash
+psql postgres
+```
+
+And then run the following commands:
+
+```sql
 ##### Enable PostGIS (includes raster)
 CREATE EXTENSION postgis;
 ##### Enable Topology
@@ -60,23 +110,42 @@ CREATE EXTENSION fuzzystrmatch;
 CREATE EXTENSION postgis_tiger_geocoder;
 ```
 
-To check your postgis run the following command again in psql: `SELECT PostGIS_full_version();`
+To check your postgis run the following command again in psql: 
+
+```sql
+SELECT PostGIS_full_version();
+```
 
 #### Additional installs
 
 
 ##### Install JAGS
 
-Download JAGS from http://sourceforge.net/projects/mcmc-jags/files/JAGS/3.x/Mac%20OS%20X/JAGS-Mavericks-3.4.0.dmg/download
+
+##### Option 1: using homebrew
+
+```bash
+brew install jags
+```
+
+##### Option 2: Download 
+
+Download JAGS from http://sourceforge.net/projects/mcmc-jags/files/JAGS/3.x/Mac%20OS%20X/JAGS-Mavericks-3.4.0.dmg/download.
+
 
 ##### Install udunits
 
-Installing udunits-2 on MacOSX is done from source.
+##### Option 1: Install using homebrew:
+
+```bash
+brew install udunits
+```
+
+##### Option 2: Install udunits-2 from source
 
 * download most recent [version of Udunits here](http://www.unidata.ucar.edu/downloads/udunits/index.jsp)
 * instructions for [compiling from source](http://www.unidata.ucar.edu/software/udunits/udunits-2/udunits2.html#Obtain)
 
-
 ```bash
 curl -o udunits-2.1.24.tar.gz ftp://ftp.unidata.ucar.edu/pub/udunits/udunits-2.1.24.tar.gz
 tar zxf udunits-2.1.24.tar.gz
@@ -86,7 +155,7 @@ make
 sudo make install
 ```
 
-#### Apache Configuration
+#### Apache Configuration  (Optional)
 
 Mac does not support pdo/postgresql by default. The easiest way to install is use: http://php-osx.liip.ch/
 
@@ -102,10 +171,24 @@ Alias /pecan ${PWD}/pecan/web
 EOF
 ```
 
-#### Ruby
+#### Ruby  (Optional)
+
+_Note: it is recommended that BETYdb be run using Docker because the application uses unsupported versions of Ruby and Rails._
+
+The BETYdb application requires Ruby version 2.7.7, as specified in [PecanProject/bety/.ruby-version](https://github.com/PecanProject/bety/blob/develop/.ruby-version). 
+
+```bash
+brew install rbenv
+rbenv init
+rbenv install 2.7.7 
+```
+
+#### Rstudio (Optional)
 
-The default version of ruby should work. Or use [JewelryBox](https://jewelrybox.unfiniti.com/).
+For MacOS, you can download [Rstudio Desktop](http://www.rstudio.com/).
 
-#### Rstudio Server
+Or using homebrew:
 
-For the mac you can download [Rstudio Desktop](http://www.rstudio.com/).
+```bash
+brew install --cask rstudio
+```
diff --git a/modules/data.atmosphere/R/met.process.R b/modules/data.atmosphere/R/met.process.R
@@ -255,13 +255,14 @@ met.process <- function(site, input_met, start_date, end_date, model,
   #--------------------------------------------------------------------------------------------------#
   # Change to Site Level - Standardized Met (i.e. ready for conversion to model specific format)
   if (stage$standardize) {
-    standardize_result <- list()
-
+    id_stdized <- list()
+    ready.id <- list(input.id = NULL, dbfile.id = NULL)
+
     for (i in seq_along(cf.id[[1]])) {
 
       if (register$scale == "regional") {
         #### Site extraction
-        standardize_result[[i]] <- .extract.nc.module(cf.id = list(input.id = cf.id$container_id[i],
+        id_stdized <- .extract.nc.module(cf.id = list(input.id = cf.id$container_id[i],
                                                                    dbfile.id = cf.id$id[i]), 
                                        register = register, 
                                        dir = dir, 
@@ -277,7 +278,7 @@ met.process <- function(site, input_met, start_date, end_date, model,
                                        # Expand to support ensemble names in the future
       } else if (register$scale == "site") {
         ##### Site Level Processing
-        standardize_result[[i]] <- .metgapfill.module(cf.id = list(input.id = cf.id$input.id[i], dbfile.id = cf.id$dbfile.id[i]), 
+        id_stdized <- .metgapfill.module(cf.id = list(input.id = cf.id$input.id[i], dbfile.id = cf.id$dbfile.id[i]), 
                                        register = register,
                                        dir = dir,
                                        met = met, 
@@ -288,15 +289,15 @@ met.process <- function(site, input_met, start_date, end_date, model,
                                        host = host, 
                                        overwrite = overwrite$standardize,
                                        ensemble_name = i)
+      } else {
+        # No action taken. These ids will be dropped from ready.id
+        id_stdized <- NULL
       }
+
+      ready.id$input.id <- c(ready.id$input.id, id_stdized$input.id)
+      ready.id$dbfile.id <- c(ready.id$dbfile.id, id_stdized$dbfile.id)
 
     } # End for loop
-    ready.id <- list(input.id = NULL, dbfile.id = NULL)
-
-    for (i in seq_along(standardize_result)) {
-      ready.id$input.id <- c(ready.id$input.id, standardize_result[[i]]$input.id)
-      ready.id$dbfile.id <- c(ready.id$dbfile.id, standardize_result[[i]]$dbfile.id)
-    }
 
   } else {
     ready.id <- input_met$id