Merge pull request #44 from adamkemberling/master

documentation for survdat related functions
gulfofmaine · May 21, 2024 · 3e72d24 · 3e72d24
2 parents 7f1cdeb + 8d68147
commit 3e72d24
Show file tree

Hide file tree

Showing 42 changed files with 417 additions and 162 deletions.
diff --git a/.gitignore b/.gitignore
@@ -11,3 +11,5 @@ doc/www
 
 # Testing Space
 testing/
+inst/stylesheets/Avenir.ttc
+inst/stylesheets/Font/
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -16,7 +16,7 @@ Description: Common routines and utilities for performing various
 License: CC0
 Encoding: UTF-8
 LazyData: true
-RoxygenNote: 7.1.2
+RoxygenNote: 7.2.2
 Suggests: 
     knitr,
     rmarkdown,
@@ -36,12 +36,10 @@ Imports:
     ggplot2 (>= 2.1.1),
     grDevices (>= 3.0.0),
     dplyr,
-    here,
     ncdf4,
     raster,
     rlang,
     magick,
-    sp,
     lubridate,
     ggthemes,
     shiny,

diff --git a/R/general_helpers.R b/R/general_helpers.R
@@ -83,8 +83,7 @@ as_fahrenheit <- function(temp_c, data_type = "temperature"){
 
 
 
-#' @title Convert Daily Stack to Monthly Means
-#'
+#' @title Convert a Named Daily Raster to Monthly Means Stack
 #'
 #' @description Takes a stack of daily data with names of "XYYYY.MM.DD" and
 #' returns monthly averages.

diff --git a/R/ggthemes.R b/R/ggthemes.R
@@ -7,7 +7,6 @@
 
 
 
-# Building a GMRI theme based on Wall street Journal and NYTimes theme
 # base settings from {ggthemes}
 #' @title GMRI ggplot2 theme for blog-style plots
 #'
@@ -22,12 +21,20 @@
 #'
 #' @examples ggplot2::ggplot(mtcars) + theme_gmri()
 theme_gmri <- function(base_size = 10,
-                       base_family = "sans",
+                       base_family  = "sans",
                        title_family = "sans",
-                       facet_color = "teal",
+                       facet_color  = "teal",
                        ...) {
-  # Color from gmRi palette, sets background color
-  facet_hex <- gmri_cols()[facet_color]
+
+  # Color from gmRi palette, sets background color for facet strips
+  if(facet_color %in% c(
+    "orange", "yellow", "gmri green", "light green", "dark green",
+    "green", "teal", "blue", "gmri blue", "light gray", "dark gray")){
+    facet_hex <- gmri_cols()[facet_color]
+  } else {
+    facet_hex <- facet_color
+  }
+
 
   # Set up theme
   gmri_ggtheme <- ggthemes::theme_foundation(
@@ -37,9 +44,10 @@ theme_gmri <- function(base_size = 10,
 
       # Major Elements
       line = ggplot2::element_line(linetype = 1, colour = "black"),
-      rect = ggplot2::element_rect(fill = "transparent",
-                                   linetype = 0,
-                                   colour = NA),
+      rect = ggplot2::element_rect(
+        fill = "transparent",
+        linetype = 0,
+        colour = NA),
       text  = ggplot2::element_text(colour = "black"),
       title = ggplot2::element_text(family = title_family, size = 12),
 
@@ -56,17 +64,15 @@ theme_gmri <- function(base_size = 10,
 
       # Legend Elements
       legend.background = ggplot2::element_rect(),
-      legend.position   = "top",
-      legend.direction  = "horizontal",
-      legend.box        = "vertical",
       legend.title      = ggplot2::element_text(size = 9),
       legend.text       = ggplot2::element_text(size = 9),
 
       # Panel/Grid Setup
-      panel.grid         = ggplot2::element_line(colour = NULL,
-                                                 linetype = 3,
-                                                 size = 0.3,
-                                                 color = "gray80"),
+      panel.grid = ggplot2::element_line(
+        colour = NULL,
+        linetype = 3,
+        linewidth = 0.3,
+        color = "gray80"),
       panel.grid.major   = ggplot2::element_line(colour = "black"),
       panel.grid.major.x = ggplot2::element_blank(),
       panel.grid.minor   = ggplot2::element_blank(),
@@ -77,16 +83,16 @@ theme_gmri <- function(base_size = 10,
       plot.caption  = ggplot2::element_text(size = 7.2,
                                             margin = ggplot2::margin(t = 20),
                                             color = "gray40"),
-      #plot.margin  = ggplot2::unit(c(1, 1, 1, 1), "lines"),
       plot.margin   = ggplot2::unit(c(1, 1, 2, 1), "lines"),
 
       # Facet Details
       strip.text = ggplot2::element_text(color = "white", face = "bold", size = 11),
       strip.background = ggplot2::element_rect(
-        color = "white",
+        color = "transparent",
         fill = facet_hex,
-        size = 1,
+        linewidth = 1,
         linetype="solid")) +
+
     # Add any additional theme calls to tweak on the fly
     ggplot2::theme(...)
 
@@ -153,7 +159,7 @@ map_theme <- function(...){
 # # getting path to fonts from gmRi package, should work for any user:
 # gmri_font_paths <- paste0(system.file("stylesheets", package = "gmRi"), "/Fonts/")
 #
-# # Adding  the fonts is another challenge after that
+# # Adding the fonts is another challenge after that
 # sysfonts::font_add("http://fast.fonts.net/t/1.css?apiType=css&projectid=806f61f6-d695-4965-a878-820b50bc0269")
 #
 # # doesn't work

diff --git a/R/gmri_palettes.R b/R/gmri_palettes.R
@@ -3,15 +3,17 @@
 
 ####  Colors  ####
 gmri_colors <- c(
-  `orange`    =  "#EA4F12",
-  `yellow`    =  "#EACA00",
-  `gmri green`=  "#ABB400",
-  `dark green`=  "#3B4620",
-  `green`     =  "#407331",
-  `teal`      =  "#00736D",
-  `gmri blue` =  "#00608A",
-  `light gray`=  "#E9E9E9",
-  `dark gray` =  "#535353"
+  `orange`     =  "#EA4F12",
+  `yellow`     =  "#EACA00",
+  `gmri green` =  "#ABB400",
+  `light green`=  "#ABB400",
+  `dark green` =  "#3B4620",
+  `green`      =  "#407331",
+  `teal`       =  "#00736D",
+  `blue`       =  "#00608A",
+  `gmri blue`  =  "#00608A",
+  `light gray` =  "#E9E9E9",
+  `dark gray`  =  "#535353"
 )
 
 
@@ -22,7 +24,6 @@ gmri_colors <- c(
 #' green, teal, gmri blue, light gray, & dark gray.
 #'
 #' @param ... Character names of official GMRI colors
-#' @param as_char Boolean T/F determining how the values should be returned. Character values can be passed directly in places a color hexcode is desired.
 #' @export
 #'
 #' @examples
@@ -40,38 +41,41 @@ gmri_colors <- c(
 #' ggplot2::ggplot(mtcars, ggplot2::aes(hp, mpg)) +
 #'   ggplot2::geom_point(color = gmri_cols("gmri blue"), size = 4, alpha = .8)
 #'
-gmri_cols <- function(...,  as_char = FALSE) {
+gmri_cols <- function(...) {
   cols <- c(...)
 
-  if (as_char == FALSE) {
-   if (is.null(cols))
+  if (is.null(cols)){
       return (gmri_colors)
   }
 
-  if (as_char == TRUE) {
-    if (is.null(cols))
-      return (as.character(gmri_colors))
-  }
-
 
   # If nothing is entered into the function return them all
-  gmri_colors[cols]
+  return_cols <- gmri_colors[cols]
+  names(return_cols) <- NULL
+  return_cols
 }
 
 
 
+
+
 ####  Palettes  ####
 gmri_palettes <- list(
   # Main palette
   `main`  = gmri_cols("gmri blue", "green", "gmri green",  "yellow", "orange"),
+
   # Cool palette
   `cool`  = gmri_cols("gmri blue", "dark green", "teal"),
+
   # Hot palette
   `hot`   = gmri_cols("gmri green", "yellow", "orange"),
+
   # Mixed palette
   `mixed` = gmri_cols("orange", "yellow", "gmri green", "dark green", "green", "teal", "gmri blue"),
+
   # Gray
   `gray`  = gmri_cols("light gray", "dark gray"),
+
   # Grey for British people
   `grey`  = gmri_cols("light gray", "dark gray")
 )

diff --git a/R/nefsc_groundfish_access.R b/R/nefsc_groundfish_access.R
@@ -1,5 +1,4 @@
 #### NEFSC Trawl Data Access  ####
-
 ####
 #### NEFSC Trawl Data - Size Spectra Build
 #### 3/24/2021
@@ -19,12 +18,54 @@
 
 ######################################################_
 
-#' @title  Load survdat file with standard data filters, keep all columns
+#' @title  Tidy the Survdat Dataset
+#' @description Processing function to tidy/prepare the "survdat" groundfish survey dataset received
+#' from the Northeast Fisheries Science Center. This function performs all common steps done when
+#' preparing the data for any analyses that rely on abundance or biomass by species and the details
+#' of where/when they were caught.
+#'
+#' This function will by default load the most up-to-date version of the dataset that has been
+#' received from the NEFSC using survdat = NULL. Optionally, users may provide a dataframe
+#' from the environment to be prepared using the same steps.
+#'
+#' The processing steps performed by this function include:
+#'
+#'  - Load a specific survdat dataset: "most recent" loads the most current and complete dataset.
+#'  "bigelow" returns data sampled only by the RV Bigelow, in its raw form, with no adjustments to
+#'  catch that transform numbers to be more consistent with the RV Albatross. "bio" loads the
+#'  biological dataset, which contains additional details that require follow-up lab procedures like
+#'  age information.
+#'
+#'  - Flag and create any columns that are missing or inconsistent with how the dataset has been
+#'  sent over time. Messages will appear in the terminal to accompany any columns created or modified
 #'
+#'  -  Perform column formatting: length and biomass are renamed to be unit specific length_cm &
+#'  biomass_kg. Survey stratum numbers are pulled from the longer stratum field, these are used to
+#'  match up to the fields of the shapefiles for them. comname values are converted to be all
+#'  lowercase. The id field is formatted to not read as scientific, svspp is treated as a string.
 #'
-#' @description Processing function to prepare survdat data for size spectra analyses.
-#' Options to select various survdat pulls, or provide your own as a dataframe
-#' from the environment if available.
+#'  - Perform row filtering: eliminate strata that are no longer sampled or sampled inconsistently
+#'  (values less than 01010 or greater than 01760 removed, in addition to 1310, 1320, 1330, 1350,
+#'  1410, 1420, & 1490). Any rows without abundance or biomass information are dropped.
+#'  Select species codes are also removed (0, 285-299, 305, 306, 307, 316, 323, 910-915, 955-961,
+#'  978, 979, 980, 998)
+#'
+#'  - Perform spatial filters: Data is kept for all strata within these major regional definitions:
+#'  "Georges Bank" = 13-23, "Gulf of Maine" = 24-40, "Southern New England" = 01-12,
+#'  "Mid-Atlantic Bight" = 61-76.
+#'
+#'  - Perform numlen (numbers at length) adjustment: numlen is not adjusted to correct for the
+#'  change in survey vessels and gear that happened in 2008. These values consequently are not
+#'  equal to the overall abundance of a species, nor total biomass of a species which are
+#'  systematically adjusted to account for the gear change.
+#'
+#'  Because of this and also some instances of bad data, there are cases where more or fewer fish
+#'  are measured than were initially tallied in the abundance field. This section ensures that the
+#'  numlen totals for a station & species are equal to the abundance column (which has already been
+#'  adjusted for the gear change).
+#'
+#'  - Remove any duplicate records: One final step is the verification that any duplicated records
+#'  are removed.
 #'
 #'
 #' @param survdat optional starting dataframe in the R environment to run through size spectra build.
@@ -249,17 +290,6 @@ gmri_survdat_prep <- function(survdat = NULL, survdat_source = "most recent", bo
     stratum != 1490)
 
 
-  # Filter to just Spring and Fall
-  trawldat <- dplyr::filter(trawldat, season %in% c("Spring", "Fall"))
-  trawldat <- dplyr::mutate(trawldat, season = factor(season, levels = c("Spring", "Fall")))
-
-
-  # Filter years
-  trawldat <- dplyr::filter(
-    .data = trawldat,
-    est_year >= 1970,
-    est_year < 2020)
-
   # Drop NA Biomass and Abundance Records
   trawldat <- dplyr::filter(
     .data = trawldat,
@@ -426,6 +456,17 @@ gmri_survdat_prep <- function(survdat = NULL, survdat_source = "most recent", bo
 #' @description calculate expected biomass-at-length for species based on
 #' published length-weight relationships.
 #'
+#' Species are matched against a spreadsheet containing length and weight information from 2
+#' sources. The first source is the length-weight relationships detailed in Wigley et al. 2003:
+#' "Length-weight relationships for 74 fish species collected during NEFSC research vessel bottom
+#' trawl surveys, 1992-99".
+#'
+#' The second source for matching growth details to species is fishbase. These values are known to
+#' be potentially less accurate or less regionally specific.
+#'
+#' Pairings are first checked against the Wigley paper, and then by fishbase, to provide preference
+#' to the more regionally focused source.
+#'
 #' @param survdat_clean Survdat data, after usual preparations are completed.
 #' These include removal of old strata, labeling of areas of interest, and inclusion
 #' of the annual effort in each.
@@ -537,7 +578,6 @@ add_lw_info <- function(survdat_clean, cutoff = FALSE, box_location = "root|clou
   # 15% difference in either direction were flagged for removal
   # code: github.com/adamkemberling/nefsc_trawl/R/qa_qc_reports/stratification_validation
   # list updated : 8/27/2021
-  #
   cutoff_15 <- c(
     "acadian redfish", "american plaice",
     "american shad",
@@ -756,9 +796,18 @@ add_epu_info <- function(trawldat, box_location = "root|cloudstorage"){
 # Add area stratified biomass function
 #' @title Add Survey Area Stratified Abundances and Biomasses
 #'
-#' @description Take the survdat data paired with length weight relationships and
-#' return estimates of area stratified catch rates and their expected abundances and
-#' biomasses when applied to the total areas of stratum.
+#' @description Take survdat data that has been cleaned with gmRi::gmri_survdat_prep and
+#' paired with length-weight relationships using gmRi::add_lw_info, and estimate the area-stratified
+#' catch rates and their expected abundances and biomasses when those catch rates are applied to
+#' the total areas of their respective strata.
+#'
+#' Area-stratified catch rates are calculated independently for each species, every year, within
+#' each stratum, and by each season. e.g. 1982 spring cpue of acadian redfish informs the
+#' area-stratified catch of acadian redfish in spring of 1982.
+#'
+#' Constants for the area-towed and the catchability coefficient are as follows:
+#' - Area covered by an albatross standard tow in km2 = 0.0384
+#' - Catchability coefficient - ideally should change for species guilds: q = 1
 #'
 #' @param survdat_weights Input dataframe, produced by add_lw_info
 #' @param include_epu Flag for calculating the EPU rates in addition to the stratum regions we use.