Skip to content

Commit

Permalink
Merge pull request #44 from adamkemberling/master
Browse files Browse the repository at this point in the history
documentation for survdat related functions
  • Loading branch information
adamkemberling authored May 21, 2024
2 parents 7f1cdeb + 8d68147 commit 3e72d24
Show file tree
Hide file tree
Showing 42 changed files with 417 additions and 162 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ doc/www

# Testing Space
testing/
inst/stylesheets/Avenir.ttc
inst/stylesheets/Font/
4 changes: 1 addition & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ Description: Common routines and utilities for performing various
License: CC0
Encoding: UTF-8
LazyData: true
RoxygenNote: 7.1.2
RoxygenNote: 7.2.2
Suggests:
knitr,
rmarkdown,
Expand All @@ -36,12 +36,10 @@ Imports:
ggplot2 (>= 2.1.1),
grDevices (>= 3.0.0),
dplyr,
here,
ncdf4,
raster,
rlang,
magick,
sp,
lubridate,
ggthemes,
shiny,
Expand Down
3 changes: 1 addition & 2 deletions R/general_helpers.R
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,7 @@ as_fahrenheit <- function(temp_c, data_type = "temperature"){



#' @title Convert Daily Stack to Monthly Means
#'
#' @title Convert a Named Daily Raster to Monthly Means Stack
#'
#' @description Takes a stack of daily data with names of "XYYYY.MM.DD" and
#' returns monthly averages.
Expand Down
44 changes: 25 additions & 19 deletions R/ggthemes.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@



# Building a GMRI theme based on Wall street Journal and NYTimes theme
# base settings from {ggthemes}
#' @title GMRI ggplot2 theme for blog-style plots
#'
Expand All @@ -22,12 +21,20 @@
#'
#' @examples ggplot2::ggplot(mtcars) + theme_gmri()
theme_gmri <- function(base_size = 10,
base_family = "sans",
base_family = "sans",
title_family = "sans",
facet_color = "teal",
facet_color = "teal",
...) {
# Color from gmRi palette, sets background color
facet_hex <- gmri_cols()[facet_color]

# Color from gmRi palette, sets background color for facet strips
if(facet_color %in% c(
"orange", "yellow", "gmri green", "light green", "dark green",
"green", "teal", "blue", "gmri blue", "light gray", "dark gray")){
facet_hex <- gmri_cols()[facet_color]
} else {
facet_hex <- facet_color
}


# Set up theme
gmri_ggtheme <- ggthemes::theme_foundation(
Expand All @@ -37,9 +44,10 @@ theme_gmri <- function(base_size = 10,

# Major Elements
line = ggplot2::element_line(linetype = 1, colour = "black"),
rect = ggplot2::element_rect(fill = "transparent",
linetype = 0,
colour = NA),
rect = ggplot2::element_rect(
fill = "transparent",
linetype = 0,
colour = NA),
text = ggplot2::element_text(colour = "black"),
title = ggplot2::element_text(family = title_family, size = 12),

Expand All @@ -56,17 +64,15 @@ theme_gmri <- function(base_size = 10,

# Legend Elements
legend.background = ggplot2::element_rect(),
legend.position = "top",
legend.direction = "horizontal",
legend.box = "vertical",
legend.title = ggplot2::element_text(size = 9),
legend.text = ggplot2::element_text(size = 9),

# Panel/Grid Setup
panel.grid = ggplot2::element_line(colour = NULL,
linetype = 3,
size = 0.3,
color = "gray80"),
panel.grid = ggplot2::element_line(
colour = NULL,
linetype = 3,
linewidth = 0.3,
color = "gray80"),
panel.grid.major = ggplot2::element_line(colour = "black"),
panel.grid.major.x = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank(),
Expand All @@ -77,16 +83,16 @@ theme_gmri <- function(base_size = 10,
plot.caption = ggplot2::element_text(size = 7.2,
margin = ggplot2::margin(t = 20),
color = "gray40"),
#plot.margin = ggplot2::unit(c(1, 1, 1, 1), "lines"),
plot.margin = ggplot2::unit(c(1, 1, 2, 1), "lines"),

# Facet Details
strip.text = ggplot2::element_text(color = "white", face = "bold", size = 11),
strip.background = ggplot2::element_rect(
color = "white",
color = "transparent",
fill = facet_hex,
size = 1,
linewidth = 1,
linetype="solid")) +

# Add any additional theme calls to tweak on the fly
ggplot2::theme(...)

Expand Down Expand Up @@ -153,7 +159,7 @@ map_theme <- function(...){
# # getting path to fonts from gmRi package, should work for any user:
# gmri_font_paths <- paste0(system.file("stylesheets", package = "gmRi"), "/Fonts/")
#
# # Adding the fonts is another challenge after that
# # Adding the fonts is another challenge after that
# sysfonts::font_add("http://fast.fonts.net/t/1.css?apiType=css&projectid=806f61f6-d695-4965-a878-820b50bc0269")
#
# # doesn't work
Expand Down
42 changes: 23 additions & 19 deletions R/gmri_palettes.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,17 @@

#### Colors ####
gmri_colors <- c(
`orange` = "#EA4F12",
`yellow` = "#EACA00",
`gmri green`= "#ABB400",
`dark green`= "#3B4620",
`green` = "#407331",
`teal` = "#00736D",
`gmri blue` = "#00608A",
`light gray`= "#E9E9E9",
`dark gray` = "#535353"
`orange` = "#EA4F12",
`yellow` = "#EACA00",
`gmri green` = "#ABB400",
`light green`= "#ABB400",
`dark green` = "#3B4620",
`green` = "#407331",
`teal` = "#00736D",
`blue` = "#00608A",
`gmri blue` = "#00608A",
`light gray` = "#E9E9E9",
`dark gray` = "#535353"
)


Expand All @@ -22,7 +24,6 @@ gmri_colors <- c(
#' green, teal, gmri blue, light gray, & dark gray.
#'
#' @param ... Character names of official GMRI colors
#' @param as_char Boolean T/F determining how the values should be returned. Character values can be passed directly in places a color hexcode is desired.
#' @export
#'
#' @examples
Expand All @@ -40,38 +41,41 @@ gmri_colors <- c(
#' ggplot2::ggplot(mtcars, ggplot2::aes(hp, mpg)) +
#' ggplot2::geom_point(color = gmri_cols("gmri blue"), size = 4, alpha = .8)
#'
gmri_cols <- function(..., as_char = FALSE) {
gmri_cols <- function(...) {
cols <- c(...)

if (as_char == FALSE) {
if (is.null(cols))
if (is.null(cols)){
return (gmri_colors)
}

if (as_char == TRUE) {
if (is.null(cols))
return (as.character(gmri_colors))
}


# If nothing is entered into the function return them all
gmri_colors[cols]
return_cols <- gmri_colors[cols]
names(return_cols) <- NULL
return_cols
}





#### Palettes ####
# Named collections of GMRI colors. Each entry holds whatever gmri_cols()
# returns for the listed color names, in the order they are given.
gmri_palettes <- list(
  # Main palette
  main = gmri_cols(c("gmri blue", "green", "gmri green", "yellow", "orange")),

  # Cool palette
  cool = gmri_cols(c("gmri blue", "dark green", "teal")),

  # Hot palette
  hot = gmri_cols(c("gmri green", "yellow", "orange")),

  # Mixed palette
  mixed = gmri_cols(
    c(
      "orange", "yellow", "gmri green", "dark green",
      "green", "teal", "gmri blue"
    )
  ),

  # Gray (American spelling)
  gray = gmri_cols(c("light gray", "dark gray")),

  # Grey (British spelling), same colors as `gray`
  grey = gmri_cols(c("light gray", "dark gray"))
)
Expand Down
89 changes: 69 additions & 20 deletions R/nefsc_groundfish_access.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
#### NEFSC Trawl Data Access ####

####
#### NEFSC Trawl Data - Size Spectra Build
#### 3/24/2021
Expand All @@ -19,12 +18,54 @@

######################################################_

#' @title Load survdat file with standard data filters, keep all columns
#' @title Tidy the Survdat Dataset
#' @description Processing function to tidy/prepare the "survdat" groundfish survey dataset received
#' from the Northeast Fisheries Science Center. This function performs all common steps done when
#' preparing the data for any analyses that rely on abundance or biomass by species and the details
#' of where/when they were caught.
#'
#' This function will by default load the most up-to-date version of the dataset that has been
#' received from the NEFSC using survdat = NULL. Optionally, users may provide a dataframe
#' from the environment to be prepared using the same steps.
#'
#' The processing steps performed by this function include:
#'
#' - loading a specific survdat dataset: "most recent" loads the most current and complete dataset.
#' "bigelow" returns data sampled only by the RV bigelow, in its raw form, with no adjustments to
#' catch that transform numbers to be more consistent with the RV albatross. "bio" loads the
#' biological dataset, which contains additional details that require follow-up lab procedures like
#' age information
#'
#' - Flag and create any columns that are missing or inconsistent with how the dataset has been
#' sent over time. Messages will appear in the terminal to accompany any columns created or modified
#'
#' - Perform column formatting: length and biomass are renamed to be unit specific length_cm &
#' biomass_kg. Survey stratum numbers are pulled from the longer stratum field, these are used to
#' match up to the fields of the shapefiles for them. comname values are converted to be all
#' lowercase. The id field is formatted to not read as scientific, svspp is treated as a string.
#'
#' @description Processing function to prepare survdat data for size spectra analyses.
#' Options to select various survdat pulls, or provide your own as a dataframe
#' from the environment if available.
#' - Perform row filtering: eliminate strata that are no longer sampled or sampled inconsistently
#' (values less than 01010 or greater than 01760 removed, in addition to 1310, 1320, 1330, 1350,
#' 1410, 1420, & 1490). Any rows without abundance or biomass information are dropped.
#' Select species codes are also removed (0, 285-299, 305, 306, 307, 316, 323, 910-915, 955-961,
#' 978, 979, 980, 998)
#'
#' - Perform spatial filters: Data is kept for all strata within these major regional definitions:
#' "Georges Bank" = 13-23, "Gulf of Maine" = 24-40, "Southern New England" = 01-12,
#' "Mid-Atlantic Bight" = 61-76.
#'
#' - Perform numlen (numbers at length) adjustment: numlen is not adjusted to correct for the
#' change in survey vessels and gear that happened in 2008. These values are consequently not
#' equal to the overall abundance or total biomass of a species, both of which are
#' systematically adjusted to account for the gear change.
#'
#' Because of this, and also some instances of bad data, there are cases where more or fewer fish
#' are measured than were initially tallied in the abundance field. This section ensures that the
#' numlen totals for a station & species are equal to the abundance column (which has already been
#' adjusted for the gear change).
#'
#' - Remove any duplicate records: One final step is the verification that any duplicated records
#' are removed.
#'
#'
#' @param survdat optional starting dataframe in the R environment to run through size spectra build.
Expand Down Expand Up @@ -249,17 +290,6 @@ gmri_survdat_prep <- function(survdat = NULL, survdat_source = "most recent", bo
stratum != 1490)


# Filter to just Spring and Fall
trawldat <- dplyr::filter(trawldat, season %in% c("Spring", "Fall"))
trawldat <- dplyr::mutate(trawldat, season = factor(season, levels = c("Spring", "Fall")))


# Filter years
trawldat <- dplyr::filter(
.data = trawldat,
est_year >= 1970,
est_year < 2020)

# Drop NA Biomass and Abundance Records
trawldat <- dplyr::filter(
.data = trawldat,
Expand Down Expand Up @@ -426,6 +456,17 @@ gmri_survdat_prep <- function(survdat = NULL, survdat_source = "most recent", bo
#' @description calculate expected biomass-at-length for species based on
#' published length-weight relationships.
#'
#' Species are matched against a spreadsheet containing length and weight information from 2
#' sources. The first source is the length-weight relationships detailed in Wigley et al. 2003:
#' "Length-weight relationships for 74 fish species collected during NEFSC research vessel bottom
#' trawl surveys, 1992-99".
#'
#' The second source for matching growth details to species is fishbase. These values are known to
#' be potentially less accurate or less regionally specific.
#'
#' Pairings are first checked against the Wigley paper, and then by fishbase, to provide preference
#' to the more regionally focused source.
#'
#' @param survdat_clean Survdat data, after usual preparations are completed.
#' These include removal of old strata, labeling of areas of interest, and inclusion
#' of the annual effort in each.
Expand Down Expand Up @@ -537,7 +578,6 @@ add_lw_info <- function(survdat_clean, cutoff = FALSE, box_location = "root|clou
# 15% difference in either direction were flagged for removal
# code: github.com/adamkemberling/nefsc_trawl/R/qa_qc_reports/stratification_validation
# list updated : 8/27/2021
#
cutoff_15 <- c(
"acadian redfish", "american plaice",
"american shad",
Expand Down Expand Up @@ -756,9 +796,18 @@ add_epu_info <- function(trawldat, box_location = "root|cloudstorage"){
# Add area stratified biomass function
#' @title Add Survey Area Stratified Abundances and Biomasses
#'
#' @description Take the survdat data paired with length weight relationships and
#' return estimates of area stratified catch rates and their expected abundances and
#' biomasses when applied to the total areas of stratum.
#' @description Take the survdat data which has been cleaned with gmRi::gmri_survdat_prep, that is
#' paired with length weight relationships using gmRi::add_lw_info and estimate the area stratified
#' catch rates and their expected abundances and biomasses when those catch rates are applied to
#' the total areas of their respective strata.
#'
#' Area-stratified catch rates are calculated independently for each species, every year, within
#' each stratum, and by each season. e.g. 1982 spring cpue of acadian redfish informs the
#' area-stratified catch of acadian redfish in spring of 1982.
#'
#' Constants for the area-towed and the catchability coefficient are as follows:
#' - Area covered by an albatross standard tow in km2 = 0.0384
#' - Catchability coefficient - ideally should change for species guilds: q = 1
#'
#' @param survdat_weights Input dataframe, produced by add_lw_info
#' @param include_epu Flag for calculating the EPU rates in addition to the stratum regions we use.
Expand Down
Loading

0 comments on commit 3e72d24

Please sign in to comment.