Update TADAModule1_AdvancedTraining.Rmd and fix MeasureQualifierCode bug

USEPA · Dec 2, 2024 · 1ee2452 · 1ee2452
2 parents 2d4bcf6 + 928e953
commit 1ee2452
Show file tree

Hide file tree

Showing 6 changed files with 78 additions and 38 deletions.
diff --git a/R/RequiredCols.R b/R/RequiredCols.R
@@ -217,6 +217,7 @@ extra.cols <- c(
   "timeZoneStart", # no longer in default dataRetrieval profile? 11/7/24
   "timeZoneEnd", # no longer in default dataRetrieval profile? 11/7/24
   "ActivityStartTime.TimeZoneCode_offset", # new column from default dataRetrieval profile? 11/7/24
+  "ActivityEndTime.TimeZoneCode_offset", # new column from default dataRetrieval profile? 11/21/24
   "SourceMapScaleNumeric",
   "HorizontalAccuracyMeasure.MeasureValue",
   "HorizontalAccuracyMeasure.MeasureUnitCode",

diff --git a/R/ResultFlagsDependent.R b/R/ResultFlagsDependent.R
@@ -804,6 +804,18 @@ TADA_FlagMeasureQualifierCode <- function(.data, clean = FALSE, flaggedonly = FA
   TADA_CheckType(flaggedonly, "logical")
   # check .data has required columns
   TADA_CheckColumns(.data, "MeasureQualifierCode")
+  # check .data MeasureQualifierCode is not all NA. If it is, don't run function and return .data
+  if (all(is.na(.data$MeasureQualifierCode))) {
+    print("Data frame does not include any information (all NA's) in MeasureQualifierCode.")
+
+    .data = .data %>% 
+      dplyr::mutate(TADA.MeasureQualifierCode.Flag = "Pass") %>%
+      dplyr::mutate(TADA.MeasureQualifierCode.Def = "NA - Not Applicable")
+
+    .data <- TADA_OrderCols(.data)
+
+    return(.data)
+  }
 
   # execute function after checks are passed
   # delete existing flag column

diff --git a/R/ResultFlagsIndependent.R b/R/ResultFlagsIndependent.R
@@ -302,8 +302,17 @@ TADA_FlagContinuousData <- function(.data, clean = FALSE, flaggedonly = FALSE, t
     rm(within_window)
   }
 
-  flag.data <- cont.data %>%
+  # check if noncont.data is blank. If TRUE, flag.data = cont.data
+  if (nrow(noncont.data) == 0) {
+    print("All data is flagged as continuous in TADA.ContinuousData.Flag column.")
+    flag.data = cont.data
+  }
+
+  # if noncont.data is NOT blank, flag.data = join of noncont.data with cont.data
+  if (nrow(noncont.data) != 0) {
+    flag.data <- cont.data %>%
     dplyr::full_join(noncont.data, by = c(names(cont.data)))
+  }
 
   # flagged output, all data
   if (clean == FALSE & flaggedonly == FALSE) {

diff --git a/tests/testthat/test-URLChecker.R b/tests/testthat/test-URLChecker.R
@@ -1,4 +1,10 @@
-test_that("URLs are not broken", {
+# ignore warning
+# file("") only supports open = "w+" and open = "w+b": using the former
+# https://github.com/USEPA/EPATADA/pull/548
+suppressWarnings(
+  test_that("URLs are not broken", {
+
+
   # extract urls function
   extract_urls <- function(text) {
     stringr::str_extract_all(text, "http[s]?://[^\\s\\)\\]]+") %>% unlist()
@@ -34,7 +40,8 @@ test_that("URLs are not broken", {
     extract_urls() %>%
     clean_url() %>%
     unique() %>%
-    # problematic URL I can't get a response from using multiple methods (itec) and CRAN because its response is inconsistent, likely due to redirecting to mirrors (HRM 10/28/2024)
+    # problematic URL I can't get a response from using multiple methods (itec) 
+    # and CRAN because its response is inconsistent, likely due to redirecting to mirrors (HRM 10/28/2024)
     setdiff(c(
       # url works (HRM 11/7/24), but does not provide a recognizable response code
       "https://www.itecmembers.org/attains/",
@@ -68,3 +75,4 @@ test_that("URLs are not broken", {
   # verify that there are zero urls with failing response codes
   testthat::expect_equal(n, 0)
 })
+)
diff --git a/vignettes/TADAModule1_AdvancedTraining.Rmd b/vignettes/TADAModule1_AdvancedTraining.Rmd
@@ -99,6 +99,15 @@ the console to update dependency packages that have more recent versions
 available. If you see this prompt, it is recommended to update all of
 them (enter 1 into the console).
 
+The most stable branch for TADA right now is the develop
+branch. Contributors generally create their own branches based on
+develop, make some improvements, and then submit a pull request to be
+reviewed by the TADA Team. Once approved, updates are then merged into
+the develop branch. However, you are welcome to download any branch
+you'd like using the `ref` input in `install_github` (see code chunk
+below). This functionality is mainly useful to TADA package
+developers/contributors.
+
 ```{r install_TADA, eval = F, results = 'hide'}
 remotes::install_github("USEPA/EPATADA",
   ref = "develop",
@@ -113,14 +122,7 @@ session.
 library(EPATADA)
 ```
 
-It's that easy! The most stable branch for TADA right now is the develop
-branch. Contributors generally create their own branches based on
-develop, make some improvements, and then submit a pull request to be
-reviewed by the TADA Team. Once approved, updates are then merged into
-the develop branch. However, you are welcome to download any branch
-you'd like using the `ref` input in `install_github` (see code chunk
-above). This functionality is mainly only useful to TADA package
-developers/contributors.
+It's that easy! 
 
 The following code block ensures the additional packages needed to run
 the code in this RMarkdown document are loaded. However, users may also
@@ -129,7 +131,8 @@ use the `package name:: package function` notation to avoid the list of
 
 ```{r, eval = F, results = 'hide'}
 list.of.packages <- c("tidyverse")
-new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[, "Package"])]
+new.packages <- list.of.packages[!(list.of.packages %in% 
+                                     installed.packages()[, "Package"])]
 if (length(new.packages)) install.packages(new.packages)
 
 library(tidyverse)
@@ -169,15 +172,15 @@ data structure and format.
 getwd() # find your working directory
 template <- TADA_GetTemplate() # download template to working directory
 
-# uncomment below to review example data frame
-# Data_Nutrients_UT <- Data_Nutrients_UT
+# review example data frame
+exampledata_Nutrients_UT <- Data_Nutrients_UT
 ```
 
-`TADA_DataRetrieval` is built upon USGS's `dataRetrieval::readWQPdata`
-function within the dataRetrieval package, which uses web service calls
-to bring WQP data into the R environment. Additionally,
-`TADA_DataRetrieval` performs some basic quality control checks via
-`TADA_AutoClean` on the data using new TADA-specific columns to preserve
+`TADA_DataRetrieval` is built upon USGS's `dataRetrieval::readWQPdata` and 
+`dataRetrieval::whatWQPsites` functions within the dataRetrieval package, 
+which uses web service calls to bring WQP data into the R environment. 
+Additionally, `TADA_DataRetrieval` performs some basic quality control checks 
+via `TADA_AutoClean` on the data using new TADA-specific columns to preserve
 the original data frame:
 
 -   Converts key character columns to ALL CAPS for easier harmonization
@@ -207,7 +210,12 @@ about input parameters and to see several examples.
 ```{r dataretrieval}
 # download example data
 # dataset_0  <- TADA_DataRetrieval(
-#   organization = c("REDLAKE_WQX", "SFNOES_WQX", "PUEBLO_POJOAQUE", "FONDULAC_WQX",   "PUEBLOOFTESUQUE", "CNENVSER"),
+#   organization = c("REDLAKE_WQX", 
+#                    "SFNOES_WQX", 
+#                    "PUEBLO_POJOAQUE", 
+#                    "FONDULAC_WQX",   
+#                    "PUEBLOOFTESUQUE", 
+#                    "CNENVSER"),
 #   startDate = "2018-01-01",
 #   endDate = "2023-01-01")
 
@@ -224,12 +232,14 @@ data. This ensures that all important quality control columns are
 included in the data frame.
 
 **Note:** USGS and EPA are working together to create WQP 3.0 data
-profiles. Once released, one data profile will contain the columns
-critical to TADA, removing the need to combine profiles in this first
-step. TADA package users likely will not notice a difference in their
-usage of the `TADA_DataRetrieval` function, but it will simplify the
-steps needed to upload a custom or WQP GUI-downloaded data frame into
-the R package.
+profiles. Once released (coming in 2025), one data profile will contain the 
+columns critical to TADA, removing the need to combine profiles in this first
+step. This will simplify the steps needed to upload a custom or WQP 
+GUI-downloaded data frame into the R package. However, column names are 
+changing in the new WQP 3.0 data profiles. This may impact TADA package users'
+usage of the `TADA_DataRetrieval` function in their workflows. The WQP and
+TADA teams are available to assist with crosswalking the old column names to
+the new ones when the time comes.
 
 ## Initial data review
 
@@ -574,7 +584,10 @@ to make non-detect values equal to the provided detection limit? What
 would you need to change in the example below?**
 
 ```{r cens dataset}
-dataset_cens <- TADA_SimpleCensoredMethods(dataset_flags, nd_method = "multiplier", nd_multiplier = 0.5, od_method = "as-is")
+dataset_cens <- TADA_SimpleCensoredMethods(dataset_flags, 
+                                           nd_method = "multiplier", 
+                                           nd_multiplier = 0.5, 
+                                           od_method = "as-is")
 ```
 
 Let's take a look at how the censored data handling function affects the
@@ -737,8 +750,7 @@ secchi depth, and pH.
 ```{r depth plot}
 TADA_DepthProfilePlot(dataset_cens,
   groups = c(
-    "TEMPERATURE,
-                                       WATER_NA_NA_DEG C",
+    "TEMPERATURE, WATER_NA_NA_DEG C",
     "DEPTH, SECCHI DISK DEPTH_NA_NA_M",
     "PH_NA_NA_NONE"
   ),

diff --git a/vignettes/articles/TADAWaterSciConWorkshopDemo.Rmd b/vignettes/articles/TADAWaterSciConWorkshopDemo.Rmd
@@ -197,7 +197,8 @@ TADA.ComparableDataIdentifiers of interest.
 
 ```{r}
 data <- data %>%
-  dplyr::filter(TADA.ComparableDataIdentifier %in% c("TOTAL DISSOLVED SOLIDS_DISSOLVED_NA_UG/L", "SPECIFIC CONDUCTANCE_TOTAL_NA_US/CM @25C"))
+  dplyr::filter(TADA.ComparableDataIdentifier %in% c("TOTAL DISSOLVED SOLIDS_DISSOLVED_NA_UG/L", 
+                                                     "SPECIFIC CONDUCTANCE_TOTAL_NA_US/CM @25C"))
 ```
 
 ## Water Chemistry Data Preparation and QC
@@ -461,12 +462,9 @@ TADA_Histogram(
   data %>%
     dplyr::filter(
       ActivityStartDate > "2014-12-31",
-      TADA.CharacteristicName ==
-        "SPECIFIC CONDUCTANCE",
-      ATTAINS.assessmentunitname == "Animas River (San Juan River to Estes Arroyo)"
-    ),
-  id_cols = "TADA.ComparableDataIdentifier"
-)
+      TADA.CharacteristicName == "SPECIFIC CONDUCTANCE",
+      ATTAINS.assessmentunitname == "Animas River (San Juan River to Estes Arroyo)"),
+  id_cols = "TADA.ComparableDataIdentifier")
 ```
 
 Or we could create a boxplot for a particular characteristic and
@@ -488,7 +486,7 @@ TADA_Boxplot(
 
 We might also want to directly compare the same characteristic over time
 between the two rivers. We can do this by revisiting
-`TADA.TwoCharacteristicScatterplot.`
+`TADA_TwoCharacteristicScatterplot`.
 
 ```{r compare locations}
 # create two characteristic scatterplot using TADA_TwoCharacteristicScatterplot
@@ -549,7 +547,7 @@ understand the decisions that were made, make changes as necessary, and
 run it again.
 
 For example, we could modify the code to repeat the same analysis at a
-different location. Or we could run `TADA_ConvertResultUnits` an
+different location. Or we could run `TADA_ConvertResultUnits` an
 additional time outside to convert units for one or more
 characteristics.