Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update ref files #340

Merged
merged 2 commits into from
Sep 15, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 34 additions & 27 deletions R/GenerateRefTables.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
#' Refresh all TADA reference files
#'
#' Convenience wrapper that calls each individual reference-table updater
#' exactly once, in a fixed order, so every reference file can be refreshed
#' with a single call.
#'
#' @return Called for its side effect: the updaters save updated reference
#'   files. The value of the final call,
#'   `TADA_UpdateMeasureQualifierCodeRef()`, is passed through unchanged.
#'
TADA_UpdateAllRefs <- function() {
  # Characteristic validation and measurement units
  TADA_UpdateWQXCharValRef()
  TADA_UpdateMeasureUnitRef()

  # Detection condition and detection limit tables
  TADA_UpdateDetCondRef()
  TADA_UpdateDetLimitRef()

  # Activity type and characteristic tables
  TADA_UpdateActivityTypeRef()
  TADA_UpdateCharacteristicRef()

  # Measure qualifier codes; kept last because its result is the return value
  TADA_UpdateMeasureQualifierCodeRef()
}

# File-level slot used to store the cached WQX QAQC Characteristic Validation
# Reference Table. It starts as NULL, meaning nothing has been cached yet.
# NOTE(review): presumably TADA_GetWQXCharValRef() checks this slot before
# downloading and fills it afterwards -- confirm against that function's body.
WQXCharValRef_Cached <- NULL

Expand Down Expand Up @@ -41,33 +54,26 @@ TADA_GetWQXCharValRef <- function() {
message("Falling back to (possibly outdated) internal file.")
return(utils::read.csv(system.file("extdata", "WQXcharValRef.csv", package = "TADA")))
}

# filter data to include only accepted (valid) values and remove extraneous columns

# Categorize status values
notreviewed <- "Not Reviewed"
valid <- c("Accepted", "Y")
invalid <- c("Rejected", "Rejected ", "N")
nonstandard <- c("NonStandardized",
"InvalidMediaUnit",
"InvalidChar",
"MethodNeeded")

WQXcharValRef <- raw.data %>%
dplyr::select(-c(
"Domain", "Unique.Identifier", "Note.Recommendation",
"Last.Change.Date"
))
# replace "Status" values with Valid, Invalid, Unknown
WQXcharValRef$Status2 <- ifelse(WQXcharValRef$Status %in% c("Accepted"), "Valid", "Invalid")
WQXcharValRef$Status2 <- ifelse(WQXcharValRef$Status %in% c(
"NonStandardized",
"Nonstandardized",
"InvalidMediaUnit",
"InvalidChar",
"MethodNeeded"
), "Nonstandardized", WQXcharValRef$Status2)

WQXcharValRef <- WQXcharValRef %>%
dplyr::select(-Status) %>%
dplyr::rename(Status = Status2) %>%
dplyr::mutate(TADA.WQXVal.Flag = dplyr::case_when(
Status %in% notreviewed ~ "Not Reviewed",
Status %in% valid ~ "Valid",
Status %in% invalid ~ "Invalid",
Status %in% nonstandard ~ "NonStandardized",
Status %in% NA ~ "Not Reviewed",
)) %>%
dplyr::distinct()

# # Convert all NONE to NA in Value and Value.Unit columns
# WQXcharValRef = WQXcharValRef %>% dplyr::mutate(Value = replace(Value, Value%in%c("NONE"),NA),
# Value.Unit = replace(Value.Unit, Value.Unit%in%c("NONE"),NA)) %>% dplyr::distinct()
#

# Save updated table in cache
WQXCharValRef_Cached <- WQXcharValRef

Expand Down Expand Up @@ -394,7 +400,8 @@ TADA_GetActivityTypeRef <- function() {
if (is.null(raw.data)) {
message("Downloading latest Activity Type Reference Table failed!")
message("Falling back to (possibly outdated) internal file.")
return(utils::read.csv(system.file("extdata", "WQXActivityTypeRef.csv", package = "TADA")))
return(utils::read.csv(system.file("extdata", "WQXActivityTypeRef.csv",
package = "TADA")))
}

# Categorize Activity Types
Expand Down Expand Up @@ -635,8 +642,8 @@ TADA_GetMeasureQualifierCodeRef <- function() {
Code %in% overdetect ~ "Over-Detect",
Code %in% suspect ~ "Suspect",
Code %in% pass ~ "Pass",
Code %in% NA ~ "Pass",
TRUE ~ as.character("NewValue_NeedsReview")
Code %in% NA ~ "Not Reviewed",
TRUE ~ as.character("Not Reviewed")
)) %>%
dplyr::distinct()

Expand Down
46 changes: 21 additions & 25 deletions R/ResultFlagsDependent.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,11 +72,11 @@ TADA_FlagFraction <- function(.data, clean = TRUE, flaggedonly = FALSE) {
check.data <- check.data %>%
dplyr::rename(TADA.SampleFraction.Flag = Status) %>%
dplyr::distinct()
# rename NA values to Nonstandardized in TADA.SampleFraction.Flag column
check.data["TADA.SampleFraction.Flag"][is.na(check.data["TADA.SampleFraction.Flag"])] <- "Nonstandardized"
# rename NA values to "Not Reviewed" in TADA.SampleFraction.Flag column
check.data["TADA.SampleFraction.Flag"][is.na(check.data["TADA.SampleFraction.Flag"])] <- "Not Reviewed"

# if all rows are "Valid", return input unchanged
if (any(c("Nonstandardized", "Invalid") %in%
if (any(c("Nonstandardized", "Invalid", "Not Reviewed") %in%
unique(check.data$TADA.SampleFraction.Flag)) == FALSE) {
if (flaggedonly == FALSE) {
print("All characteristic/fraction combinations are valid in your dataframe. Returning input dataframe with TADA.SampleFraction.Flag column for tracking.")
Expand All @@ -86,7 +86,6 @@ TADA_FlagFraction <- function(.data, clean = TRUE, flaggedonly = FALSE) {
if (flaggedonly == TRUE) {
print("This dataframe is empty because we did not find any invalid fraction/characteristic combinations in your dataframe")
empty.data <- dplyr::filter(check.data, TADA.SampleFraction.Flag == "Invalid")
# empty.data <- dplyr::select(empty.data, -TADA.SampleFraction.Flag)
empty.data <- TADA_OrderCols(empty.data)
return(empty.data)
}
Expand All @@ -103,9 +102,6 @@ TADA_FlagFraction <- function(.data, clean = TRUE, flaggedonly = FALSE) {
if (clean == TRUE & flaggedonly == FALSE) {
# filter out invalid characteristic-fraction combinations
clean.data <- dplyr::filter(check.data, TADA.SampleFraction.Flag != "Invalid")

# remove WQX.SampleFractionValidity column
# clean.data <- dplyr::select(clean.data, -TADA.SampleFraction.Flag)
clean.data <- TADA_OrderCols(clean.data)
return(clean.data)
}
Expand Down Expand Up @@ -211,11 +207,11 @@ TADA_FlagSpeciation <- function(.data, clean = c("invalid_only", "nonstandardize
check.data <- check.data %>%
dplyr::rename(TADA.MethodSpeciation.Flag = Status) %>%
dplyr::distinct()
# rename NA values to Nonstandardized in TADA.MethodSpeciation.Flag column
check.data["TADA.MethodSpeciation.Flag"][is.na(check.data["TADA.MethodSpeciation.Flag"])] <- "Nonstandardized"
# rename NA values to Not Reviewed in TADA.MethodSpeciation.Flag column
check.data["TADA.MethodSpeciation.Flag"][is.na(check.data["TADA.MethodSpeciation.Flag"])] <- "Not Reviewed"

# if all rows are "Valid", return input with flag column
if (any(c("Nonstandardized", "Invalid") %in%
if (any(c("Not Reviewed", "Invalid", "NonStandardized") %in%
unique(check.data$TADA.MethodSpeciation.Flag)) == FALSE) {
print("All characteristic/method speciation combinations are valid in your dataframe. Returning input dataframe with TADA.MethodSpeciation.Flag column for tracking.")
check.data <- TADA_OrderCols(check.data)
Expand All @@ -235,14 +231,14 @@ TADA_FlagSpeciation <- function(.data, clean = c("invalid_only", "nonstandardize

# when clean = "nonstandardized_only"
if (clean == "nonstandardized_only") {
# filter out only "Nonstandardized" characteristic-method speciation combinations
clean.data <- dplyr::filter(check.data, TADA.MethodSpeciation.Flag != "Nonstandardized")
# filter out only "NonStandardized" characteristic-method speciation combinations
clean.data <- dplyr::filter(check.data, TADA.MethodSpeciation.Flag != "NonStandardized")
}

# when clean = "both"
if (clean == "both") {
# filter out both "Invalid" and "Nonstandardized" characteristic-method speciation combinations
clean.data <- dplyr::filter(check.data, TADA.MethodSpeciation.Flag != "Nonstandardized" & TADA.MethodSpeciation.Flag != "Invalid")
# filter out both "Invalid" and "NonStandardized" characteristic-method speciation combinations
clean.data <- dplyr::filter(check.data, TADA.MethodSpeciation.Flag != "NonStandardized" & TADA.MethodSpeciation.Flag != "Invalid")
}

# when clean = "none"
Expand All @@ -259,8 +255,8 @@ TADA_FlagSpeciation <- function(.data, clean = c("invalid_only", "nonstandardize

# when flaggedonly = TRUE
if (flaggedonly == TRUE) {
# filter to show only invalid and/or nonstandardized characteristic-method speciation combinations
error.data <- dplyr::filter(clean.data, TADA.MethodSpeciation.Flag == "Invalid" | TADA.MethodSpeciation.Flag == "Nonstandardized")
# filter to show only invalid and/or nonStandardized characteristic-method speciation combinations
error.data <- dplyr::filter(clean.data, TADA.MethodSpeciation.Flag == "Invalid" | TADA.MethodSpeciation.Flag == "NonStandardized")
# if there are no errors
if (nrow(error.data) == 0) {
print("This dataframe is empty because either we did not find any invalid/nonstandardized characteristic-method speciation combinations or they were all filtered out")
Expand Down Expand Up @@ -364,11 +360,11 @@ TADA_FlagResultUnit <- function(.data, clean = c("invalid_only", "nonstandardize
check.data <- check.data %>%
dplyr::rename(TADA.ResultUnit.Flag = Status) %>%
dplyr::distinct()
# rename NA values to Nonstandardized in WQX.ResultUnitValidity column
check.data["TADA.ResultUnit.Flag"][is.na(check.data["TADA.ResultUnit.Flag"])] <- "Nonstandardized"
# rename NA values to Not Reviewed in TADA.ResultUnit.Flag column
check.data["TADA.ResultUnit.Flag"][is.na(check.data["TADA.ResultUnit.Flag"])] <- "Not Reviewed"

# if all rows are "Valid", return input with flag column
if (any(c("Nonstandardized", "Invalid") %in%
if (any(c("NonStandardized", "Invalid", "Not Reviewed") %in%
unique(check.data$TADA.ResultUnit.Flag)) == FALSE) {
print("All characteristic/unit combinations are valid in your dataframe. Returning input dataframe with TADA.ResultUnit.Flag column for tracking.")
check.data <- TADA_OrderCols(check.data)
Expand All @@ -389,13 +385,13 @@ TADA_FlagResultUnit <- function(.data, clean = c("invalid_only", "nonstandardize
# when clean = "nonstandardized_only"
if (clean == "nonstandardized_only") {
# filter out only "NonStandardized" characteristic-media-result unit combinations
clean.data <- dplyr::filter(check.data, TADA.ResultUnit.Flag != "Nonstandardized")
clean.data <- dplyr::filter(check.data, TADA.ResultUnit.Flag != "NonStandardized")
}

# when clean = "both"
if (clean == "both") {
# filter out both "Invalid" and "Nonstandardized" characteristic-method speciation combinations
clean.data <- dplyr::filter(check.data, TADA.ResultUnit.Flag != "Nonstandardized" & TADA.ResultUnit.Flag != "Invalid")
# filter out both "Invalid" and "NonStandardized" characteristic-media-result unit combinations
clean.data <- dplyr::filter(check.data, TADA.ResultUnit.Flag != "NonStandardized" & TADA.ResultUnit.Flag != "Invalid")
}

# when clean = "none"
Expand All @@ -412,11 +408,11 @@ TADA_FlagResultUnit <- function(.data, clean = c("invalid_only", "nonstandardize

# when flaggedonly = TRUE
if (flaggedonly == TRUE) {
# filter to show only invalid and/or nonstandardized characteristic-method speciation combinations
error.data <- dplyr::filter(clean.data, TADA.ResultUnit.Flag == "Invalid" | TADA.ResultUnit.Flag == "Nonstandardized")
# filter to show only "Invalid" and/or "NonStandardized" characteristic-media-result unit combinations
error.data <- dplyr::filter(clean.data, TADA.ResultUnit.Flag == "Invalid" | TADA.ResultUnit.Flag == "NonStandardized")
# if there are no errors
if (nrow(error.data) == 0) {
print("This dataframe is empty because either we did not find any invalid/nonstandardized characteristic-media-result unit combinations or they were all filtered out")
print("This dataframe is empty because either we did not find any invalid/NonStandardized characteristic-media-result unit combinations or they were all filtered out")
# error.data <- dplyr::select(error.data, -TADA.ResultUnit.Flag)
}
error.data <- TADA_OrderCols(error.data)
Expand Down
14 changes: 7 additions & 7 deletions R/ResultFlagsIndependent.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#' @return This function adds the TADA.AnalyticalMethod.Flag to a TADA dataframe. This column
#' flags invalid CharacteristicName, ResultAnalyticalMethod/MethodIdentifier,
#' and ResultAnalyticalMethod/MethodIdentifierContext combinations in your dataframe
#' as either "Nonstandardized", "Invalid", or "Valid". When clean = FALSE and
#' as either "NonStandardized", "Invalid", or "Valid". When clean = FALSE and
#' flaggedonly = TRUE, the dataframe is filtered to show only "Invalid"
#' characteristic-analytical method combinations; the column TADA.AnalyticalMethod.Flag
#' is still appended. When clean = TRUE and flaggedonly = FALSE, "Invalid" rows
Expand Down Expand Up @@ -80,12 +80,12 @@ TADA_FlagMethod <- function(.data, clean = TRUE, flaggedonly = FALSE) {
check.data <- check.data %>%
dplyr::rename(TADA.AnalyticalMethod.Flag = Status) %>%
dplyr::distinct()
# rename NA values to Nonstandardized in WQX.AnalyticalMethodValidity column
check.data["TADA.AnalyticalMethod.Flag"][is.na(check.data["TADA.AnalyticalMethod.Flag"])] <- "Nonstandardized"
# rename NA values to "NonStandardized" in the TADA.AnalyticalMethod.Flag column
check.data["TADA.AnalyticalMethod.Flag"][is.na(check.data["TADA.AnalyticalMethod.Flag"])] <- "NonStandardized"

if (flaggedonly == FALSE) {
# if all rows are "Valid" or NA "Nonstandardized", return input unchanged
## note: Cristina edited this on 9/19/22 to keep Nonstandardized/NA data when clean = TRUE. Now only Invalid data is removed.
# if all rows are "Valid" or NA "NonStandardized", return input unchanged
## note: Cristina edited this on 9/19/22 to keep NonStandardized/NA data when clean = TRUE. Now only Invalid data is removed.
if (any("Invalid" %in%
unique(check.data$TADA.AnalyticalMethod.Flag)) == FALSE) {
print("No invalid method/characteristic combinations in your dataframe. Returning the input dataframe with TADA.AnalyticalMethod.Flag column for tracking.")
Expand Down Expand Up @@ -355,7 +355,7 @@ TADA_FlagAboveThreshold <- function(.data, clean = TRUE, flaggedonly = FALSE) {
dplyr::mutate(TADA.ResultValueAboveUpperThreshold.Flag = dplyr::case_when(
TADA.ResultMeasureValue >= Maximum ~ as.character("Y"),
TADA.ResultMeasureValue < Maximum ~ as.character("N"),
TRUE ~ as.character("No threshold available") # this occurs when the char/unit combo is not in the table
TRUE ~ as.character("Not Reviewed") # this occurs when the char/unit combo is not in the table
))

# remove extraneous columns, fix field names
Expand Down Expand Up @@ -506,7 +506,7 @@ TADA_FlagBelowThreshold <- function(.data, clean = TRUE, flaggedonly = FALSE) {
dplyr::mutate(TADA.ResultValueBelowLowerThreshold.Flag = dplyr::case_when(
TADA.ResultMeasureValue <= Minimum ~ as.character("Y"),
TADA.ResultMeasureValue > Minimum ~ as.character("N"),
TRUE ~ as.character("No threshold available") # this occurs when the char/unit combo is not in the table
TRUE ~ as.character("Not Reviewed") # this occurs when the char/unit combo is not in the table
))

# remove extraneous columns, fix field names
Expand Down
2 changes: 1 addition & 1 deletion R/Utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ utils::globalVariables(c(
"SummationSpeciationConversionFactor", "SummationNote", "NutrientGroup",
"Target.Speciation", "TADA.NearbySiteGroups", "numres", "TADA.SingleOrgDupGroupID",
"TADA.MeasureQualifierCode.Flag", "MeasureQualifierCode", "value", "Flag_Column",
"Data_NCTCShepherdstown_HUC12", "ActivityStartDateTime"
"Data_NCTCShepherdstown_HUC12", "ActivityStartDateTime", "TADA.MultipleOrgDupGroupID"
))


Expand Down
Binary file added inst/extdata/Data_6Tribes_5y.rda
Binary file not shown.
Binary file added inst/extdata/Data_6Tribes_5y_Harmonized.rda
Binary file not shown.
Binary file added inst/extdata/Data_NCTCShepherdstown_HUC12.rda
Binary file not shown.
Binary file added inst/extdata/Data_Nutrients_UT.rda
Binary file not shown.
Loading
Loading