diff --git a/R/Figures.R b/R/Figures.R index 23c5f497..6ba83527 100644 --- a/R/Figures.R +++ b/R/Figures.R @@ -559,20 +559,16 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier") plot.data <- subset(.data, .data$Group == i) groupid <- paste0(unique(plot.data[, id_cols]), collapse = "_") - # units lable for y axis + # units label for y axis unit <- unique(plot.data$TADA.ResultMeasure.MeasureUnitCode) y_label <- "Activity Start Date and Time" - - # include depth in hover info - depth <- plot.data$ResultDepthHeightMeasure.MeasureValue - plot.data$TADA.DateTime <- as.POSIXct(paste(plot.data$ActivityStartDate, plot.data$ActivityStartTime.Time), format = "%Y-%m-%d %H:%M:%S") - + # construct plotly scatterplot one_scatterplot <- plotly::plot_ly( data = plot.data, type = "scatter", mode = "markers", - x = plot.data$TADA.DateTime, + x = plot.data$ActivityStartDateTime, # currently uses start date and time, may want to change to just ActivityStartDate after aggregation functions are written y = plot.data$TADA.ResultMeasureValue, # color = plot.data$TADA.ResultMeasureValue, marker = list(color = "#00bde3"), @@ -580,12 +576,19 @@ TADA_Scatterplot <- function(.data, id_cols = c("TADA.ComparableDataIdentifier") name = "All Data", hoverinfo = "text", hovertext = paste( - "Result:", plot.data$TADA.ResultMeasureValue, "
", - "DateTime:", plot.data$TADA.DateTime, "
", - "ResultDepth:", depth - ) + "Result:", paste0(plot.data$TADA.ResultMeasureValue, " ", plot.data$TADA.ResultMeasure.MeasureUnitCode), "
", + "Date Time:", plot.data$ActivityStartDateTime, "
", + "Result Depth:", paste0(plot.data$TADA.ResultDepthHeightMeasure.MeasureValue, " ", + plot.data$TADA.ResultDepthHeightMeasure.MeasureUnitCode), "
", + "Activity Relative Depth Name:", plot.data$ActivityRelativeDepthName, "
", + "Activity Depth:", paste0(plot.data$TADA.ActivityDepthHeightMeasure.MeasureValue, " ", + plot.data$TADA.ActivityDepthHeightMeasure.MeasureUnitCode), "
", + "Activity Top Depth:", paste0(plot.data$TADA.ActivityTopDepthHeightMeasure.MeasureValue, " ", + plot.data$TADA.ActivityTopDepthHeightMeasure.MeasureUnitCode), "
", + "Activity Bottom Depth:", paste0(plot.data$TADA.ActivityBottomDepthHeightMeasure.MeasureValue, " ", + plot.data$TADA.ActivityBottomDepthHeightMeasure.MeasureUnitCode), "
") ) - + # figure margin mrg <- list( l = 50, r = 20, diff --git a/R/LegacyFunctions.R b/R/LegacyFunctions.R index 3be895cd..642bfc4a 100644 --- a/R/LegacyFunctions.R +++ b/R/LegacyFunctions.R @@ -1,9 +1,9 @@ # # ADD LEGACY FUNCTIONS BY COPYING THEM HERE AND REMOVING THEIR ROXYGEN CONTENT BEFORE COMMENTING OUT AGAIN -# -# +# +# # # TADA.env <- new.env() -# -# +# +# # # Generate list of field names # # # # Function creates a list of common WQX fields that should be reviewed when @@ -43,11 +43,11 @@ # # # Create a table of fields and count of unique values in each field: # # Fields_Data_Nutrients_UT <- FilterFields(Data_Nutrients_UT) # # -# +# # FilterFields <- function(.data) { # # check .data is data.frame # TADA_CheckType(.data, "data.frame", "Input object") -# +# # # CREATE LIST OF FIELDS # # Find count of unique values in each column # col.names <- data.frame(Count = apply(.data, 2, function(x) length(unique(x)))) @@ -57,7 +57,7 @@ # row.names(col.names) <- NULL # # Reorder columns # col.names <- col.names[, c(2, 1)] -# +# # # Filter dataframe to include only recommended fields for filtering # # This does not limit the elements that can be used in other filter functions # # This only filters the table output of this function @@ -74,29 +74,29 @@ # "DetectionQuantitationLimitTypeName", # "SampleTissueAnatomyName", "LaboratoryName" # )) -# +# # # Reorder Count column in col.names from largest to smallest number # col.names <- col.names %>% # dplyr::arrange(desc(Count)) -# +# # # CREATE LIST OF UNIQUE VALUES PER FIELDS FROM DATAFRAME -# +# # # remove fields with only NAs from df # df <- .data %>% dplyr::select(where(~ !all(is.na(.x)))) -# +# # # Get list of unique values per field # UniqueValList <- apply(df, 2, function(x) { # data.frame(table(list(x))) # }) -# +# # # Reorder Freq column in each data frame from largest to smallest number # UniqueValList <- lapply(UniqueValList, function(X) { # X[order(-X$Freq), ] # }) -# +# # # Rename fields # UniqueValList <- lapply(UniqueValList, stats::setNames, c("FieldValues", "Count")) -# +# # # Filter list to include only recommended fields for filtering (cm removed 3/23/23) # # UniqueValList <- UniqueValList[c( # # "ActivityTypeCode", "TADA.ActivityMediaName", @@ -111,13 +111,13 @@ # # "DetectionQuantitationLimitTypeName", # # "SampleTissueAnatomyName", "LaboratoryName" # #)] -# +# # TADA.env$UniqueValList <- UniqueValList -# +# # print(col.names) # } -# -# +# +# # # Generate list of unique values in a given field # # # # Function creates a table and pie chart of all unique values @@ -158,34 +158,34 @@ # # # # # Create table and pie chart of "Hydrologic Condition" unique values and counts: # # FieldReview_HydrologicCondition <- FilterFieldReview(field = "HydrologicCondition", Data_Nutrients_UT) -# -# +# +# # FilterFieldReview <- function(field, .data) { # # if provided, check .data is data.frame # if (!missing(.data)) { # TADA_CheckType(.data, "data.frame", "Input object") # } # # execute function after checks are passed -# +# # # refresh UniqueValList # invisible(utils::capture.output(FilterFields(.data))) -# +# # # check that input is in UniqueValList # if (exists(field, TADA.env$UniqueValList) == FALSE) { # stop("The field entered into FilterFieldReview is not populated (all NA) in # the input data frame.") # } -# +# # # subset UniqueValList by input # df <- TADA.env$UniqueValList[[field]] # df$Legend = paste0(df$FieldValues, " - ", df$Count, " results") -# +# # # define number of colors required for pie chart # colorCount <- 
length(unique(df$FieldValues)) -# +# # # define color palette # getPalette <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(8, "Set2")) -# +# # # create pie chart # # look into fixing display of counts for small slices: # # https://stackoverflow.com/questions/28912059/labels-on-the-pie-chart-for-small-pieces-ggplot @@ -195,13 +195,13 @@ # ggplot2::coord_polar("y", start = 0) + # ggplot2::theme_void() # remove background, grid, numeric labels # # ggplot2::geom_text(ggplot2::aes(label = Count), color = "black", size=4, position = ggplot2::position_stack(vjust = 0.5)) -# +# # print(pie) # print(df) # } -# -# -# +# +# +# # # Generate list of parameters # # # # Function generates list of characteristics in the input dataframe, as well as @@ -219,20 +219,20 @@ # # # each parameter: # # ParameterList <- FilterParList(Data_Nutrients_UT) # # -# +# # FilterParList <- function(.data) { # # count the frequency of each value in TADA.CharactersticName field # ParValueCount <- data.frame(table(list(.data$TADA.CharacteristicName))) -# +# # # Reorder Freq column from largest to smallest number # ParValueCount <- ParValueCount[order(-ParValueCount$Freq), ] -# +# # # Rename fields # ParValueCount <- stats::setNames(ParValueCount, c("TADA.CharacteristicName", # "Count")) # print(ParValueCount) # } -# +# # # Generate list of field names subset by parameter # # # # Function subsets the input dataframe by the input parameter and creates a @@ -286,19 +286,19 @@ # # # Create list of fields for parameter "AMMONIA" with number of unique values in each field: # # AmmoniaFields <- FilterParFields(Data_Nutrients_UT, parameter = "AMMONIA") # # -# +# # FilterParFields <- function(.data, parameter) { # # check .data is data.frame # TADA_CheckType(.data, "data.frame", "Input object") # # check .data has required columns # TADA_CheckColumns(.data, "TADA.CharacteristicName") -# +# # # check parameter is in .data # if ((parameter %in% .data$TADA.CharacteristicName) == FALSE) { # stop("Input parameter for FilterParFields function is not in the # input dataframe.") # } -# +# # # SUBSET DATAFRAME; CREATE LIST OF FIELDS # df <- dplyr::filter(.data, TADA.CharacteristicName %in% parameter) # # Find count of unique values in each column @@ -309,8 +309,8 @@ # row.names(col.names) <- NULL # # Reorder columns # col.names <- col.names[, c(2, 1)] -# -# +# +# # # Filter col.names to include only fields recommended for filtering # # This does not limit the fields than can be entered in other filter # # functions. 
It simply subsets the output table to a more manageable @@ -336,13 +336,13 @@ # "ResultAnalyticalMethod.MethodIdentifierContext", # "AssemblageSampledName", "DetectionQuantitationLimitTypeName" # )) -# -# +# +# # # Reorder Count column in col.names from largest to smallest number # col.names <- col.names %>% # dplyr::arrange(desc(Count)) -# -# +# +# # # CREATE LIST OF UNIQUE VALUES PER FIELD FROM DATAFRAME # # remove fields with only NAs from df (NA-only fields were removed from WQP, not .data) # df <- df %>% dplyr::select(where(~ !all(is.na(.x)))) @@ -357,7 +357,7 @@ # # Rename fields # ParUniqueValList <- lapply(ParUniqueValList, stats::setNames, # c("FieldValues", "Count")) -# +# # # cm removed so we do not limit users ability to enter other fields # # Filter list to require only these fields for filtering # #ParUniqueValList <- ParUniqueValList[c( @@ -382,11 +382,11 @@ # # "AssemblageSampledName", "DetectionQuantitationLimitTypeName", # # "MonitoringLocationIdentifier" # #)] -# +# # TADA.env$ParUniqueValList <- ParUniqueValList # print(col.names) # } -# +# # # Generate list of unique values in a given field subset by parameter # # # # Function creates a table and pie chart of unique values, and counts of those @@ -438,7 +438,7 @@ # # # Create table and pie chart of monitoring locations for the parameter "AMMONIA" in dataframe: # # AmmoniaMonitoringLocations <- FilterParFieldReview(field = "MonitoringLocationIdentifier", Data_Nutrients_UT, parameter = "AMMONIA") # # -# +# # FilterParFieldReview <- function(field, .data, parameter) { # # if provided, check .data is data.frame # if (!missing(.data)) { @@ -451,31 +451,31 @@ # dataframe.") # } # } -# +# # # execute function after checks are passed -# +# # # refreshes UniqueValList # FilterParList(.data) # FilterParFields(.data, parameter) -# +# # invisible(utils::capture.output(FilterParFields(.data, parameter))) -# +# # # check that input is in ParUniqueValList # if (exists(field, TADA.env$ParUniqueValList) == FALSE) { # stop("The field entered into FilterParFieldReview is NOT available # (or all NA) for the parameter entered in the input dataframe.") # } -# +# # # subset UniqueValList by input # df <- TADA.env$ParUniqueValList[[field]] # df$Legend = paste0(df$FieldValues, " - ", df$Count, " results") -# +# # # define number of colors required for pie chart # colorCount <- length(unique(df$FieldValues)) -# +# # # define color palette # getPalette <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(8, "Set2")) -# +# # # create pie chart # pie <- ggplot2::ggplot(df, ggplot2::aes(x = "", y = Count, fill = Legend)) + # ggplot2::scale_fill_manual(values = getPalette(colorCount), name = field) + @@ -484,11 +484,11 @@ # ggplot2::theme_void() + # ggplot2::labs(title = parameter) #+ # # ggplot2::geom_text(ggplot2::aes(label = Count), color = "black", size=4, position = ggplot2::position_stack(vjust = 0.5)) -# +# # print(pie) # print(df) # } -# +# # # Check for Special Characters in Measure Value Fields - Deprecated # # # # Function checks for special characters and non-numeric values in the @@ -506,29 +506,29 @@ # # DetectionQuantitationLimitMeasure.MeasureValue fields are converted to class # # numeric, and copies of each column are created to preserve original # # character values. -# -# +# +# # MeasureValueSpecialCharacters <- function(.data) { -# +# # warning("This function is deprecated and does not return the correct column names. 
Please use TADA_ConvertSpecialChars() function instead.") -# +# # # check .data is data.frame # TADA_CheckType(.data, "data.frame", "Input object") -# +# # # .data required columns # required_cols <- c("ResultMeasureValue", "DetectionQuantitationLimitMeasure.MeasureValue") # # check .data has required columns # TADA_CheckColumns(.data, required_cols) -# +# # # execute function after checks are passed # # define check.data # check.data <- .data -# +# # # copy MeasureValue columns to MeasureValue.Original # check.data$ResultMeasureValue.Original <- check.data$ResultMeasureValue # check.data$DetectionLimitMeasureValue.Original <- # check.data$DetectionQuantitationLimitMeasure.MeasureValue -# +# # # add TADA.ResultMeasureValue.Flag column # flag.data <- check.data %>% # # apply function row by row @@ -543,7 +543,7 @@ # (grepl("\\d", ResultMeasureValue.Original) == TRUE) ~ as.character("Numeric"), # TRUE ~ "Coerced to NA" # )) -# +# # # add TADA.DetectionLimitMeasureValue.Flag column # flag.data <- flag.data %>% # # apply function row by row @@ -558,7 +558,7 @@ # (grepl("\\d", DetectionLimitMeasureValue.Original) == TRUE) ~ as.character("Numeric"), # TRUE ~ "Coerced to NA" # )) -# +# # # remove special characters before converting to numeric # flag.data$ResultMeasureValue <- stringr::str_replace_all( # flag.data$ResultMeasureValue, @@ -568,7 +568,7 @@ # flag.data$DetectionQuantitationLimitMeasure.MeasureValue, # c("<" = "", ">" = "", "~" = "", "," = "") # ) -# +# # # change measure value columns to numeric # # rename df # clean.data <- flag.data @@ -579,7 +579,7 @@ # # DetectionQuantitationLimitMeasure.MeasureValue # clean.data$DetectionQuantitationLimitMeasure.MeasureValue <- # suppressWarnings(as.numeric(clean.data$DetectionQuantitationLimitMeasure.MeasureValue)) -# +# # # reorder columns # # place flag column next to relevant fields # clean.data <- clean.data %>% @@ -597,7 +597,7 @@ # ) # return(clean.data) # } -# +# # #Identify Potential Duplicate Data Uploads # # # # Identifies data records uploaded by different organizations with the same date, @@ -616,19 +616,19 @@ # # # # @export # # -# +# # identifyPotentialDuplicates <- function(.data, dist_buffer = 100){ # dat = .data # dups = dat%>%dplyr::filter(!is.na(ResultMeasureValue))%>%dplyr::mutate(roundRV = round(ResultMeasureValue,digits=2))%>%dplyr::group_by(ActivityStartDate, ActivityStartTime.Time, CharacteristicName,ResultMeasureValue)%>%dplyr::summarise(numorgs = length(unique(OrganizationIdentifier)))%>%dplyr::filter(numorgs>1) # dups$dup_id = seq(1:dim(dups)[1]) -# +# # tdups = dplyr::left_join(dups, dat) # tdups$LatitudeMeasure = as.numeric(tdups$LatitudeMeasure) # tdups$LongitudeMeasure = as.numeric(tdups$LongitudeMeasure) -# +# # distances = tdups%>%dplyr::ungroup()%>%dplyr::select(dup_id,LatitudeMeasure,LongitudeMeasure) # dcoords = sf::st_as_sf(x = distances, coords = c("LongitudeMeasure","LatitudeMeasure"), crs="EPSG:4326") -# +# # dists = data.frame() # for(i in 1:max(dcoords$dup_id)){ # ds = subset(dcoords, dcoords$dup_id==i) @@ -637,15 +637,15 @@ # dsdist$TADA.idPotentialDuplicates.Flag = ifelse(dist<=dist_buffer,"POTENTIAL DUPLICATE DATAPOINT",NA) # dists = rbind(dists, dsdist) # } -# +# # tdups1 = merge(tdups, dists, all.x = TRUE) # tdups1 = tdups1[,!names(tdups1)%in%c("dup_id","numorgs")] # dat1 = merge(dat, tdups1, all.x = TRUE) -# +# # return(dat1) # } -# -# +# +# # # AutoFilter # # # # Function can be used to autofilter and simplify a WQP dataframe. 
@@ -690,9 +690,9 @@ # # } # # } # # -# -# -# +# +# +# # # TADA Profile Check # # # # This function checks if the column names in a dataframe include the TADA @@ -706,7 +706,7 @@ # # @return Boolean result indicating whether or not the input dataframe contains # # all of the TADA profile fields. # # -# +# # TADAprofileCheck <- function(.data) { # TADA.fields <- c( # "OrganizationIdentifier", "OrganizationFormalName", @@ -742,18 +742,18 @@ # "DetectionQuantitationLimitMeasure.MeasureUnitCode", "PreparationStartDate", # "ProviderName", "ActivityStartDateTime", "ActivityEndDateTime" # ) -# +# # if (("data.frame" %in% class(.data)) == FALSE) { # stop("Input object must be of class 'data.frame'") # } -# +# # if (all(TADA.fields %in% colnames(.data)) == TRUE) { # TRUE # } else { # stop("The dataframe does not contain the required fields to use TADA. Use either the full physical/chemical profile downloaded from WQP or download the TADA profile template available on the EPA TADA webpage.") # } # } -# +# # # Check for Potential Duplicates # # # # Sometimes multiple organizations submit the exact same data set to the @@ -795,7 +795,7 @@ # # # # # Flag and review potential duplicate data only: # # PotentialDup_reviewduplicatesonly <- PotentialDuplicateRowID(Data_Nutrients_UT, clean = FALSE, flaggedonly = TRUE) -# +# # PotentialDuplicateRowID <- function(.data, clean = TRUE, flaggedonly = FALSE) { # # check .data is data.frame # TADA_CheckType(.data, "data.frame", "Input object") @@ -815,7 +815,7 @@ # if (clean == TRUE & flaggedonly == TRUE) { # stop("Function not executed because clean and flaggedonly cannot both be TRUE") # } -# +# # # execute function after checks are passed # # get list of field names in .data # field.names <- colnames(.data) @@ -827,10 +827,10 @@ # ) # # create list of fields to check for duplicates across # dupe.fields <- field.names[!field.names %in% excluded.fields] -# +# # # subset list of duplicate rows # dupe.data <- .data[duplicated(.data[dupe.fields]), ] -# +# # # if no potential duplicates are found # if (nrow(dupe.data) == 0) { # if (flaggedonly == FALSE) { @@ -844,16 +844,16 @@ # return(dupe.data) # } # } -# +# # # if potential duplicates are found # if (nrow(dupe.data) != 0) { -# +# # # flag potential duplicates # dupe.data$TADA.PotentialDupRowIDs.Flag <- as.integer(seq_len(nrow(dupe.data))) -# +# # # merge flag column into .data # flag.data <- merge(.data, dupe.data, by = dupe.fields, all.x = TRUE) -# +# # # remove extraneous columns, fix field names # flag.data <- flag.data %>% # # remove ".x" suffix from column names @@ -863,30 +863,30 @@ # ) %>% # # remove columns with ".y" suffix # dplyr::select_at(dplyr::vars(-dplyr::ends_with(".y"))) -# +# # # flagged output, all data # if (clean == FALSE & flaggedonly == FALSE) { # flag.data <- TADA_OrderCols(flag.data) # return(flag.data) # } -# +# # # clean output # if (clean == TRUE & flaggedonly == FALSE) { # # remove duplicate rows # # seperate data into 2 dataframes by TADA.PotentialDupRowIDs.Flag (no NAs and NAs) # dup.data <- flag.data[!is.na(flag.data$TADA.PotentialDupRowIDs.Flag), ] # NAdup.data <- flag.data[is.na(flag.data$TADA.PotentialDupRowIDs.Flag), ] -# +# # nodup.data <- dup.data[!duplicated(dup.data$TADA.PotentialDupRowIDs.Flag), ] -# +# # clean.data <- rbind(nodup.data, NAdup.data) -# +# # # remove TADA.PotentialDupRowID column # clean.data <- dplyr::select(clean.data, -TADA.PotentialDupRowIDs.Flag) # clean.data <- TADA_OrderCols(clean.data) # return(clean.data) # } -# +# # # flagged data, errors only # if 
(clean == FALSE & flaggedonly == TRUE) { # # filter to show duplicate data only diff --git a/R/UnitConversions.R b/R/UnitConversions.R index 3e93f3b8..c98e7d18 100644 --- a/R/UnitConversions.R +++ b/R/UnitConversions.R @@ -5,7 +5,7 @@ #' that include speciation information and transfers the speciation information #' to the TADA.MethodSpecificationName field. #' -#' This function will ALWAYS add "WQX.ResultMeasureValue.UnitConversion" to the input dataframe. +#' This function will ALWAYS add "TADA.WQXResultUnitConversion" to the input dataframe. #' #' This field indicates if data can be converted."NoResultValue" means data #' cannot be converted because there is no ResultMeasureValue, and "NoTargetUnit" @@ -18,7 +18,7 @@ #' dataframe to perform conversions as necessary when transform = TRUE. #' #' This function adds the following three fields ONLY when transform=FALSE: -#' Adds: "WQX.ConversionFactor", "WQX.TargetUnit", and "USGS.SpeciationConversion". +#' Adds: "TADA.WQXUnitConversionFactor", "TADA.WQXTargetUnit", and "USGS.SpeciationConversion". #' #' @param .data TADA dataframe #' @@ -32,9 +32,9 @@ #' #' When transform = FALSE, result values and units are NOT converted to WQX target units, #' but columns are appended to indicate what the target units and conversion factors are, -#' and if the data can be converted. In addition to "WQX.ResultMeasureValue.UnitConversion" -#' and "WQX.DetectionLimitMeasureValue.UnitConversion", transform=FALSE will add the -#' following two fields to the input dataframe: "WQX.ConversionFactor" and "WQX.TargetUnit". +#' and if the data can be converted. In addition to "TADA.WQXResultUnitConversion" +#' and "TADA.WQXDetectionLimitUnitConversion", transform=FALSE will add the +#' following two fields to the input dataframe: "TADA.WQXUnitConversionFactor" and "TADA.WQXTargetUnit". 
#' #' @export #' @@ -45,7 +45,7 @@ #' ResultUnitsConverted <- (Data_Nutrients_UT) #' #' # Do not convert result values and units, but add two new columns titled -#' # "WQX.ConversionFactor" and "WQX.TargetUnit": +#' # "TADA.WQXUnitConversionFactor" and "TADA.WQXTargetUnit": #' ResultUnitsNotConverted <- TADA_ConvertResultUnits(Data_Nutrients_UT, transform = FALSE) #' TADA_ConvertResultUnits <- function(.data, transform = TRUE) { @@ -93,8 +93,8 @@ TADA_ConvertResultUnits <- function(.data, transform = TRUE) { # rename columns flag.data <- check.data %>% - dplyr::rename(WQX.TargetUnit = Target.Unit) %>% - dplyr::rename(WQX.ConversionFactor = Conversion.Factor) %>% + dplyr::rename(TADA.WQXTargetUnit = Target.Unit) %>% + dplyr::rename(TADA.WQXUnitConversionFactor = Conversion.Factor) %>% dplyr::rename(USGS.SpeciationConversion = Target.Speciation) # if temp data exists, calculate conversion factor @@ -106,22 +106,22 @@ TADA_ConvertResultUnits <- function(.data, transform = TRUE) { # Calculate deg F and deg C, replace Conversion factor values flag.data <- flag.data %>% # create flag column - dplyr::mutate(WQX.ConversionFactor = dplyr::case_when( + dplyr::mutate(TADA.WQXUnitConversionFactor = dplyr::case_when( TADA.ResultMeasure.MeasureUnitCode == "deg F" ~ as.numeric(((TADA.ResultMeasureValue - 32) * (5 / 9)) / TADA.ResultMeasureValue), TADA.ResultMeasure.MeasureUnitCode == "deg K" ~ as.numeric((TADA.ResultMeasureValue - 273.15) / TADA.ResultMeasureValue), - TRUE ~ WQX.ConversionFactor + TRUE ~ TADA.WQXUnitConversionFactor )) } - # add WQX.ResultMeasureValue.UnitConversion column + # add TADA.WQXResultUnitConversion column flag.data <- flag.data %>% # create flag column - dplyr::mutate(WQX.ResultMeasureValue.UnitConversion = dplyr::case_when( - (!is.na(TADA.ResultMeasureValue) & !is.na(WQX.TargetUnit)) ~ as.character("Convert"), + dplyr::mutate(TADA.WQXResultUnitConversion = dplyr::case_when( + (!is.na(TADA.ResultMeasureValue) & !is.na(TADA.WQXTargetUnit)) ~ as.character("Convert"), is.na(TADA.ResultMeasureValue) ~ as.character("No Result Value"), - is.na(WQX.TargetUnit) ~ as.character("No Target Unit") + is.na(TADA.WQXTargetUnit) ~ as.character("No Target Unit") )) if (transform == FALSE) { @@ -137,17 +137,17 @@ TADA_ConvertResultUnits <- function(.data, transform = TRUE) { # apply conversions where there is a target unit, use original value if no target unit dplyr::mutate(TADA.ResultMeasureValue = dplyr::case_when( is.na(TADA.ResultMeasureValue) ~ TADA.ResultMeasureValue, - !is.na(WQX.TargetUnit) ~ - (TADA.ResultMeasureValue * WQX.ConversionFactor), - is.na(WQX.TargetUnit) ~ TADA.ResultMeasureValue + !is.na(TADA.WQXTargetUnit) ~ + (TADA.ResultMeasureValue * TADA.WQXUnitConversionFactor), + is.na(TADA.WQXTargetUnit) ~ TADA.ResultMeasureValue )) # populate ResultMeasure.MeasureUnitCode clean.data <- clean.data %>% # use target unit where there is a target unit, use original unit if no target unit dplyr::mutate(TADA.ResultMeasure.MeasureUnitCode = dplyr::case_when( - !is.na(WQX.TargetUnit) ~ WQX.TargetUnit, - is.na(WQX.TargetUnit) ~ TADA.ResultMeasure.MeasureUnitCode + !is.na(TADA.WQXTargetUnit) ~ TADA.WQXTargetUnit, + is.na(TADA.WQXTargetUnit) ~ TADA.ResultMeasure.MeasureUnitCode )) # Convert method speciation column for USGS data @@ -158,20 +158,20 @@ TADA_ConvertResultUnits <- function(.data, transform = TRUE) { clean.data$TADA.MethodSpecificationName <- ifelse(!is.na(clean.data$USGS.SpeciationConversion), clean.data$USGS.SpeciationConversion, clean.data$TADA.MethodSpecificationName) - # edit 
WQX.ResultMeasureValue.UnitConversion column + # edit TADA.WQXResultUnitConversion column clean.data <- clean.data %>% # apply function row by row- EDH - I don't think this is needed (I think default behavior of case_when is row by row)? # dplyr::rowwise() %>% # create flag column - dplyr::mutate(WQX.ResultMeasureValue.UnitConversion = dplyr::case_when( - (WQX.ConversionFactor == 1) ~ as.character("No Conversion Needed"), - (!is.na(TADA.ResultMeasureValue) & !is.na(WQX.TargetUnit)) ~ as.character("Converted"), - TRUE ~ WQX.ResultMeasureValue.UnitConversion + dplyr::mutate(TADA.WQXResultUnitConversion = dplyr::case_when( + (TADA.WQXUnitConversionFactor == 1) ~ as.character("No Conversion Needed"), + (!is.na(TADA.ResultMeasureValue) & !is.na(TADA.WQXTargetUnit)) ~ as.character("Converted"), + TRUE ~ TADA.WQXResultUnitConversion )) # remove extraneous columns, fix field names clean.data <- clean.data %>% - dplyr::select(-c("WQX.ConversionFactor", "WQX.TargetUnit", "USGS.SpeciationConversion")) + dplyr::select(-c("TADA.WQXUnitConversionFactor", "TADA.WQXTargetUnit", "USGS.SpeciationConversion")) # create new comparable data identifier column following conversion clean.data <- TADA_CreateComparableID(clean.data) diff --git a/R/Utilities.R b/R/Utilities.R index 3b89b3a0..3952236c 100644 --- a/R/Utilities.R +++ b/R/Utilities.R @@ -89,7 +89,9 @@ TADA_AutoClean <- function(.data) { required_cols <- c( "ActivityMediaName", "ResultMeasureValue", "ResultMeasure.MeasureUnitCode", "CharacteristicName", "ResultSampleFractionText", "MethodSpecificationName", - "DetectionQuantitationLimitMeasure.MeasureUnitCode", "ResultDetectionConditionText" + "DetectionQuantitationLimitMeasure.MeasureUnitCode", "ResultDetectionConditionText", + "ResultIdentifier", "DetectionQuantitationLimitMeasure.MeasureValue", + "LatitudeMeasure", "LongitudeMeasure" ) # check .data has required columns @@ -516,35 +518,38 @@ TADA_OrderCols <- function(.data) { ) tadacols <- c( - "TADA.LatitudeMeasure", - "TADA.LongitudeMeasure", - "TADA.NearbySiteGroups", - "TADA.InvalidCoordinates.Flag", - "TADA.QAPPDocAvailable", "TADA.ActivityMediaName", "TADA.CharacteristicName", "TADA.CharacteristicNameAssumptions", - "TADA.NutrientSummationGroup", - "TADA.NutrientSummationEquation", - "TADA.AggregatedContinuousData.Flag", "TADA.ResultMeasureValue", - "TADA.ResultValueAggregation.Flag", - "TADA.NutrientSummation.Flag", + "TADA.ResultMeasure.MeasureUnitCode", + "TADA.WQXResultUnitConversion", + "TADA.WQXTargetUnit", + "TADA.WQXUnitConversionFactor", + "TADA.UnitConversionFactor", "TADA.ResultMeasureValueDataTypes.Flag", + "TADA.ResultValueAggregation.Flag", + + "TADA.MeasureQualifierCode.Flag", "TADA.CensoredData.Flag", "TADA.CensoredMethod", - "TADA.ResultMeasure.MeasureUnitCode", - "WQX.TargetUnit", - "WQX.ConversionFactor", - "WQX.ResultMeasureValue.UnitConversion", - "TADA.UnitConversionFactor", - "WQX.DetectionLimitMeasureValue.UnitConversion", - "AboveWQXUpperThreshold", - "BelowWQXLowerThreshold", + + "TADA.NutrientSummation.Flag", + "TADA.NutrientSummationGroup", + "TADA.NutrientSummationEquation", + + "TADA.LatitudeMeasure", + "TADA.LongitudeMeasure", + "TADA.InvalidCoordinates.Flag", + "TADA.NearbySiteGroups", + + "TADA.QAPPDocAvailable", + "TADA.AggregatedContinuousData.Flag", + "TADA.ResultValueAboveUpperThreshold.Flag", + "TADA.ResultUnit.Flag", "CombinationValidity", - "WQX.ResultMeasureValue.UnitConversion", "TADA.MethodSpecificationName", "TADA.AnalyticalMethod.Flag", "TADA.MethodSpeciation.Flag", @@ -970,10 +975,10 @@ 
TADA_RunKeyFlagFunctions <- function(.data, remove_na = TRUE, clean = TRUE) { } -#' TADA_OvernightTesting -#' -#' @return console inputs and outputs -#' +# TADA_OvernightTesting +# +# @return console inputs and outputs +# # TADA_OvernightTesting <- function(){ # @@ -1106,3 +1111,171 @@ TADA_UpdateExampleData <- function() { save(Data_NCTCShepherdstown_HUC12, file = "inst/extdata/Data_NCTCShepherdstown_HUC12.rda") rm(Data_NCTCShepherdstown_HUC12) } + + + +#' TADA Module 1 Required Fields Check +#' +#' This function checks if all required fields for TADA Module 1 are +#' included in the input dataframe. +#' +#' @param .data A dataframe +#' +#' @return Boolean result indicating whether or not the input dataframe contains +#' all of the TADA profile fields. +#' + +TADA_CheckRequiredFields <- function(.data) { + TADA.fields <- c( + + # consider deleting below from TADA profile + + #"ActivityEndDate", + #"ActivityEndTime.Time", + #"ActivityEndTime.TimeZoneCode", + #"ActivityConductingOrganizationText", + #"SampleAquifer", + #"ActivityLocation.LatitudeMeasure", + #"ActivityLocation.LongitudeMeasure", + #"ResultStatusIdentifier", + #"ResultWeightBasisText", + #"ResultTemperatureBasisText", + #"ResultParticleSizeBasisText", + #"USGSPCode", + #"BinaryObjectFileName", + #"BinaryObjectFileTypeCode", + #"ResultFileUrl", + #"AnalysisStartDate", + #"ResultDetectionQuantitationLimitUrl", + #"LabSamplePreparationUrl", + #"timeZoneStart", + #"timeZoneEnd", + #"ActivityEndDateTime", + #"SourceMapScaleNumeric", + #"HorizontalAccuracyMeasure.MeasureValue", + #"HorizontalAccuracyMeasure.MeasureUnitCode", + #"HorizontalCollectionMethodName", + #"HorizontalCoordinateReferenceSystemDatumName", + #"VerticalMeasure.MeasureValue", + #"VerticalMeasure.MeasureUnitCode", + #"VerticalAccuracyMeasure.MeasureValue", + #"VerticalAccuracyMeasure.MeasureUnitCode", + #"VerticalCollectionMethodName", + #"VerticalCoordinateReferenceSystemDatumName", + #"AquiferName", + #"LocalAqfrName", + #"FormationTypeText", + #"ProjectMonitoringLocationWeightingUrl", + #"DrainageAreaMeasure.MeasureValue", + #"DrainageAreaMeasure.MeasureUnitCode", + #"ContributingDrainageAreaMeasure.MeasureValue", + #"ContributingDrainageAreaMeasure.MeasureUnitCode", + + # carried through but are not currently needed to run functions, + # with the EXCEPTION of filtering + "ProjectDescriptionText", + "SamplingDesignTypeCode", + "ActivityStartDate", + "ActivityStartTime.Time", + "ActivityStartTime.TimeZoneCode", + "ResultDepthAltitudeReferencePointText", + "ActivityDepthAltitudeReferencePointText", + "ProjectName", + "ActivityCommentText", + "HydrologicCondition", + "HydrologicEvent", + "MonitoringLocationName", + "SampleCollectionMethod.MethodIdentifier", + "SampleCollectionMethod.MethodIdentifierContext", + "SampleCollectionMethod.MethodName", + "SampleCollectionMethod.MethodDescriptionText", + "ActivityMediaSubdivisionName", + "DataQuality.PrecisionValue", + "DataQuality.BiasValue", + "DataQuality.ConfidenceIntervalValue", + "DataQuality.UpperConfidenceLimitValue", + "DataQuality.LowerConfidenceLimitValue", + "SubjectTaxonomicName", + "SampleTissueAnatomyName", + "ResultAnalyticalMethod.MethodIdentifier", + "ResultAnalyticalMethod.MethodIdentifierContext", + "ResultAnalyticalMethod.MethodName", + "ResultAnalyticalMethod.MethodUrl", + "ResultAnalyticalMethod.MethodDescriptionText", + "ResultCommentText", + "LaboratoryName", + "ResultLaboratoryCommentText", + "MonitoringLocationDescriptionText", + "HUCEightDigitCode", + "AquiferTypeName", # can be used to 
remove groundwater sites + "ConstructionDateText", # can be used to remove groundwater sites + "WellDepthMeasure.MeasureValue", # can be used to remove groundwater sites + "WellDepthMeasure.MeasureUnitCode", # can be used to remove groundwater sites + "WellHoleDepthMeasure.MeasureValue", # can be used to remove groundwater sites + "WellHoleDepthMeasure.MeasureUnitCode", # can be used to remove groundwater sites + "ProviderName", + "LastUpdated", + + # required + "TADA.CharacteristicName", + "TADA.ResultSampleFractionText", + "TADA.MethodSpecificationName", + "TADA.ResultMeasure.MeasureUnitCode", + "TADA.ActivityMediaName", + "TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode", + "TADA.ResultMeasureValueDataTypes.Flag", + "TADA.LatitudeMeasure", + "TADA.LongitudeMeasure", + "OrganizationFormalName", + "ActivityTypeCode", + "ActivityMediaName", + "MonitoringLocationTypeName", + "ActivityStartDateTime", + "CharacteristicName", + "ResultSampleFractionText", + "MethodSpecificationName", + "ResultMeasureValue", + "ResultMeasure.MeasureUnitCode", + "ResultDetectionConditionText", + "DetectionQuantitationLimitTypeName", + "DetectionQuantitationLimitMeasure.MeasureValue", + "DetectionQuantitationLimitMeasure.MeasureUnitCode", + "ResultDepthHeightMeasure.MeasureValue", + "ResultDepthHeightMeasure.MeasureUnitCode", + "ActivityRelativeDepthName", + "ActivityDepthHeightMeasure.MeasureValue", + "ActivityDepthHeightMeasure.MeasureUnitCode", + "ActivityTopDepthHeightMeasure.MeasureValue", + "ActivityTopDepthHeightMeasure.MeasureUnitCode", + "ActivityBottomDepthHeightMeasure.MeasureValue", + "ActivityBottomDepthHeightMeasure.MeasureUnitCode", + "CountryCode", + "StateCode", + "CountyCode", + "LatitudeMeasure", + "LongitudeMeasure", + "QAPPApprovedIndicator", + "QAPPApprovalAgencyName", + "ProjectFileUrl", + "MeasureQualifierCode", + "SampleCollectionEquipmentName", #required for continuous flag + "StatisticalBaseCode", #required for continuous flag + "ResultTimeBasisText", #required for continuous flag + "ResultValueTypeName", #required for continuous flag + "ActivityIdentifier", + "ProjectIdentifier", + "MonitoringLocationIdentifier", + "ResultIdentifier", + "OrganizationIdentifier" + ) + + if (("data.frame" %in% class(.data)) == FALSE) { + stop("Input object must be of class 'data.frame'") + } + + if (all(TADA.fields %in% colnames(.data)) == TRUE) { + TRUE + } else { + stop("The dataframe does not contain the required fields to use TADA Module 1.") + } +} diff --git a/man/TADA_CheckRequiredFields.Rd b/man/TADA_CheckRequiredFields.Rd new file mode 100644 index 00000000..3f83a20b --- /dev/null +++ b/man/TADA_CheckRequiredFields.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/Utilities.R +\name{TADA_CheckRequiredFields} +\alias{TADA_CheckRequiredFields} +\title{TADA Module 1 Required Fields Check} +\usage{ +TADA_CheckRequiredFields(.data) +} +\arguments{ +\item{.data}{A dataframe} +} +\value{ +Boolean result indicating whether or not the input dataframe contains +all of the TADA profile fields. +} +\description{ +This function checks if all required fields for TADA Module 1 are +included in the input dataframe. +} diff --git a/man/TADA_ConvertResultUnits.Rd b/man/TADA_ConvertResultUnits.Rd index 4a2dfece..e3d9e9c7 100644 --- a/man/TADA_ConvertResultUnits.Rd +++ b/man/TADA_ConvertResultUnits.Rd @@ -20,9 +20,9 @@ respective values within the "TADA.ResultMeasureValue" field. 
When transform = FALSE, result values and units are NOT converted to WQX target units, but columns are appended to indicate what the target units and conversion factors are, -and if the data can be converted. In addition to "WQX.ResultMeasureValue.UnitConversion" -and "WQX.DetectionLimitMeasureValue.UnitConversion", transform=FALSE will add the -following two fields to the input dataframe: "WQX.ConversionFactor" and "WQX.TargetUnit". +and if the data can be converted. In addition to "TADA.WQXResultUnitConversion" +and "TADA.WQXDetectionLimitUnitConversion", transform=FALSE will add the +following two fields to the input dataframe: "TADA.WQXUnitConversionFactor" and "TADA.WQXTargetUnit". } \description{ This function compares measure units in the input data to the Water Quality @@ -31,7 +31,7 @@ that include speciation information and transfers the speciation information to the TADA.MethodSpecificationName field. } \details{ -This function will ALWAYS add "WQX.ResultMeasureValue.UnitConversion" to the input dataframe. +This function will ALWAYS add "TADA.WQXResultUnitConversion" to the input dataframe. This field indicates if data can be converted."NoResultValue" means data cannot be converted because there is no ResultMeasureValue, and "NoTargetUnit" @@ -44,7 +44,7 @@ It also uses the"TADA.ResultMeasureValue" and dataframe to perform conversions as necessary when transform = TRUE. This function adds the following three fields ONLY when transform=FALSE: -Adds: "WQX.ConversionFactor", "WQX.TargetUnit", and "USGS.SpeciationConversion". +Adds: "TADA.WQXUnitConversionFactor", "TADA.WQXTargetUnit", and "USGS.SpeciationConversion". } \examples{ # Load example dataset: @@ -53,7 +53,7 @@ data(Data_Nutrients_UT) ResultUnitsConverted <- (Data_Nutrients_UT) # Do not convert result values and units, but add two new columns titled -# "WQX.ConversionFactor" and "WQX.TargetUnit": +# "TADA.WQXUnitConversionFactor" and "TADA.WQXTargetUnit": ResultUnitsNotConverted <- TADA_ConvertResultUnits(Data_Nutrients_UT, transform = FALSE) } diff --git a/man/TADA_UpdateExampleData.Rd b/man/TADA_UpdateExampleData.Rd deleted file mode 100644 index b63122f8..00000000 --- a/man/TADA_UpdateExampleData.Rd +++ /dev/null @@ -1,14 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/Utilities.R -\name{TADA_UpdateExampleData} -\alias{TADA_UpdateExampleData} -\title{TADA_OvernightTesting} -\usage{ -TADA_UpdateExampleData() -} -\value{ -console inputs and outputs -} -\description{ -TADA_OvernightTesting -} diff --git a/tests/testthat/test-DataDiscoveryRetrieval.R b/tests/testthat/test-DataDiscoveryRetrieval.R index 90f7c58c..08802300 100644 --- a/tests/testthat/test-DataDiscoveryRetrieval.R +++ b/tests/testthat/test-DataDiscoveryRetrieval.R @@ -136,7 +136,7 @@ test_that("TADA_DataRetrieval", { ) %in% names(tada1))) }) -# testing that "meters" is successfully replaced with "m". This feature is part of the TADA_AutoClean function +# testing that NWIS USGS only domain value "meters" is successfully replaced with "m". 
This feature is part of the TADA_AutoClean function # which runs automatically when TADA_DataRetrieval runs test_that("TADA_DataRetrieval", { check_autoclean_meters_works <- TADA_DataRetrieval( diff --git a/vignettes/TADAModule1.Rmd b/vignettes/TADAModule1.Rmd index ff2e738c..7e84781e 100644 --- a/vignettes/TADAModule1.Rmd +++ b/vignettes/TADAModule1.Rmd @@ -78,7 +78,7 @@ remotes::install_github("USEPA/TADA", # when developing the package, update this chunk to the current repository, so it runs with all of the new features prior to a PR to develop remotes::install_github("USEPA/TADA", - ref = "figures", + ref = "081023-cm", dependencies = TRUE ) ```
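
Below is a minimal sketch (editor's addition, not part of the diff) of how the renamed unit-conversion columns and the new Module 1 check introduced in this PR might be exercised together. It assumes the branch installed in the vignette chunk above and uses the `Data_Nutrients_UT` example dataset already referenced in the roxygen examples; because no `@export` tag appears for `TADA_CheckRequiredFields()` in this diff, the sketch calls it via `:::` (an assumption about the intended export status).

```r
library(TADA)

# Example dataset shipped with the package (referenced in the roxygen examples above)
data(Data_Nutrients_UT)

# New Module 1 check added in this PR: returns TRUE or stops with an error.
# The ::: access is an assumption; the diff shows no @export tag for this function.
TADA:::TADA_CheckRequiredFields(Data_Nutrients_UT)

# With transform = FALSE, TADA_ConvertResultUnits() appends the renamed columns
# (TADA.WQXTargetUnit, TADA.WQXUnitConversionFactor, TADA.WQXResultUnitConversion)
# instead of converting result values in place:
not_converted <- TADA_ConvertResultUnits(Data_Nutrients_UT, transform = FALSE)

# Summarize the conversion status flags, including any NAs
table(not_converted$TADA.WQXResultUnitConversion, useNA = "ifany")
```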