Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#346 - Add tribal boundaries to overview map #405

Merged
merged 12 commits into from
Feb 22, 2024
11 changes: 9 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@ Package: TADA
Type: Package
Title: TADA (Tools for Automated Data Analysis) R Package
Version: 0.0.1
Organization: U.S. Environmental Protection Agency
Authors@R:
c(person(given = "Cristina",
c(person(given = "U.S. Environmental Protection Agency",
role = "aut"),
person(given = "Cristina",
family = "Mullin",
role = c("aut", "cre"),
email = "[email protected]",
Expand All @@ -15,6 +16,9 @@ Authors@R:
person(given = "Elise",
family = "Hinman",
role = "aut"),
person(given = "Hillary",
family = "Marler",
role = "aut"),
person(given = "Kathleen",
family = "Healy",
role = "aut"),
Expand All @@ -24,6 +28,9 @@ Authors@R:
person(given = "Laura",
family = "Decicco",
role = "ctb"),
person(given = "Renae",
family = "Myers",
role = "aut"),
person(given = "Brad",
family = "Cooper",
role = "ctr"),
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,6 @@ export(TADA_Stats)
export(TADA_SubstituteDeprecatedChars)
export(TADA_SummarizeColumn)
export(TADA_TwoCharacteristicScatterplot)
export(TADA_addPoints)
export(TADA_addPolys)
importFrom(magrittr,"%>%")
35 changes: 30 additions & 5 deletions R/Figures.R
Original file line number Diff line number Diff line change
Expand Up @@ -369,8 +369,8 @@ TADA_Histogram <- function(.data, id_cols = c("TADA.ComparableDataIdentifier"))
#' @return A leaflet map that shows all sites in the data frame, where larger point sizes
#' indicate more results collected at a site, and darker point colors indicate more
#' characteristics measured at that site. Users can click on points on the map to see
#' a pop-up window with exact counts for measurements, visits, and characteristics
#' associated with each site.
#' a pop-up window with exact counts for measurements (i.e. number of rows),
#' visits (number of unique Activity ID's), and characteristics associated with each site.
#'
#' @export
#'
Expand All @@ -395,12 +395,11 @@ TADA_OverviewMap <- function(.data) {
return(leaflet::addLegend(map, colors = colorAdditions, labels = labelAdditions, opacity = opacity, title = "Measurements"))
}


sumdat <- .data %>%
dplyr::group_by(MonitoringLocationIdentifier, MonitoringLocationName, TADA.LatitudeMeasure, TADA.LongitudeMeasure) %>%
dplyr::summarise("Sample_Count" = length(unique(ResultIdentifier)), "Visit_Count" = length(unique(ActivityStartDate)), "Parameter_Count" = length(unique(TADA.CharacteristicName)), "Organization_Count" = length(unique(OrganizationIdentifier)))

pt_sizes <- round(quantile(sumdat$Sample_Count, probs = c(0.1, 0.25, 0.5, 0.75)), 0)
pt_sizes <- round(stats::quantile(sumdat$Sample_Count, probs = c(0.1, 0.25, 0.5, 0.75)), 0)
pt_labels <- c(
paste0("<=", pt_sizes[1]),
paste0(">", pt_sizes[1]),
Expand All @@ -424,10 +423,25 @@ TADA_OverviewMap <- function(.data) {
domain = sumdat$Parameter_Count
)

# Tribal layers will load by default in the overview map, restricted by the bounding box of the current dataset
# They can be toggled on and off using a button (all layers work together and can't be turned on/off individually).
# Colors and icons are as discussed previously (orange/tan colors and open triangle icons for points) but can be changed to match HMW if desired.
bbox <- sf::st_bbox(
c(
xmin = min(sumdat$TADA.LongitudeMeasure),
ymin = min(sumdat$TADA.LatitudeMeasure),
xmax = max(sumdat$TADA.LongitudeMeasure),
ymax = max(sumdat$TADA.LatitudeMeasure)
),
crs = sf::st_crs(sumdat)
)
vbbox <- bbox %>%
as.vector()

map <- leaflet::leaflet() %>%
leaflet::addProviderTiles("Esri.WorldTopoMap", group = "World topo", options = leaflet::providerTileOptions(updateWhenZooming = FALSE, updateWhenIdle = TRUE)) %>%
leaflet::clearShapes() %>% # get rid of whatever was there before if loading a second dataset
leaflet::fitBounds(lng1 = min(sumdat$TADA.LongitudeMeasure), lat1 = min(sumdat$TADA.LatitudeMeasure), lng2 = max(sumdat$TADA.LongitudeMeasure), lat2 = max(sumdat$TADA.LatitudeMeasure)) %>% # fit to bounds of data in tadat$raw
leaflet::fitBounds(lng1 = vbbox[1], lat1 = vbbox[2], lng2 = vbbox[3], lat2 = vbbox[4]) %>% # fit to bounds of data in tadat$raw
leaflet.extras::addResetMapButton() %>% # button to reset to initial zoom and lat/long
leaflet::addCircleMarkers(
data = sumdat,
Expand Down Expand Up @@ -457,6 +471,17 @@ TADA_OverviewMap <- function(.data) {
colors = "black",
labels = site_legend$Sample_n, sizes = site_legend$Point_size * 2
)
# TADA_addPolys and TADA_addPoints are in Utilities.R
map <- TADA_addPolys(map, AKAllotmentsUrl, "Tribes", "Alaska Allotments", bbox)
map <- TADA_addPolys(map, AmericanIndianUrl, "Tribes", "American Indian", bbox)
map <- TADA_addPolys(map, OffReservationUrl, "Tribes", "Off Reservation", bbox)
map <- TADA_addPolys(map, OKTribeUrl, "Tribes", "Oklahoma Tribe", bbox)
map <- TADA_addPoints(map, AKVillagesUrl, "Tribes", "Alaska Native Villages", bbox)
map <- TADA_addPoints(map, VATribeUrl, "Tribes", "Virginia Tribe", bbox)
map <- leaflet::addLayersControl(map,
overlayGroups = c("Tribes"),
options = leaflet::layersControlOptions(collapsed = FALSE)
)
return(map)
})
}
Expand Down
181 changes: 88 additions & 93 deletions R/Filtering.R
Original file line number Diff line number Diff line change
Expand Up @@ -244,193 +244,188 @@ TADA_FieldValuesTable <- function(.data, field = "null", characteristicName = "n
#' TADA_AnalysisDataFilter
#'
#' This function will filter the data set and retain only the media types
#' selected by the user. It uses ActivityMediaSubdivisionName, AquiferName,
#' selected by the user. It uses ActivityMediaSubdivisionName, AquiferName,
#' LocalAqfrName,ConstructionDateText, WellDepthMeasure.MeasureValue,
#' WellDepthMeasure.MeasureUnitCode, WellHoleDepthMeasure.MeasureValue, and
#' WellHoleDepthMeasure.MeasureUnitCode to identify groundwater samples. Users
#' can select whether sediment, fish tissue and/or surface water should be included. in the data set. An
#' An additional column, TADA.AssessmentData.Flag, specifies whether each row should
#' WellHoleDepthMeasure.MeasureUnitCode to identify groundwater samples. Users
#' can select whether sediment, fish tissue and/or surface water should be included. in the data set. An
#' An additional column, TADA.AssessmentData.Flag, specifies whether each row should
#' be included in the assessment workflow and why. Setting clean = TRUE, means
#' that all results not flagged for use in assessment workflow will be removed
#' that all results not flagged for use in assessment workflow will be removed
#' and the TADA.AssessmentData.Flag column will not be added.
#'
#'
#' *Need to add fish tissue to this function once new WQX profiles are available.
#' (HRM, 1/22/4)
#'
#'
#' @param .data A TADA profile object
#'
#'
#' @param clean Boolean argument; removes all results not flagged for use in
#' assessment workflow. TADA.Media.Flag and TADA.AssessmentData.Flag
#' assessment workflow. TADA.Media.Flag and TADA.AssessmentData.Flag
#' columns will not be added Default is clean = TRUE.
#'
#'
#' @param surface_water Boolean argument; specifies whether surface water
#' results should be included in the returned data frame. Default is
#' results should be included in the returned data frame. Default is
#' surface_water = TRUE, surface water samples are retained in the data frame.
#'
#'
#' @param ground_water Boolean argument; specifies whether ground water
#' results should be included in the returned data frame. Default is
#' ground_water = FALSE, ground water samples are not retained in the data
#' results should be included in the returned data frame. Default is
#' ground_water = FALSE, ground water samples are not retained in the data
#' frame.
#'
#' @param sediment Boolean argument; specifies whether sediment results should
#' be included in the returned data frame. Default is sediment = FALSE,
#'
#' @param sediment Boolean argument; specifies whether sediment results should
#' be included in the returned data frame. Default is sediment = FALSE,
#' sediment samples are not retained in the data frame.
#'
#'
#' @return If clean = TRUE, returns the data frame with only the media types
#' selected by the user. If clean = FALSE, returns the data frame with two
#' additional columns, "TADA.Media.Flag" and "TADA.AssessmentData.Flag",
#' indicating which results should be excluded from assessments based on user
#' input.
#'
#'
#' @export
#'
#'
#' @examples
#' # Return data frame with only surface water results
#' data(Data_6Tribes_5y_Harmonized)
#' Data_6Tribes_Assessment <- TADA_AnalysisDataFilter(Data_6Tribes_5y_Harmonized)
#'
#'
#' # Return data frame with surface water results and TADA.UseForAnalysis.Flag column
#' Data_6Tribes_Assessment <- TADA_AnalysisDataFilter(Data_6Tribes_5y_Harmonized, clean = FALSE)

TADA_AnalysisDataFilter <- function(.data,
clean = TRUE,
surface_water = TRUE,
ground_water = FALSE,
sediment = FALSE) {

TADA_AnalysisDataFilter <- function(.data,
clean = TRUE,
surface_water = TRUE,
ground_water = FALSE,
sediment = FALSE) {
# import MonitoringLocationTypeNames and TADA.Media.Flags
sw.sitetypes <- utils::read.csv(system.file("extdata", "WQXMonitoringLocationTypeNameRef.csv", package = "TADA")) %>%
dplyr::select(Name, TADA.Media.Flag) %>%
dplyr::rename(ML.Media.Flag = TADA.Media.Flag,
MonitoringLocationTypeName = Name)


dplyr::rename(
ML.Media.Flag = TADA.Media.Flag,
MonitoringLocationTypeName = Name
)


# add TADA.Media.Flag column
.data <- .data %>%
# identify TADA.Media.Flag using ActivityMediaSubdivisionName and columns related to groundwater
dplyr::mutate(TADA.Media.Flag = dplyr::case_when(ActivityMediaSubdivisionName == "Groundwater" ~ "Groundwater",
!is.na(AquiferName) |
!is.na(AquiferTypeName) |
!is.na(LocalAqfrName) |
!is.na(ConstructionDateText) |
!is.na(WellDepthMeasure.MeasureValue) |
!is.na(WellDepthMeasure.MeasureUnitCode) |
!is.na(WellHoleDepthMeasure.MeasureValue) |
!is.na(WellHoleDepthMeasure.MeasureUnitCode) ~ "Groundwater",
ActivityMediaSubdivisionName == "Surface Water" ~ "Surface Water",
!ActivityMediaName %in% c("WATER", "Water", "water") ~ ActivityMediaName)) %>%
dplyr::mutate(TADA.Media.Flag = dplyr::case_when(
ActivityMediaSubdivisionName == "Groundwater" ~ "Groundwater",
!is.na(AquiferName) |
!is.na(AquiferTypeName) |
!is.na(LocalAqfrName) |
!is.na(ConstructionDateText) |
!is.na(WellDepthMeasure.MeasureValue) |
!is.na(WellDepthMeasure.MeasureUnitCode) |
!is.na(WellHoleDepthMeasure.MeasureValue) |
!is.na(WellHoleDepthMeasure.MeasureUnitCode) ~ "Groundwater",
ActivityMediaSubdivisionName == "Surface Water" ~ "Surface Water",
!ActivityMediaName %in% c("WATER", "Water", "water") ~ ActivityMediaName
)) %>%
# add TADA.Media.Flag for additional rows based on MonitoringLocationTypeName
dplyr::left_join(sw.sitetypes, by = "MonitoringLocationTypeName") %>%
dplyr::mutate(TADA.Media.Flag = ifelse(is.na(TADA.Media.Flag),
ML.Media.Flag, TADA.Media.Flag)) %>%
ML.Media.Flag, TADA.Media.Flag
)) %>%
dplyr::select(-ML.Media.Flag)

print("TADA_AnalysisDataFilter: Identifying groundwater results.")

{ if (surface_water == TRUE)

sur.water.data <- .data %>%
dplyr::filter(TADA.Media.Flag == "Surface Water") %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = "Yes")


{
if (surface_water == TRUE) {
sur.water.data <- .data %>%
dplyr::filter(TADA.Media.Flag == "Surface Water") %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = "Yes")
}

print("TADA_AnalysisDataFilter: Flagging surface water results to include in assessments.")

}

{
if (surface_water == FALSE) {
sur.water.data <- .data %>%
dplyr::filter(TADA.Media.Flag == "Surface Water") %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = "No")
}

{ if (surface_water == FALSE)

sur.water.data <- .data %>%
dplyr::filter(TADA.Media.Flag == "Surface Water") %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = "No")


print("TADA_AnalysisDataFilter: Flagging surface water results to exclude from assessments.")

}


if (ground_water == TRUE) {

gr.water.data <- .data %>%
dplyr::filter(TADA.Media.Flag == "Groundwater") %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = "Yes")

print("TADA_AnalysisDataFilter: Flagging groundwater results to include in assessments.")


}

if (ground_water == FALSE) {
gr.water.data <- .data %>%
dplyr::filter(TADA.Media.Flag == "Groundwater") %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = "No")

print("TADA_AnalysisDataFilter: Flagging groundwater results to exclude from assessments.")

}
if (sediment == TRUE) {
sed.data <- .data %>%
dplyr::filter(ActivityMediaName %in% c("SEDIMENT", "Sediment", "sediment")) %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = "Yes")

print("TADA_AnalysisDataFilter: Flagging sediment results to include in assessments.")

}

if (sediment == FALSE) {
sed.data <- .data %>%
dplyr::filter(ActivityMediaName %in% c("SEDIMENT", "Sediment", "sediment")) %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = "No")

print("TADA_AnalysisDataFilter: Flagging sediment results to exclude from assessments.")

}

if (clean == TRUE) {

assessment.data <- sur.water.data %>%
suppressMessages(dplyr::full_join(gr.water.data)) %>%
suppressMessages(dplyr::full_join(sed.data)) %>%
dplyr::filter(TADA.UseForAnalysis.Flag == "Yes") %>%
dplyr::select(-TADA.UseForAnalysis.Flag, -TADA.Media.Flag) %>%
TADA_OrderCols()

rm(sur.water.data, gr.water.data, sed.data)

print("TADA_AnalysisDataFilter: Removing results flagged for exclusion from assessments.")

return(assessment.data)


}

if (clean == FALSE) {

assessment.data <- sur.water.data %>%
suppressMessages(dplyr::full_join(gr.water.data)) %>%
suppressMessages(dplyr::full_join(sed.data)) %>%
dplyr::mutate(TADA.UseForAnalysis.Flag = paste(TADA.UseForAnalysis.Flag, " - ", toupper(TADA.Media.Flag), sep = ""))

assessment.list <- assessment.data %>%
dplyr::select(ResultIdentifier) %>%
dplyr::pull()

other.data <- .data %>%
dplyr::filter(!ResultIdentifier %in% assessment.list) %>%
dplyr::mutate(TADA.Media.Flag = ifelse(TADA.Media.Flag == "" | is.na(TADA.Media.Flag), "OTHER", TADA.Media.Flag),
TADA.UseForAnalysis.Flag = "No",
TADA.UseForAnalysis.Flag = paste(TADA.UseForAnalysis.Flag, " - ", toupper(TADA.Media.Flag), sep = ""))

dplyr::mutate(
TADA.Media.Flag = ifelse(TADA.Media.Flag == "" | is.na(TADA.Media.Flag), "OTHER", TADA.Media.Flag),
TADA.UseForAnalysis.Flag = "No",
TADA.UseForAnalysis.Flag = paste(TADA.UseForAnalysis.Flag, " - ", toupper(TADA.Media.Flag), sep = "")
)

all.data <- assessment.data %>%
suppressMessages(dplyr::full_join(other.data)) %>%
dplyr::select(-TADA.Media.Flag) %>%
TADA_OrderCols()


rm(sur.water.data, gr.water.data, sed.data, assessment.data, assessment.list)

print("TADA_AnalysisDataFilter: Returning all results with TADA.UseForAnalysis.Flag column indicating if result should be used for assessments.")

return(all.data)
}
}
Loading
Loading