Skip to content

Commit

Permalink
Update UnitConversions.R
Browse files Browse the repository at this point in the history
Styler  updates
  • Loading branch information
hillarymarler committed Aug 28, 2024
1 parent df265f6 commit cd42f7a
Showing 1 changed file with 83 additions and 78 deletions.
161 changes: 83 additions & 78 deletions R/UnitConversions.R
Original file line number Diff line number Diff line change
Expand Up @@ -303,59 +303,59 @@ TADA_ConvertResultUnits <- function(.data, ref = "tada", transform = TRUE) {
# create uppercase version of original CharacteristicName
.data$TADA.CharacteristicName <- toupper(.data$CharacteristicName)
}

if ("TADA.ActivityMediaName" %in% colnames(.data)) {
.data <- .data
} else {
# create uppercase version of original ActivityMediaName
.data$TADA.ActivityMediaName <- toupper(.data$ActivityMediaName)
}
}

if ("TADA.MethodSpeciationName" %in% colnames(.data)) {
.data <- .data
} else {
# create uppercase version of original MethodSpeciationName
.data$TADA.MethodSpeciationName <- toupper(.data$MethodSpeciationName)
}

if ("TADA.ResultSampleFractionText" %in% colnames(.data)) {
.data <- .data
} else {
# create uppercase version of original ResultSampleFractionText
.data$TADA.ResultSampleFractionText <- toupper(.data$ResultSampleFractionText)
}

if ("TADA.ResultMeasureValue" %in% colnames(.data)) {
.data <- .data
} else {
# run TADA_ConvertSpecialChars function
.data <- TADA_ConvertSpecialChars(.data, "ResultMeasureValue")
}
}

if ("TADA.ResultMeasure.MeasureUnitCode" %in% colnames(.data)) {
.data <- .data
} else {
# create uppercase version of original ResultMeasure.MeasureUnitCode
.data$TADA.ResultMeasure.MeasureUnitCode <- toupper(.data$ResultMeasure.MeasureUnitCode)
}
}

if ("TADA.DetectionQuantitationLimitMeasure.MeasureValue" %in% colnames(.data)) {
.data <- .data
} else {
# run TADA_ConvertSpecialChars function
.data <- TADA_ConvertSpecialChars(.data, "DetectionQuantitationLimitMeasure.MeasureValue")
}
}

if ("TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode" %in% colnames(.data)) {
.data <- .data
} else {
# create uppercase version of original DetectionQuantitationLimitMeasure.MeasureUnitCode
.data$TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode <-
toupper(.data$DetectionQuantitationLimitMeasure.MeasureUnitCode)
}
}

expected_cols <- c(
"TADA.CharacteristicName",
"TADA.CharacteristicName",
"TADA.ActivityMediaName",
"TADA.MethodSpeciationName",
"TADA.ResultSampleFractionText",
Expand All @@ -364,14 +364,14 @@ TADA_ConvertResultUnits <- function(.data, ref = "tada", transform = TRUE) {
"TADA.DetectionQuantitationLimitMeasure.MeasureValue",
"TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode"
)

TADA_CheckColumns(.data, expected_cols)

# execute function after checks are passed

# import USGS ref for method speciation
usgs.ref <- TADA_GetUSGSSynonymRef()

usgs.spec <- usgs.ref %>%
dplyr::rename(
ResultMeasure.MeasureUnitCode = Code,
Expand All @@ -383,7 +383,7 @@ TADA_ConvertResultUnits <- function(.data, ref = "tada", transform = TRUE) {
TADA.Target.MethodSpeciationName = toupper(TADA.Target.MethodSpeciationName)
) %>%
dplyr::select(ResultMeasure.MeasureUnitCode, TADA.Target.MethodSpeciationName)

usgs.unit <- usgs.ref %>%
dplyr::select(Code, CodeNoSpeciation) %>%
dplyr::mutate(
Expand All @@ -392,47 +392,47 @@ TADA_ConvertResultUnits <- function(.data, ref = "tada", transform = TRUE) {
) %>%
dplyr::select(-Code) %>%
dplyr::rename(MeasureUnitCode.match = CodeNoSpeciation)


# if user supplied unit reference was provided
if (is.data.frame(ref)) {
# required columns
expected_ref_cols <- c(
"TADA.CharacteristicName",
"TADA.CharacteristicName",
"TADA.ResultMeasure.MeasureUnitCode",
"TADA.Target.ResultMeasure.MeasureUnitCode",
"TADA.WQXUnitConversionFactor",
"TADA.WQXUnitConversionFactor",
"TADA.WQXUnitConversionCoefficient"
)

if (all(expected_ref_cols %in% colnames(ref)) == FALSE) {
stop("The reference data frame does not contain all fields required for TADA_ConvertResultUnits. Use TADA_CreateUnitRef with the TADA data frame to create an editable unit reference table with all required columns.")

if (all(expected_ref_cols %in% colnames(ref)) == TRUE) {
print("The reference data frame contains all fields required for TADA_ConvertResultUnits.")
}
}

# join USGS ref for method speciation name information
unit.ref <- ref %>%
dplyr::left_join(usgs.spec, by = dplyr::join_by(ResultMeasure.MeasureUnitCode)) %>%
dplyr::distinct()

# create message to inform users if user-supplied unit reference contains all combinations present in TADA data frame
# create list of unique characteristic and unit combinations in data
tada.list <- TADA_CreateUnitRef(.data) %>%
dplyr::select(TADA.CharacteristicName, TADA.ResultMeasure.MeasureUnitCode) %>%
dplyr::distinct()

# create list of unique characteristic and unit combinations in user-supplied unit ref
user.list <- unit.ref %>%
dplyr::select(TADA.CharacteristicName, TADA.ResultMeasure.MeasureUnitCode) %>%
dplyr::distinct()

# compare the unique characteristic/unit combinations in data nd unit ref
compare.ref <- tada.list %>%
dplyr::anti_join(user.list, by = c("TADA.CharacteristicName", "TADA.ResultMeasure.MeasureUnitCode"))

# if no difference between the two, print message that all combinations are present in unit ref
if (nrow(compare.ref) == 0) {
print("All CharacteristicName/Unit combinations in the TADA dataframe are represented in user-supplied unit reference.")
Expand All @@ -444,33 +444,35 @@ TADA_ConvertResultUnits <- function(.data, ref = "tada", transform = TRUE) {
dplyr::select(CombList) %>%
dplyr::distinct() %>%
stringi::stri_replace_last(fixed = ",", " and")

print(paste("TADA_ConvertResultUnits: The following CharacteristicName and ResultMeasure.MeasureUnitCode combinations are not included in the user-supplied unit reference data frame: ",
compare.list,
". Consider revising the user-supplied unit reference data frame and running TADA_ConvertResultUnits again.",
sep = ""
compare.list,
". Consider revising the user-supplied unit reference data frame and running TADA_ConvertResultUnits again.",
sep = ""
))
}
}

# if user did not provide a data frame
if (!is.data.frame(ref)) {
# if no unit reference df was provided by user or user input was "tada"
if (ref == "tada") {
unit.ref <- TADA_CreateUnitRef(.data, print.message = FALSE)

unit.ref <- unit.ref %>%
dplyr::left_join(usgs.spec, by = dplyr::join_by(ResultMeasure.MeasureUnitCode)) %>%
dplyr::distinct()
}

if (ref == "wqx") {
# import WQX unit ref
wqx.ref <- TADA_GetMeasureUnitRef()

wqx.ref <- wqx.ref %>%
dplyr::mutate(Target.Unit = toupper(Target.Unit),
Code = toupper(Code)) %>%
dplyr::mutate(
Target.Unit = toupper(Target.Unit),
Code = toupper(Code)
) %>%
dplyr::rename(
MeasureUnitCode.match = Code,
TADA.Target.ResultMeasure.MeasureUnitCode = Target.Unit,
Expand All @@ -482,13 +484,14 @@ TADA_ConvertResultUnits <- function(.data, ref = "tada", transform = TRUE) {
TADA.WQXUnitConversionFactor, TADA.WQXUnitConversionCoefficient
) %>%
dplyr::distinct()

# create unit ref
unit.ref <- TADA_UniqueCharUnitSpeciation(.data) %>%
dplyr::left_join(usgs.unit, by = dplyr::join_by(TADA.ResultMeasure.MeasureUnitCode)) %>%
dplyr::mutate(MeasureUnitCode.match = ifelse(is.na(MeasureUnitCode.match),
TADA.ResultMeasure.MeasureUnitCode,
MeasureUnitCode.match)) %>%
dplyr::mutate(MeasureUnitCode.match = ifelse(is.na(MeasureUnitCode.match),
TADA.ResultMeasure.MeasureUnitCode,
MeasureUnitCode.match
)) %>%
dplyr::left_join(wqx.ref, by = dplyr::join_by(MeasureUnitCode.match)) %>%
dplyr::select(-MeasureUnitCode.match) %>%
dplyr::distinct()
Expand All @@ -497,40 +500,39 @@ TADA_ConvertResultUnits <- function(.data, ref = "tada", transform = TRUE) {
dplyr::left_join(usgs.spec, by = dplyr::join_by(ResultMeasure.MeasureUnitCode)) %>%
dplyr::select(-TADA.MethodSpeciationName) %>%
dplyr::distinct()

}
}

# list of conversion columns

conversion.cols <- c(
"TADA.SpeciationUnitConversion", "TADA.WQXTargetUnit",
"TADA.WQXUnitConversionFactor", "TADA.WQXUnitConversionCoefficient",
"TADA.Target.MethodSpeciationName", "TADA.Target.ResultMeasure.MeasureUnitCode"
)

# ref join

ref.join <- c(
"TADA.CharacteristicName",
"ResultMeasure.MeasureUnitCode",
"ResultMeasure.MeasureUnitCode",
"TADA.ResultMeasure.MeasureUnitCode"
)


# join unit.ref to .data
check.data <- .data %>%
# remove existing conversion columns
dplyr::select(-tidyselect::any_of(c(conversion.cols))) %>%
# add new conversion columns
dplyr::left_join(unit.ref,
by =
c(
ref.join
),
relationship = "many-to-many"
)
by =
c(
ref.join
),
relationship = "many-to-many"
)

# add TADA.WQXResultUnitConversion column
flag.data <- check.data %>%
# create flag column
Expand All @@ -539,16 +541,16 @@ TADA_ConvertResultUnits <- function(.data, ref = "tada", transform = TRUE) {
is.na(TADA.ResultMeasureValue) ~ as.character("No Result Value"),
is.na(TADA.Target.ResultMeasure.MeasureUnitCode) ~ as.character("No Target Unit")
))

if (transform == FALSE) {
print("TADA_ConvertResultUnits: When Transform = FALSE, result values and units are NOT converted. Conversions are required for many other TADA functions to work properly (such as result value range checks).")
# reorder columns
clean.data <- TADA_OrderCols(flag.data) %>%
TADA_CreateComparableID()

return(clean.data)
}

if (transform == TRUE) {
# Transform result measure value to Target Unit only if target unit exists
clean.data <- flag.data %>%
Expand All @@ -558,43 +560,45 @@ TADA_ConvertResultUnits <- function(.data, ref = "tada", transform = TRUE) {
!is.na(TADA.Target.ResultMeasure.MeasureUnitCode) ~ ((TADA.ResultMeasureValue + TADA.WQXUnitConversionCoefficient) * TADA.WQXUnitConversionFactor),
is.na(TADA.Target.ResultMeasure.MeasureUnitCode) ~ TADA.ResultMeasureValue
))

rm(flag.data)

# populate TADA.ResultMeasure.MeasureUnitCode
clean.data <- clean.data %>%
# use target unit where there is a target unit, use original unit if no target unit
dplyr::mutate(TADA.ResultMeasure.MeasureUnitCode = dplyr::case_when(
!is.na(TADA.Target.ResultMeasure.MeasureUnitCode) ~ TADA.Target.ResultMeasure.MeasureUnitCode,
is.na(TADA.Target.ResultMeasure.MeasureUnitCode) ~ TADA.ResultMeasure.MeasureUnitCode
))

# Convert method speciation column for USGS data
check <- clean.data %>%
dplyr::filter(ResultMeasure.MeasureUnitCode %in% usgs.ref$ResultMeasure.MeasureUnitCode)

if (dim(check)[1] > 0) {
print(paste0("NOTE: Dataset contains ", dim(check)[1], " USGS results with speciation information in both the result unit and method speciation columns. This function overwrites the TADA method speciation column with the speciation provided in the result unit column."))
}

# add target method speciation name when needed
clean.data <- clean.data %>%
dplyr::mutate(TADA.MethodSpeciationName = ifelse(!is.na(TADA.Target.MethodSpeciationName), TADA.Target.MethodSpeciationName, toupper(TADA.MethodSpeciationName)),
# replace UNKNOWN or NONE method speciation name with NA
TADA.MethodSpeciationName = ifelse(TADA.MethodSpeciationName %in% c("UNKNOWN", "NONE"), NA, TADA.MethodSpeciationName))
dplyr::mutate(
TADA.MethodSpeciationName = ifelse(!is.na(TADA.Target.MethodSpeciationName), TADA.Target.MethodSpeciationName, toupper(TADA.MethodSpeciationName)),
# replace UNKNOWN or NONE method speciation name with NA
TADA.MethodSpeciationName = ifelse(TADA.MethodSpeciationName %in% c("UNKNOWN", "NONE"), NA, TADA.MethodSpeciationName)
)

# create detection unit ref
det.ref <- unit.ref %>%
dplyr::ungroup() %>%
dplyr::rename(DetectionQuantitationLimitMeasure.MeasureUnitCode = ResultMeasure.MeasureUnitCode) %>%
dplyr::select(-TADA.ResultMeasure.MeasureUnitCode) %>%
dplyr::distinct()

det.join <- c(
"TADA.CharacteristicName",
"DetectionQuantitationLimitMeasure.MeasureUnitCode"
)

# Transform TADA.DetectionQuantitationLimitMeasure.MeasureValue value to target value only if target unit exists
det.data <- clean.data %>%
dplyr::select(-tidyselect::any_of(conversion.cols)) %>%
Expand All @@ -604,24 +608,25 @@ TADA_ConvertResultUnits <- function(.data, ref = "tada", transform = TRUE) {
!is.na(TADA.Target.ResultMeasure.MeasureUnitCode) ~ ((TADA.DetectionQuantitationLimitMeasure.MeasureValue - TADA.WQXUnitConversionCoefficient) * TADA.WQXUnitConversionFactor),
is.na(TADA.Target.ResultMeasure.MeasureUnitCode) ~ TADA.DetectionQuantitationLimitMeasure.MeasureValue
))

rm(clean.data)

# populate TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode
convert.data <- det.data %>%
# use target unit where there is a target unit, use original unit if no target unit
dplyr::mutate(TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode = dplyr::case_when(
!is.na(TADA.Target.ResultMeasure.MeasureUnitCode) ~ TADA.Target.ResultMeasure.MeasureUnitCode,
is.na(TADA.Target.ResultMeasure.MeasureUnitCode) ~ TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode))

is.na(TADA.Target.ResultMeasure.MeasureUnitCode) ~ TADA.DetectionQuantitationLimitMeasure.MeasureUnitCode
))

# Remove unneccessary conversion columns
convert.data <- convert.data %>%
dplyr::select(-tidyselect::any_of(conversion.cols))


# Update ID and column ordering
convert.data <- TADA_CreateComparableID(convert.data)
convert.data <- TADA_OrderCols(convert.data)
convert.data <- TADA_OrderCols(convert.data)

return(convert.data)
}
Expand Down

0 comments on commit cd42f7a

Please sign in to comment.