Skip to content

Commit

Permalink
Merge pull request #30 from tcrombie/dev/WF
Browse files Browse the repository at this point in the history
package down
  • Loading branch information
tcrombie authored Oct 9, 2023
2 parents 2d55c73 + 498f055 commit 41af4a5
Show file tree
Hide file tree
Showing 31 changed files with 413 additions and 63 deletions.
4 changes: 4 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,7 @@
^\.Rproj\.user$
^doc$
^Meta$
^vignettes/articles$
^_pkgdown\.yml$
^docs$
^pkgdown$
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@
inst/doc
doc
Meta
docs
10 changes: 5 additions & 5 deletions R/checkModels.R
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ checkModels <- function(data, ..., modelName = "MDHD", OF = "filter", length_thr

# nest the data
nest <- of.data %>%
dplyr::mutate(well.id = paste(Metadata_Experiment, Metadata_Plate, Metadata_Well, sep = "_")) %>%
dplyr::group_by(...) %>%
tidyr::nest() %>%
tidyr::unite(col = group, -data)
Expand All @@ -77,7 +78,7 @@ checkModels <- function(data, ..., modelName = "MDHD", OF = "filter", length_thr
d <- nest %>%
dplyr::filter(group == i) %>%
tidyr::unnest(cols = data) %>%
dplyr::group_by(Metadata_Experiment, Metadata_Plate, Metadata_Well) %>%
dplyr::group_by(well.id) %>%
dplyr::mutate(mean_wormlength_um = mean(worm_length_um, na.rm = TRUE)) %>%
dplyr::group_by(strain) %>%
dplyr::mutate(mean_strain_length = mean(mean_wormlength_um, na.rm = TRUE)) %>%
Expand All @@ -93,20 +94,19 @@ checkModels <- function(data, ..., modelName = "MDHD", OF = "filter", length_thr

# get lowest wellN wells for those strains
wells <- d %>%
dplyr::distinct(Metadata_Experiment, Metadata_Plate, Metadata_Well, strain, mean_wormlength_um) %>%
dplyr::distinct(well.id, strain, mean_wormlength_um) %>%
dplyr::filter(strain %in% strains) %>%
dplyr::group_by(strain) %>%
dplyr::arrange(mean_wormlength_um) %>%
dplyr::mutate(slice_index = 1:dplyr::n()) %>%
dplyr::filter(slice_index <= wellN) %>%
dplyr::ungroup() %>%
dplyr::mutate(well_id = paste(Metadata_Experiment, Metadata_Plate, Metadata_Well, sep = "_")) %>%
dplyr::pull(well_id)
dplyr::pull(well.id)

# filter down d
f.d <- d %>%
dplyr::filter(strain %in% strains) %>%
dplyr::mutate(checkMV_well_id = paste(Metadata_Experiment, Metadata_Plate, Metadata_Well, sep = "_")) %>%
dplyr::mutate(checkMV_well_id = well.id) %>%
dplyr::filter(checkMV_well_id %in% wells) %>%
dplyr::arrange(strain)

Expand Down
56 changes: 47 additions & 9 deletions R/classifierOF.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,28 +6,66 @@
#' @param model Specify one of the built in models. Currently only "gbm2x" is available. This classifier was
#' trained on over 1,000 worm objects selected from a large GWAS experiment and classifies objects as "worm" or "non-worm" with ~90% accuracy.
#' NOTE: The model was trained to classify poorly segmented worms as 'non-worm' so true worms are often classified as "non-worm".
#' @param thresh The probability threshold for flagging objects based on the classifier. By default the thresh is st to 0.6.
#' Only the objects the classifier predicts to be improperly segmented, with a probability greater than \code{thres}, will be flagged.
#' @return A single data frame identical to the input data with the \code{classifier_ObjectFlag} variable added.
#' The \code{classifier_ObjectFlag} variable is coded as \code{"classifier"} for objects that are called non-worm by the 2X classifier.
#' All other objects are coded as \code{NA_character}. The \code{gbm2x_worm_prob} variable provides the probability that the object is a properly segmented worm.
#' All other objects are coded as \code{NA_character}, or if there are NAs in any of the variables used to classify objects they are coded as \code{"classErr"}. The \code{gbm2x_worm_prob} variable provides the probability that the object is a properly segmented worm.
#' @export

classifierOF <- function(data, model = "gbm2x"){
classifierOF <- function(data, model = "gbm2x", thresh = 0.6){
if(model == "gbm2x") {
# get the classifier
gbm2x <- easyXpress::gbm2x

# look for missing values or columns in expected model data
# Specify column names to check
check <- names(gbm2x$trainingData)[-1]

# send error if one or more variables are missing
if(F %in% (check %in% names(data))) {
message("ERROR: The data does not contain the variables expected for the gbm2x classifier. Consider running the latest version of cellprofiler-nf. You can find the expected variables with <names(easyXpress::gbm2x$trainingData)>")
stop()
}

# Filter rows with NAs in any of the specified columns
nas <- data %>%
dplyr::filter(rowSums(is.na(dplyr::select(., dplyr::all_of(check)))) > 0) %>%
dplyr::mutate(gbm2x_worm_prob = NA_real_,
gbm2x_class = NA_character_,
classifier_ObjectFlag = "classErr")

if(nrow(nas) > 0) {
# filter to rows with no NAs in expected variables
f.data <- data %>%
dplyr::filter(rowSums(!is.na(dplyr::select(., dplyr::all_of(check)))) == length(check))

# message about it
message(glue::glue('WARNING: There are {nrow(nas)} rows with NAs in one or more object variables used for the classifier. These rows are flagged as "classErr" in the output.'))

} else {
# assign to data
f.data <- data
}

# use it
gbm2x_prob <- stats::predict(gbm2x, newdata = data, type = "prob")
gbm2x_class <- stats::predict(gbm2x, newdata = data)
gbm2x_prob <- stats::predict(gbm2x, newdata = f.data, type = "prob")
gbm2x_class <- stats::predict(gbm2x, newdata = f.data)

# add these predictions
classifier_df <- data %>%
classifier_df <- f.data %>%
dplyr::bind_cols(gbm2x_prob, gbm2x_class = gbm2x_class) %>%
dplyr::mutate(classifier_ObjectFlag = dplyr::case_when(gbm2x_class == "non-worm" ~ "classifier",
gbm2x_class == "worm" ~ NA_character_,
TRUE ~ "ERROR")) %>%
# dplyr::mutate(classifier_ObjectFlag = dplyr::case_when(gbm2x_class == "non-worm" ~ "classifier",
# gbm2x_class == "worm" ~ NA_character_,
# TRUE ~ "ERROR")) %>%
dplyr::mutate(classifier_ObjectFlag = dplyr::case_when(`non-worm` > thresh ~ "classifier",
`non-worm` <= thresh ~ NA_character_,
TRUE ~ "ERROR")) %>%
dplyr::select(-`non-worm`) %>%
dplyr::rename(gbm2x_worm_prob = worm)
dplyr::rename(gbm2x_worm_prob = worm) %>%
dplyr::bind_rows(nas) %>%
dplyr::arrange(Metadata_Experiment, Metadata_Plate, Metadata_Well, Parent_WormObjects)


# test for unclassified objects
if("ERROR" %in% classifier_df$classifier_ObjectFlag) {
Expand Down
47 changes: 40 additions & 7 deletions R/delta.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,22 +9,45 @@
#' @param WF Select \code{"filter"} or \code{"ignore"}. The default, \code{"filter"}, will filter out all flagged wells before calculating the delta from control.
#' \code{"ignore"} will calculate the delta including all flagged data. Be careful using \code{"ignore"}, it is only included for diagnostic purposes.
#' @param vars The well summary statistics to perform the delta calculation on. These are supplied in a character vector. For example, the default is set to \code{c("median_wormlength_um", "cv_wormlength_um")}.
#' @param doseR Logical, is this dose response data? The default, \code{doseR = FALSE},
#' expects control data to be recorded in the design file a particular way. Specifically, the drug and diluent variables should be identical for controls,
#' e.g, \code{drug = DMSO, diluent = DMSO, concentration_um = 0}. If \code{doseR = TRUE}, the controls are expected to be coded differently,
#' .e.g, \code{drug = ABZ, diluent = DMSO, concentration_um = 0}. Warning messages are produced if the controls do not fit expectations, but try to ensure the
#' controls are coded properly without relying this function to catch all edge cases.
#' @return A data frame identical to the input but with control delta variables added, i.e., \code{median_wormlength_um_delta} and \code{cv_wormlength_um_delta}.
#' The median values for the control conditions are also added as \code{control_median_wormlength_um} and \code{control_cv_wormlength_um}.
#' @export

delta <- function(data, ..., WF = "filter", vars = c("median_wormlength_um", "cv_wormlength_um")) {
delta <- function(data, ..., WF = "filter", vars = c("median_wormlength_um", "cv_wormlength_um"), doseR = F) {
# expecting message
if(doseR == T){
message("You set doseR = TRUE. Expecting controls to be coded as for a dose response.")
}
if(doseR == F){
message("You set doseR = FALSE. Not expecting controls to be coded for a dose reponse.")
}
# check on control coding
controls <- unique(data$diluent)
drugs <- unique(data$drug)

if(F %in% (controls %in% drugs)) {
# send a stop message
message(glue::glue("ERROR: the controls are not configured as expected. Control conditions should have the same value for drug and diluent and a 0 for concentration_um. Please correct the control condition coding before using easyXpress well flag functions.
# check on expected dcontrols
if(doseR == F & F %in% (controls %in% drugs)) {
# send a warning.
message(glue::glue("ERROR: the controls are not configured as expected in the design file doseR = FALSE. Do you want doseR = TRUE? If not, control conditions should have the same value for drug and diluent and a 0 for concentration_um. Please correct the control condition coding before using easyXpress well flag (WF) functions.
For example:"))
example <- tibble::tibble(drug = c("DMSO", "Water", "death juice", "seizure sauce"),
example <- tibble::tibble(drug = c("DMSO", "water", "death juice", "seizure sauce"),
concentration_um = c(0, 0, 10, 100),
diluent = c("DMSO", "Water", "DMSO", "Water"))
diluent = c("DMSO", "water", "DMSO", "water"))
stop(message(message(paste0(capture.output(knitr::kable(example)), collapse = "\n"))))
}
# warn about doseR=T
if(doseR == T & T %in% (controls %in% drugs)){
# send a warning.
message(glue::glue("ERROR: the controls are not configured as expected in the design file for doseR = TRUE. Do you want doseR = FALSE? If not, control conditions should have 0 for concentration_um and be named for the drug not the diluent. Please correct the control condition coding before using easyXpress well flag (WF) functions.
For example:"))
example <- tibble::tibble(drug = c("death juice", "death juice", "seizure sauce", "seizure sauce"),
concentration_um = c(0, 10, 0, 100),
diluent = c("DMSO", "DMSO", "water", "water"))
stop(message(message(paste0(capture.output(knitr::kable(example)), collapse = "\n"))))
}

Expand All @@ -42,14 +65,24 @@ delta <- function(data, ..., WF = "filter", vars = c("median_wormlength_um", "cv
message(glue::glue("The data are grouped by, {group_by}."))

# setup means
if(doseR == F) {
# get control values for each group
control_values <- d %>%
dplyr::filter(drug %in% controls) %>% # filter to control wells
dplyr::group_by(...) %>%
dplyr::mutate(dplyr::across(dplyr::all_of(vars), ~mean(.), .names = "control_{.col}")) %>% # get control values
dplyr::distinct(dplyr::across(dplyr::matches("_delta|control_"))) %>%
dplyr::ungroup()

}
if(doseR == T) {
# get control values for each group
control_values <- d %>%
dplyr::filter(concentration_um == 0) %>% # filter to control wells
dplyr::group_by(...) %>%
dplyr::mutate(dplyr::across(dplyr::all_of(vars), ~mean(.), .names = "control_{.col}")) %>% # get control values
dplyr::distinct(dplyr::across(dplyr::matches("_delta|control_"))) %>%
dplyr::ungroup()
}
# calculate the delta
suppressMessages(delta <- d %>%
dplyr::left_join(., control_values) %>%
Expand Down
2 changes: 1 addition & 1 deletion R/modelSelection.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ modelSelection <- function(df) {
dplyr::group_by(Metadata_Experiment, Metadata_Plate, Metadata_Well, Parent_WormObjects) %>%
dplyr::distinct(model, .keep_all = T) %>%
dplyr::ungroup() %>%
dplyr::select(Metadata_Experiment, Metadata_Plate, Metadata_Well,
dplyr::select(well.id, Metadata_Experiment, Metadata_Plate, Metadata_Well,
Parent_WormObjects, model, num_worms) %>%
tidyr::spread(model, num_worms) %>%
dplyr::mutate_at(dplyr::vars(tidyselect::one_of(model_names)),
Expand Down
3 changes: 2 additions & 1 deletion R/outlierOF.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#' outlierOF
#'
#' This function will flag outlier objects based the Worm_Legnth variable.
#' This function will flag outlier objects based the worm_length_um variable.
#'
#' @param data A data frame output from the \code{modelSelection} function.
#' @param iqr Logical, if \code{TRUE}, objects in a well are called outliers if their worm_length_um is outside the range \code{median(worm_length_um) +/- (thresh * IQR)}.
#' If \code{FALSE}, objects in a well are called outliers if their worm_length_um is outside the range \code{mean(worm_length_um) +/- (thresh * SD)}.
Expand Down
Loading

0 comments on commit 41af4a5

Please sign in to comment.