From 3eac2d47c94edf81de4afb1dfc7e3c4716287ecc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tymoteusz=20Kwieci=C5=84ski?= <31191783+Fersoil@users.noreply.github.com> Date: Mon, 8 Jul 2024 20:16:20 +0200 Subject: [PATCH 1/3] bug: standard curve sample type (#13) * simple README with overview and installation (#7) * website for docs (#8) * add sample vignette and artificial dataset * pkgdown workflow --------- Co-authored-by: Fersoil * added STANDARD CURVE type, added option to reverse the xaxis of MFI plot, plots in ggplot2, log scales * docs * plate name detection based on filepath * changed the default log scale into `all` --------- Co-authored-by: Fersoil --- .github/workflows/pkgdown.yaml | 50 +++++++++++ .gitignore | 4 + DESCRIPTION | 3 +- R/classes.R | 52 ++++++++--- R/read_data.R | 5 ++ R/standard_curves.R | 130 +++++++++++++++++++++++----- README.md | 25 +++++- inst/extdata/artificial_plate.csv | 62 +++++++++++++ man/SampleType.Rd | 5 +- man/plot_standard_curve_antibody.Rd | 13 ++- vignettes/example_script.Rmd | 77 ++++++++++++++++ 11 files changed, 390 insertions(+), 36 deletions(-) create mode 100644 .github/workflows/pkgdown.yaml create mode 100644 .gitignore create mode 100644 inst/extdata/artificial_plate.csv create mode 100644 vignettes/example_script.Rmd diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml new file mode 100644 index 00000000..05540872 --- /dev/null +++ b/.github/workflows/pkgdown.yaml @@ -0,0 +1,50 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main] + pull_request: + branches: [main] + release: + types: [published] + workflow_dispatch: + +name: pkgdown + +permissions: read-all + +jobs: + pkgdown: + runs-on: ubuntu-latest + # Only restrict concurrency for non-PR jobs + concurrency: + group: pkgdown-${{ github.event_name != 'pull_request' || github.run_id }} + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + permissions: + contents: write + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::pkgdown, local::. + needs: website + + - name: Build site + run: pkgdown::build_site_github_pages(new_process = FALSE, install = FALSE) + shell: Rscript {0} + + - name: Deploy to GitHub pages 🚀 + if: github.event_name != 'pull_request' + uses: JamesIves/github-pages-deploy-action@v4.5.0 + with: + clean: false + branch: gh-pages + folder: docs \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..5b6a0652 --- /dev/null +++ b/.gitignore @@ -0,0 +1,4 @@ +.Rproj.user +.Rhistory +.RData +.Ruserdata diff --git a/DESCRIPTION b/DESCRIPTION index 931197dd..bf89ee92 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,8 @@ Imports: R6, tools, readxl, - RColorBrewer + RColorBrewer, + ggplot2 Suggests: testthat (>= 3.0.0) Config/testthat/edition: 3 diff --git a/R/classes.R b/R/classes.R index f4e86d58..f34dc046 100644 --- a/R/classes.R +++ b/R/classes.R @@ -276,8 +276,9 @@ SampleLocation$parse_sample_location = function(location_string) { #' A type of the sample. #' The possible sample types are: #' 1. (`"BLANK"`)\cr - background sample used for testing -#' 3. (`"POSITIVE CONTROL"`)\cr - -#' 4. (`"NEGATIVE CONTROL"`)\cr - +#' 2. (`"STANDARD CURVE"`)\cr - a sample used to create a standard curve +#' 3. (`"POSITIVE CONTROL"`)\cr - a positive control sample used for testing +#' 4. (`"NEGATIVE CONTROL"`)\cr - a negative control sample used for testing #' 5. (`"TEST"`)\cr - the actual sample used for testing #' #' @field dilution_factor A numeric value that represents the dilution factor of the sample. Used only in the case of control samples @@ -374,6 +375,7 @@ SampleType$valid_sample_types <- c("BLANK", "TEST", "NEGATIVE CONTROL", + "STANDARD CURVE", "POSITIVE CONTROL") #' @@ -393,9 +395,12 @@ SampleType$validate_dilution_factor = function(sample_type, dilution_factor) { if (sample_type == "POSITIVE CONTROL" && is.na(dilution_factor)) { stop("Positive control samples must have a dilution factor") } - if (sample_type != "POSITIVE CONTROL" && + if (sample_type == "STANDARD CURVE" && is.na(dilution_factor)) { + stop("Standard curve samples must have a dilution factor") + } + if (sample_type %in% c("POSITIVE CONTROL", "STANDARD CURVE") && !is.na(dilution_factor)) { - stop("Only positive control samples should have a dilution factor") + stop("Only positive control or standard curve samples should have a dilution factor") } } @@ -405,6 +410,7 @@ SampleType$validate_dilution_factor = function(sample_type, dilution_factor) { #' #' It parses the names as follows: #' If `sample_name` or `sample_name_loc` equals to `BLANK`, `B` or starts with letter `B`, then SampleType equals to `BLANK` +#' If `sample_name` or `sample_name_loc` equals to `STANDARD CURVE`, `SC`, `S` or equals to `1/\d+`, `S_1/d+`, `S 1\d+` or `S1\d+` then SampleType equals to `STANDARD CURVE` #' If `sample_name` or `sample_name_loc` equals to `NEGATIVE CONTROL`, `N`, or starts with `N` or `NEG`, then SampleType equals to `NEGATIVE CONTROL` #' If `sample_name` or `sample_name_loc` contains substring `1/\d+` SampleType equals to `POSITIVE CONTROL` #' otherwise, the returned SampleType is `TEST` @@ -436,6 +442,28 @@ SampleType$parse_sample_type = function(sample_name, return(SampleType$new("BLANK")) } + standard_curve_types <- c("STANDARD CURVE", "SC", "S") # CP3 - tanzania, PRISM - uganda, Brefet - gambia, NIBSC 10/198 + standard_curve_pattern <- "^(S_|S|S\\s|)(1/\\d+)$" + standard_curve_loc_pattern <- "(1/\\d+)" + if (sample_name %in% standard_curve_types || + grepl(standard_curve_pattern, sample_name) || + grepl(standard_curve_loc_pattern, sample_name_loc)) { + dilution_factor_pattern <- "1/\\d+" + match <- "" + if (!is.null(sample_name_loc) && sample_name_loc != "" || !is.na(sample_name_loc) && sample_name_loc != "") { + match <- regmatches(sample_name_loc, regexpr(dilution_factor_pattern, sample_name_loc)) + } else { + match <- regmatches(sample_name, regexpr(dilution_factor_pattern, sample_name)) + } + dilution_factor <- eval(parse(text = match)) + + if (is.null(dilution_factor)) { + dilution_factor = NA # this value needs to be updated later + } + + return(SampleType$new("STANDARD CURVE", dilution_factor = dilution_factor, validate_dilution = FALSE)) + } + negative_types <- c("NEGATIVE CONTROL", "N") negative_pattern <- "^(N..|.*\\bNEG\\b)" # check if it starts with N or contains NEG string @@ -446,12 +474,11 @@ SampleType$parse_sample_type = function(sample_name, return(SampleType$new("NEGATIVE CONTROL")) } - #standard_curve_types <- c("STANDARD CURVE", "SC", "S", "CP3") # CP3 - tanzania, PRISM - uganda, Brefet - gambia, NIBSC 10/198 - #standard_curve_pattern <- "^(S_|S|S\\s|CP.+)(1/\\d+)$" + + positive_control_pattern <- c("^(P.+|POS.+|CP.+)(1/\\d+)$") - positive_control_loc_pattern <- c("(1/\\d+)") if (grepl(positive_control_pattern, sample_name) || - grepl(positive_control_loc_pattern, sample_name_loc)) { + grepl(positive_control_pattern, sample_name_loc)) { dilution_factor_pattern <- "1/\\d+" match <- "" if (!is.null(sample_name_loc) && sample_name_loc != "" || !is.na(sample_name_loc) && sample_name_loc != "") { @@ -639,6 +666,9 @@ Plate <- R6Class( #' @field audit_logs a list containing audit logs read from Luminex file audit_logs = list(), + #' @field plate_name - plate name obtained from filename + plate_name = "", + #' @description #' creates a new `Plate` object @@ -661,13 +691,15 @@ Plate <- R6Class( samples = list(), batch_info = list(), calibration_info = list(), - audit_logs = list()) { + audit_logs = list(), + plate_name = "") { # check for valid input self$analytes <- analytes self$samples <- samples self$batch_info <- batch_info self$calibration_info <- calibration_info self$audit_logs <- audit_logs + self$plate_name <- plate_name }, @@ -712,7 +744,7 @@ Plate <- R6Class( cat( "Summary of the plate generated on ", as.character(self$examination_date), - "\nwith batch name '", plate$batch_name, "':\n", + "\nwith name '", plate$plate_name, "':\n", "Total number of samples: ", self$number_of_samples, "\n", diff --git a/R/read_data.R b/R/read_data.R index 4b64be5f..2d1f3dcd 100644 --- a/R/read_data.R +++ b/R/read_data.R @@ -98,6 +98,11 @@ read_data <- function(file_path, ) } } + # add name from filepath + filename_without_extension <- sub("\\.[^.]*$", "", basename(file_path)) + + results_plate$plate_name <- filename_without_extension + return(results_plate) } diff --git a/R/standard_curves.R b/R/standard_curves.R index 2333199a..4e88efc0 100644 --- a/R/standard_curves.R +++ b/R/standard_curves.R @@ -1,3 +1,5 @@ +library(ggplot2) + #' PLot standard curves of plate or list of plates #' #' @@ -7,10 +9,11 @@ #' @param data_type Data type of the value we want to plot - the same datatype as in the plate file. By default equals to `Net MFI` #' @param file_path where to save the output plot. If `NULL` the plot is displayed, `NULL` by default #' @param decreasing_dilution_order If `TRUE` the dilutions are plotted in decreasing order, `TRUE` by default +#' @param log_scale Which elements on the plot should be displayed in log scale. By default c("dilutions"). If `NULL` no log scale is used, if "all" or c("dilutions", "MFI") all elements are displayed in log scale. #' @param verbose If `TRUE` print messages, `TRUE` by default #' #' @export -plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Median", file_path = NULL, decreasing_dilution_order = TRUE, verbose = TRUE) { +plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Median", file_path = NULL, decreasing_dilution_order = TRUE, log_scale = c("all"), verbose = TRUE) { if (inherits(plates, "Plate")) { # an instance of Plate plates <- list(plates) @@ -24,6 +27,12 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi } } + # check if log_scale is a character vector and contains element from set + available_log_scale_values <- c("all", "dilutions", "MFI") + if (!is.null(log_scale) && !all(log_scale %in% available_log_scale_values)){ + stop("log_scale should be a character vector containing elements from set: ", paste(available_log_scale_values, collapse = ", ")) + } + dilutions_numeric_base <- NULL standard_curve_num_samples <- NULL @@ -42,11 +51,23 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi ) } - standard_curves <- plate$get_sample_by_type("POSITIVE CONTROL") + standard_curves <- plate$get_sample_by_type("STANDARD CURVE") + if (length(standard_curves) == 0){ + verbose_cat( + "(", + color_codes$red_start, + "WARNING", + color_codes$red_end, + ")", + "\nNo standard curve samples found in the plate\nUsing positive control samples", + verbose = verbose + ) + standard_curves <- plate$get_sample_by_type("POSITIVE CONTROL") + } if (is.null(standard_curve_num_samples)) { standard_curve_num_samples = length(standard_curves) } else if (standard_curve_num_samples != length(standard_curves)) { - stop("Inconsistent number of positive control samples accross plates") + stop("Inconsistent number of positive control or standard curve samples accross plates") } if (!antibody_name %in% plate$analyte_names){ @@ -57,7 +78,7 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi dilutions <- sapply(standard_curves, function(sample) sample$sample_type$character_dilution_factor) dilutions_numeric <- sapply(standard_curves, function(sample) sample$sample_type$dilution_factor) # sort values according to dilutions - sorted_order <- order(dilutions_numeric, decreasing = decreasing_dilution_order) + sorted_order <- order(dilutions_numeric) # Sort the vectors according to the sorted order of the reference vector dilutions_numeric <- dilutions_numeric[sorted_order] @@ -81,12 +102,6 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi } - if (!is.null(file_path)){ - if (grepl("\\.pdf$", file_path, ignore.case = TRUE)) - pdf(file = file_path) - else - png(filename = file_path) - } plot_name <- paste0("Standard curve for analyte: ", antibody_name) @@ -98,22 +113,97 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi par(mfrow=c(1,1)) - plot(log(dilutions_numeric), log(standard_curve_values_list[[1]]), type = "o", lwd=2, main=plot_name, xlab="dilutions", ylab = paste0("log(", data_type, ")"), col=colors[[1]],axes=F,bty='L', pch=19, - ylim = c(min(log(unlist(standard_curve_values_list))), max(log(unlist(standard_curve_values_list))))) - if (length(plates) > 1) { - for (i in 2:length(plates)) { - lines(log(dilutions_numeric), log(standard_curve_values_list[[i]]), type = "o", lwd = 2, col = colors[[i]]) + log_if_needed_mfi <- function(x) { + if ("MFI" %in% log_scale || "all" %in% log_scale) { + return(log(x)) } + return(x) + } + + log_if_needed_dilutions <- function(x) { + if ("dilutions" %in% log_scale || "all" %in% log_scale) { + return(log(x)) + } + return(x) + } + + # Determine if x and y axes need to be log-scaled + x_log_scale <- "dilutions" %in% log_scale || "all" %in% log_scale + y_log_scale <- "MFI" %in% log_scale || "all" %in% log_scale + + plot_data <- data.frame() + + + for (i in 1:length(plates)) { + temp_data <- data.frame( + dilutions = log_if_needed_dilutions(dilutions_numeric), + mfi = log_if_needed_mfi(standard_curve_values_list[[i]]), + plate = plates[[i]]$plate_name, + colors = colors[[i]] + ) + plot_data <- rbind(plot_data, temp_data) + } + + # Generate x and y labels + xlab <- ifelse(x_log_scale, "log(dilutions)", "dilutions") + ylab <- ifelse(y_log_scale, paste0("log(", data_type, ")"), data_type) + + x_ticks <- c(log_if_needed_dilutions(dilutions_numeric), max(log_if_needed_dilutions(dilutions_numeric)) + 1) + x_labels <- c(dilutions, "") + + legend_position <- c(0.8, 0.2) # Automatically position the legend + if (decreasing_dilution_order) { + if (x_log_scale && !y_log_scale) + legend_position <- c(0.8, 0.8) + else + legend_position <- c(0.2, 0.2) + } else { + if (x_log_scale && !y_log_scale) + legend_position <- c(0.2, 0.8) + else + legend_position <- c(0.8, 0.2) } - axis(1,at=c(log(dilutions_numeric),max(log(dilutions_numeric))+1),labels=c(dilutions,""),cex.axis=0.9) - axis(2,cex.axis=0.9) - legend("topleft", legend = paste("Plate", 1:length(plates)), col = colors, lty = 1, lwd = 2) + p <- ggplot(plot_data, aes(x = dilutions, y = mfi, color = plate)) + + geom_line(size = 1.2) + + geom_point(size = 3) + + scale_color_manual(values = colors) + + labs(title = plot_name, x = xlab, y = ylab) + + scale_x_continuous(breaks = x_ticks, labels = x_labels, trans = if (decreasing_dilution_order) "reverse" else "identity") + + scale_y_continuous() + + theme_minimal() + + theme(axis.line = element_line(colour = "black"), + axis.text.x = element_text(size = 9), + axis.text.y = element_text(size = 9), + legend.position = legend_position, # Automatically position the legend + legend.background = element_rect(fill = "white", color = "black")) - if (!is.null(file_path)) - dev.off() + if (!is.null(file_path)){ + ggsave(file_path, plot = p, width = 10, height = 7, units = "in", dpi = 300) + } else { + print(p) + } + + + +} + + +verbose_cat <- function(..., verbose = TRUE) { + if (verbose) { + cat(..., sep = "") + } } +color_codes <- + list( + yellow_start = "\033[33m", + yellow_end = "\033[39m", + red_start = "\033[31m", + red_end = "\033[39m", + green_start = "\033[32m", + green_end = "\033[39m" + ) diff --git a/README.md b/README.md index 20ed8f35..d8652d53 100644 --- a/README.md +++ b/README.md @@ -1 +1,24 @@ -# PvSTATEM \ No newline at end of file +# PvSTATEM - an R package for automated analysis of serological data +*pre-release* version +## Overview +This package is a simple tool, which handles the raw data of various formats, produced in Multiplex Bead Assay (MBA). In short, it reads the unstructured, raw data from e.g. Luminex device and output normalized and well-structured data which can be later used in more advanced, downstream analysis. + +The package is in pre-release version, thus it lacks most of the functionalities. It is planned to be released by the end of September 2024. + +The package includes 3 main steps of preprocessing the data: +1. data reading and manipulation +2. quality control +3. data normalization + + +`PvSTATEM` package is developed within the project of the same name - [PvSTATEM](https://www.pvstatem.eu/), which is an international project which aims into malaria elimination. + +## Installation +For now, the only way to install the (unreleased) package is to build it by hand. The easiest way to do that is using a simple command `install_github` available in `devtools` library. + +```R +library(devtools) +install_github("mini-pw/PvSTATEM") +``` + +First command loads the `devtools` library (you might need to install it first - using command `install_packages("devtools")`) and the second one sources the git repository with code of our package and automatically installs it. diff --git a/inst/extdata/artificial_plate.csv b/inst/extdata/artificial_plate.csv new file mode 100644 index 00000000..012fcc06 --- /dev/null +++ b/inst/extdata/artificial_plate.csv @@ -0,0 +1,62 @@ +Program,xPONENT,,MAGPIX,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Build,3.1.871.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Date,01/07/2024,00:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +SN,PvSTATEM1234,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Batch,test_plate,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Version,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Operator,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +ComputerName,test,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Country Code,616,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +ProtocolName,test,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +ProtocolVersion,1,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Most Recent Calibration and Verification Results:,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Last VER Calibration,Passed 01/07/2024 00:00:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Last CAL Calibration,Passed 01/07/2024 00:00:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Last Fluidics Test,Passed 01/07/2024 00:00:00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +CALInfo:,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Calibrator,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Lot,ExpirationDate,CalibrationTime,DDTemp,CL1Temp,CL2Temp,DDVolts,CL1Volts,CL2Volts,DDRVal,CL1RVal,CL2RVal,Result,MachineSerialNo,,,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Samples,8,Min Events,50,Per Bead,,,,,,,,,,,,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Results,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +DataType:,Median,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Location,Sample,Etramp5_ag1,Etramp4_ag1,AMA1 +1(1,A1),BLANK,20, 21, 23 +2(1,B1),test1,1292,1389,1414 +3(1,C1),test2,1430,1435, 867 +4(1,D1),test3,657,2031,1300 +5(1,E1),CP3 1/50,1486,1437,1893 +6(1,F1),CP3 1/250,130,87,90 +7(1,G1),CP3 1/1250,78,67,64 +8(1,H1),CP3 1/6250, 65,60,64 +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +DataType:,Count,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Location,Sample,Etramp5_ag1,Etramp4_ag1,AMA1 +1(1,A1),BLANK,100,121,143 +2(1,B1),test1,102,104,105 +3(1,C1),test2,130,135, 67 +4(1,D1),test3,57,231,100 +5(1,E1),CP3 1/50,86,37,193 +6(1,F1),CP3 1/250,130,87,90 +7(1,G1),CP3 1/1250,53,47,65 +8(1,H1),CP3 1/6250, 85,50,54 +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +DataType:,Per Bead Count,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Alyte:,Etramp5_ag1,Etramp4_ag1,AMA1 +BeadID:,73,75,34 +Per Bead:,50,50,50, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +DataType:,Audit Logs,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +UserId,Date,Message,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +DataType:,Warnings/Errors,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +Location,Status,Message,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +-- CRC --,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, +CRC32: 85C2BFCD,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, diff --git a/man/SampleType.Rd b/man/SampleType.Rd index 69df4e8a..74a194bf 100644 --- a/man/SampleType.Rd +++ b/man/SampleType.Rd @@ -13,8 +13,9 @@ SampleType defines the type of the sample and the dilution factor The possible sample types are: \enumerate{ \item (\code{"BLANK"})\cr - background sample used for testing -\item (\code{"POSITIVE CONTROL"})\cr - -\item (\code{"NEGATIVE CONTROL"})\cr - +\item (\code{"STANDARD CURVE"})\cr - a sample used to create a standard curve +\item (\code{"POSITIVE CONTROL"})\cr - a positive control sample used for testing +\item (\code{"NEGATIVE CONTROL"})\cr - a negative control sample used for testing \item (\code{"TEST"})\cr - the actual sample used for testing }} diff --git a/man/plot_standard_curve_antibody.Rd b/man/plot_standard_curve_antibody.Rd index 59d87233..f6f0f727 100644 --- a/man/plot_standard_curve_antibody.Rd +++ b/man/plot_standard_curve_antibody.Rd @@ -7,8 +7,11 @@ plot_standard_curve_antibody( plates, antibody_name, - data_type = "Net MFI", - file_path = NULL + data_type = "Median", + file_path = NULL, + decreasing_dilution_order = TRUE, + log_scale = c("dilutions"), + verbose = TRUE ) } \arguments{ @@ -19,6 +22,12 @@ plot_standard_curve_antibody( \item{data_type}{Data type of the value we want to plot - the same datatype as in the plate file. By default equals to \verb{Net MFI}} \item{file_path}{where to save the output plot. If \code{NULL} the plot is displayed, \code{NULL} by default} + +\item{decreasing_dilution_order}{If \code{TRUE} the dilutions are plotted in decreasing order, \code{TRUE} by default} + +\item{log_scale}{Which elements on the plot should be displayed in log scale. By default c("dilutions"). If \code{NULL} no log scale is used, if "all" or c("dilutions", "MFI") all elements are displayed in log scale.} + +\item{verbose}{If \code{TRUE} print messages, \code{TRUE} by default} } \description{ PLot standard curves of plate or list of plates diff --git a/vignettes/example_script.Rmd b/vignettes/example_script.Rmd new file mode 100644 index 00000000..1ac5db3d --- /dev/null +++ b/vignettes/example_script.Rmd @@ -0,0 +1,77 @@ +--- +title: "Simple example of basic PvSTATEM package pre-release version functionalities" +author: "Tymoteusz KwieciƄski" +date: "`r Sys.Date()`" +output: rmarkdown::html_document +vignette: > + %\VignetteIndexEntry{Simple example of basic PvSTATEM package pre-release version functionalities} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +```{r setup, include=FALSE} +knitr::opts_chunk$set( + collapse = FALSE, + comment = "#>", + warning = FALSE, + message = FALSE +) +``` + +# Introduction +The basic functionality of the `PvSTATEM` package is reading raw MBA data. To present the functionalities of the package, we created a simple, artificial `csv` file, which is pre-loaded into the package. + +```{r} +library(PvSTATEM) + +plate_filepath <- system.file("extdata", "artificial_plate.csv", package = "PvSTATEM", mustWork = TRUE) # get the filepath of the csv dataset + + +plate <- read_data(plate_filepath) # read the data + +``` + +After the plate is successfully loaded, we can look into some basic information about the plate. + +```{r} + + +plate$summary() + +plate$get_sample(1) +plate$get_sample_by_type("POSITIVE CONTROL")[[1]] + +plate$sample_names +plate$analyte_names +``` + + + +## Warnings +Our scripts are designed to catch potential errors in the data. If there are any warnings, they will be stored in the `warnings` field of the plate object. The warnings can be accessed by the `$warnings` field of the plate object or by the `$warnings` field of the sample object. + +```{r} +```{r} +## warnings ---------------------------------------------------------------- + +plate$warnings + +plate$get_sample(1)$warnings + +plate$get(analyte = "AMA1", sample = 1) + +plate$get_sample_by_type("POSITIVE CONTROL")[[1]]$warnings +``` + + + +## simple standard curve plotting +```{r} +plot_standard_curve_antibody(plate, antibody_name = "AMA1") + +plate$blank_adjustment() + +plot_standard_curve_antibody(plate, antibody_name = "AMA1") + + +``` From b03c489d080db633deee9e8151d873d7734f7118 Mon Sep 17 00:00:00 2001 From: Jakub Grzywaczewski <44850303+ZetrextJG@users.noreply.github.com> Date: Mon, 8 Jul 2024 21:09:07 +0200 Subject: [PATCH 2/3] Add tests and setup GHA (#18) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * simple README with overview and installation (#7) * website for docs (#8) * add sample vignette and artificial dataset * pkgdown workflow --------- Co-authored-by: Fersoil * Add .gitignore * Add sample files and adequate tests * Setup package workflow * Style code (GHA) * Export remaining objects * Move dots down * Add missing usage description * Disable lint error * Fix missing import ggplot2 --------- Co-authored-by: Tymoteusz KwieciƄski <31191783+Fersoil@users.noreply.github.com> Co-authored-by: ZetrextJG --- .Rbuildignore | 2 + .github/.gitignore | 1 + .github/workflows/R-CMD-check.yaml | 52 +++ .github/workflows/lint.yaml | 34 ++ .github/workflows/pkgdown.yaml | 2 +- .github/workflows/style.yaml | 78 ++++ .github/workflows/test-coverage.yaml | 53 +++ .gitignore | 1 + .pre-commit-config.yaml | 19 + DESCRIPTION | 12 +- LICENSE => LICENSE.md | 1 + NAMESPACE | 5 + R/classes.R | 390 ++++++++++---------- R/read_data.R | 142 +++---- R/standard_curves.R | 66 ++-- README.md | 5 + inst/extdata/random.csv | 62 ++++ inst/extdata/random2.csv | 62 ++++ inst/extdata/random_broken_colB.csv | 62 ++++ inst/extdata/random_layout.xlsx | Bin 0 -> 6129 bytes man/Analyte.Rd | 2 +- man/Plate.Rd | 9 +- man/Sample.Rd | 3 +- man/SampleLocation.Rd | 1 + man/plot_standard_curve_antibody.Rd | 2 +- man/read_data.Rd | 9 +- tests/testthat/test-initial-preprocessing.R | 46 ++- tests/testthat/test-read-data.R | 25 +- vignettes/.gitignore | 2 + vignettes/example_script.Rmd | 9 +- 30 files changed, 810 insertions(+), 347 deletions(-) create mode 100644 .github/.gitignore create mode 100644 .github/workflows/R-CMD-check.yaml create mode 100644 .github/workflows/lint.yaml create mode 100644 .github/workflows/style.yaml create mode 100644 .github/workflows/test-coverage.yaml create mode 100644 .pre-commit-config.yaml rename LICENSE => LICENSE.md (99%) create mode 100644 inst/extdata/random.csv create mode 100644 inst/extdata/random2.csv create mode 100644 inst/extdata/random_broken_colB.csv create mode 100644 inst/extdata/random_layout.xlsx create mode 100644 vignettes/.gitignore diff --git a/.Rbuildignore b/.Rbuildignore index 91114bf2..31e37542 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,2 +1,4 @@ ^.*\.Rproj$ ^\.Rproj\.user$ +^LICENSE\.md$ +^\.github$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 00000000..2d19fc76 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 00000000..0f2fe080 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,52 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +permissions: read-all + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 00000000..f5d92097 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,34 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, dev] + pull_request: + branches: [main, dev] + +name: lint + +permissions: read-all + +jobs: + lint: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::lintr, local::. + needs: lint + + - name: Lint + run: lintr::lint_package() + shell: Rscript {0} + env: + LINTR_ERROR_ON_LINT: false diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 05540872..19c96e79 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -47,4 +47,4 @@ jobs: with: clean: false branch: gh-pages - folder: docs \ No newline at end of file + folder: docs diff --git a/.github/workflows/style.yaml b/.github/workflows/style.yaml new file mode 100644 index 00000000..be9b34aa --- /dev/null +++ b/.github/workflows/style.yaml @@ -0,0 +1,78 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + paths: ["**.[rR]", "**.[qrR]md", "**.[rR]markdown", "**.[rR]nw", "**.[rR]profile"] + branches: [main, dev] + +name: Style + +permissions: read-all + +jobs: + style: + runs-on: ubuntu-latest + permissions: + contents: write + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: Checkout repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup R + uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - name: Install dependencies + uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::styler, any::roxygen2 + needs: styler + + - name: Enable styler cache + run: styler::cache_activate() + shell: Rscript {0} + + - name: Determine cache location + id: styler-location + run: | + cat( + "location=", + styler::cache_info(format = "tabular")$location, + "\n", + file = Sys.getenv("GITHUB_OUTPUT"), + append = TRUE, + sep = "" + ) + shell: Rscript {0} + + - name: Cache styler + uses: actions/cache@v4 + with: + path: ${{ steps.styler-location.outputs.location }} + key: ${{ runner.os }}-styler-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-styler- + ${{ runner.os }}- + + - name: Style + run: styler::style_pkg() + shell: Rscript {0} + + - name: Commit and push changes + run: | + if FILES_TO_COMMIT=($(git diff-index --name-only ${{ github.sha }} \ + | egrep --ignore-case '\.(R|[qR]md|Rmarkdown|Rnw|Rprofile)$')) + then + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git commit ${FILES_TO_COMMIT[*]} -m "Style code (GHA)" + git pull --ff-only + git push origin + else + echo "No changes to commit." + fi diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 00000000..c0f85efd --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,53 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: test-coverage + +permissions: read-all + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr, any::xml2 + needs: coverage + + - name: Test coverage + run: | + cov <- covr::package_coverage( + quiet = FALSE, + clean = FALSE, + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") + ) + covr::to_cobertura(cov) + shell: Rscript {0} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/.gitignore b/.gitignore index 5b6a0652..c833a2c6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ .Rhistory .RData .Ruserdata +inst/doc diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..49d2591f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,19 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + +- repo: local + hooks: + - id: styler + name: Style R code with styler + entry: Rscript -e "errors <- styler::style_pkg(); print(errors); quit(save = 'no', status = sum(errors[[2]]))" + language: system + files: '\.Rproj$' + always_run: true diff --git a/DESCRIPTION b/DESCRIPTION index bf89ee92..8ed390cd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,18 +1,24 @@ Package: PvSTATEM Type: Package Title: Reading, quality control and preprocessin of MBA assay data +Description: Reading, quality control and preprocessin of MBA assay data Version: 0.0.1 Authors@R: person("Tymoteusz", "Kwiecinski", email = "tymoteuszkwiecinski@gmail.com", role = c("aut", "cre")) LazyData: true -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Imports: dplyr, R6, tools, + qpdf, readxl, RColorBrewer, + knitr, + rmarkdown, ggplot2 -Suggests: +Suggests: testthat (>= 3.0.0) -Config/testthat/edition: 3 +Config/testfhat/edition: 3 Roxygen: list(markdown = TRUE, r6 = TRUE) +License: BSD 3 +VignetteBuilder: knitr diff --git a/LICENSE b/LICENSE.md similarity index 99% rename from LICENSE rename to LICENSE.md index 599355e9..8696e19f 100644 --- a/LICENSE +++ b/LICENSE.md @@ -26,3 +26,4 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/NAMESPACE b/NAMESPACE index 1b1abc7b..68d38f58 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,10 @@ # Generated by roxygen2: do not edit by hand +export(Analyte) export(Plate) +export(Sample) +export(SampleLocation) +export(SampleType) export(plot_standard_curve_antibody) export(read_data) +import(ggplot2) diff --git a/R/classes.R b/R/classes.R index f34dc046..8440d7bd 100644 --- a/R/classes.R +++ b/R/classes.R @@ -1,7 +1,7 @@ library(R6) -verify_numeric_join = function(x, y) { +verify_numeric_join <- function(x, y) { # check if two numeric values are equal if (is.na(x) || is.na(y)) { return(TRUE) @@ -9,7 +9,7 @@ verify_numeric_join = function(x, y) { return(x == y) } -verify_character_join = function(x, y) { +verify_character_join <- function(x, y) { # check if two character values are equal if (is.null(x) || is.null(y)) { return(TRUE) @@ -17,7 +17,7 @@ verify_character_join = function(x, y) { return(x == y) } -get_join_value = function(x, y) { +get_join_value <- function(x, y) { if (is.na(x) || is.null(x)) { return(y) } @@ -48,9 +48,10 @@ get_join_value = function(x, y) { #' @field units Units of the analyte in which the results are expressed #' #' @examples -#' etramp <- Analyte$new(id = 73, analyte_name = "Etramp5_ag1", bead_count=50) +#' etramp <- Analyte$new(id = 73, analyte_name = "Etramp5_ag1", bead_count = 50) #' print(etramp) #' +#' @export Analyte <- R6Class( "Analyte", list( @@ -86,14 +87,14 @@ Analyte <- R6Class( stopifnot(length(id) == 1 && is.numeric(id)) stopifnot(length(analyte_name) == 1 && - is.character(analyte_name)) + is.character(analyte_name)) stopifnot(length(bead_count) == 1 && - (is.na(bead_count) || is.numeric(bead_count))) + (is.na(bead_count) || is.numeric(bead_count))) stopifnot(length(analysis_type) == 0 || - (is.character(analysis_type) && - length(analysis_type) == 1)) + (is.character(analysis_type) && + length(analysis_type) == 1)) stopifnot(length(units) == 0 || - (is.character(units) && length(units) == 1)) + (is.character(units) && length(units) == 1)) self$id <- id self$analyte_name <- analyte_name @@ -144,7 +145,6 @@ Analyte <- R6Class( get_join_value(self$analysis_type, new_analyte$analysis_type) self$units <- get_join_value(self$units, new_analyte$units) } - ) ) @@ -169,6 +169,8 @@ Analyte <- R6Class( #' #' sample_location <- SampleLocation$parse_sample_location("65(1,F5)") #' sample_location$location_name +#' +#' @export SampleLocation <- R6Class( "SampleLocation", public = list( @@ -188,7 +190,6 @@ SampleLocation <- R6Class( self$col <- col self$row <- row - }, #' @description @@ -206,14 +207,13 @@ SampleLocation <- R6Class( # join the data of two samples if (!verify_numeric_join(self$row, new_location$row) || - !verify_numeric_join(self$col, new_location$col)) { + !verify_numeric_join(self$col, new_location$col)) { stop("Cannot join samples of different locations") } self$col <- get_join_value(self$col, new_location$col) self$row <- get_join_value(self$row, new_location$row) } - ), active = list( #' @description @@ -224,7 +224,7 @@ SampleLocation <- R6Class( #' @description #' Returns the location of the sample in the format `(row_letter, column)` - location_name = function() { + location_name = function() { return(paste0("(", self$row_letter, ", ", self$col, ")")) } ) @@ -239,17 +239,16 @@ SampleLocation <- R6Class( #' #' @returns New SampleLocation object with parsed location information. #' -SampleLocation$parse_sample_location = function(location_string) { +SampleLocation$parse_sample_location <- function(location_string) { cleaned_string <- gsub("\\\"", "", location_string) cleaned_string <- unlist(strsplit(cleaned_string, "[()]")) - if (length(cleaned_string) > 1){ + if (length(cleaned_string) > 1) { id <- as.numeric(cleaned_string[1]) cleaned_string <- unlist(strsplit(cleaned_string[2], "[;,]")) plate_id <- as.numeric(cleaned_string[1]) location <- cleaned_string[2] - } - else{ + } else { cleaned_string <- unlist(strsplit(cleaned_string[1], "[;,]")) plate_id <- as.numeric(cleaned_string[1]) location <- cleaned_string[2] @@ -283,6 +282,8 @@ SampleLocation$parse_sample_location = function(location_string) { #' #' @field dilution_factor A numeric value that represents the dilution factor of the sample. Used only in the case of control samples #' +#' +#' @export SampleType <- R6Class( "SampleType", public = list( @@ -302,24 +303,23 @@ SampleType <- R6Class( # for now there should passed a sample type only # check for valid input stopifnot(length(sample_type) == 1 && - is.character(sample_type)) + is.character(sample_type)) stopifnot(length(dilution_factor) == 1 && - (is.na(dilution_factor) || - is.numeric(dilution_factor))) + (is.na(dilution_factor) || + is.numeric(dilution_factor))) SampleType$validate_sample_type(sample_type) # allow for lazy loading - dont verify the dilution factor after creation, but wait until checks - if (validate_dilution){ + if (validate_dilution) { SampleType$validate_dilution_factor(sample_type, dilution_factor) - private$is_validated = TRUE - }else { - private$is_validated = FALSE + private$is_validated <- TRUE + } else { + private$is_validated <- FALSE } self$sample_type <- sample_type self$dilution_factor <- dilution_factor - }, #' @description @@ -372,18 +372,18 @@ SampleType <- R6Class( #' Possible types for the samples #' SampleType$valid_sample_types <- - c("BLANK", + c( + "BLANK", "TEST", "NEGATIVE CONTROL", "STANDARD CURVE", - "POSITIVE CONTROL") - + "POSITIVE CONTROL" + ) #' #' @description #' Validates the sample type using five possible values: `"BLANK"`, `"POSITIVE CONTROL"`, `"NEGATIVE CONTROL"`, `"TEST"`. #' -SampleType$validate_sample_type = function(sample_type) { - +SampleType$validate_sample_type <- function(sample_type) { if (!(sample_type %in% SampleType$valid_sample_types)) { stop("Invalid sample type") } @@ -391,18 +391,17 @@ SampleType$validate_sample_type = function(sample_type) { #' @description #' Validates the dilution factor based on the sample type -SampleType$validate_dilution_factor = function(sample_type, dilution_factor) { +SampleType$validate_dilution_factor <- function(sample_type, dilution_factor) { if (sample_type == "POSITIVE CONTROL" && is.na(dilution_factor)) { stop("Positive control samples must have a dilution factor") } if (sample_type == "STANDARD CURVE" && is.na(dilution_factor)) { stop("Standard curve samples must have a dilution factor") } - if (sample_type %in% c("POSITIVE CONTROL", "STANDARD CURVE") && - !is.na(dilution_factor)) { + if (sample_type %in% c("POSITIVE CONTROL", "STANDARD CURVE") && + !is.na(dilution_factor)) { stop("Only positive control or standard curve samples should have a dilution factor") } - } #' @description @@ -426,9 +425,9 @@ SampleType$validate_dilution_factor = function(sample_type, dilution_factor) { #' #' @param dilution_factor (`numeric(1)`)\cr #' The dilution factor of the sample from the base luminex file. This parameter is ignored for now -SampleType$parse_sample_type = function(sample_name, - sample_name_loc = "", - dilution_factor = 1) { +SampleType$parse_sample_type <- function(sample_name, + sample_name_loc = "", + dilution_factor = 1) { if (is.null(sample_name_loc) || is.na(sample_name_loc)) { sample_name_loc <- "" } @@ -437,8 +436,8 @@ SampleType$parse_sample_type = function(sample_name, blank_pattern <- "^B..$" if (sample_name %in% blank_types || - grepl(blank_pattern, sample_name) || - grepl(blank_pattern, sample_name_loc)) { + grepl(blank_pattern, sample_name) || + grepl(blank_pattern, sample_name_loc)) { return(SampleType$new("BLANK")) } @@ -446,8 +445,8 @@ SampleType$parse_sample_type = function(sample_name, standard_curve_pattern <- "^(S_|S|S\\s|)(1/\\d+)$" standard_curve_loc_pattern <- "(1/\\d+)" if (sample_name %in% standard_curve_types || - grepl(standard_curve_pattern, sample_name) || - grepl(standard_curve_loc_pattern, sample_name_loc)) { + grepl(standard_curve_pattern, sample_name) || + grepl(standard_curve_loc_pattern, sample_name_loc)) { dilution_factor_pattern <- "1/\\d+" match <- "" if (!is.null(sample_name_loc) && sample_name_loc != "" || !is.na(sample_name_loc) && sample_name_loc != "") { @@ -458,7 +457,7 @@ SampleType$parse_sample_type = function(sample_name, dilution_factor <- eval(parse(text = match)) if (is.null(dilution_factor)) { - dilution_factor = NA # this value needs to be updated later + dilution_factor <- NA # this value needs to be updated later } return(SampleType$new("STANDARD CURVE", dilution_factor = dilution_factor, validate_dilution = FALSE)) @@ -469,8 +468,8 @@ SampleType$parse_sample_type = function(sample_name, "^(N..|.*\\bNEG\\b)" # check if it starts with N or contains NEG string if (sample_name %in% negative_types || - grepl(negative_pattern, sample_name) || - grepl(negative_pattern, sample_name_loc)) { + grepl(negative_pattern, sample_name) || + grepl(negative_pattern, sample_name_loc)) { return(SampleType$new("NEGATIVE CONTROL")) } @@ -478,7 +477,7 @@ SampleType$parse_sample_type = function(sample_name, positive_control_pattern <- c("^(P.+|POS.+|CP.+)(1/\\d+)$") if (grepl(positive_control_pattern, sample_name) || - grepl(positive_control_pattern, sample_name_loc)) { + grepl(positive_control_pattern, sample_name_loc)) { dilution_factor_pattern <- "1/\\d+" match <- "" if (!is.null(sample_name_loc) && sample_name_loc != "" || !is.na(sample_name_loc) && sample_name_loc != "") { @@ -489,7 +488,7 @@ SampleType$parse_sample_type = function(sample_name, dilution_factor <- eval(parse(text = match)) if (is.null(dilution_factor)) { - dilution_factor = NA # this value needs to be updated later + dilution_factor <- NA # this value needs to be updated later } return(SampleType$new("POSITIVE CONTROL", dilution_factor = dilution_factor, validate_dilution = FALSE)) } @@ -504,7 +503,9 @@ SampleType$parse_sample_type = function(sample_name, #' @description #' A class to represent the sample. It contains all the necessary information about the sample #' @examples -#' #TODO +#' # TODO +#' +#' @export Sample <- R6Class( "Sample", list( @@ -558,17 +559,17 @@ Sample <- R6Class( sample_type = NULL, sample_location = NULL, warnings = list(), - errors= list(), + errors = list(), data = data.frame()) { # check for valid input stopifnot(length(id) == 1 && is.numeric(id)) stopifnot(length(sample_name) == 1 && - is.character(sample_name)) + is.character(sample_name)) stopifnot(is.null(sample_type) || - "SampleType" %in% class(sample_type)) + "SampleType" %in% class(sample_type)) stopifnot(is.null(sample_location) || - "SampleLocation" %in% class(sample_location)) + "SampleLocation" %in% class(sample_location)) stopifnot(is.data.frame(data)) self$id <- id @@ -631,7 +632,6 @@ Sample <- R6Class( self$data <- dplyr::bind_rows(self$data, new_sample$data) } - ) ) @@ -644,7 +644,7 @@ Sample <- R6Class( #' #' #' @examples -#' #TODO +#' # TODO #' #' @export Plate <- R6Class( @@ -700,9 +700,7 @@ Plate <- R6Class( self$calibration_info <- calibration_info self$audit_logs <- audit_logs self$plate_name <- plate_name - }, - print = function(...) { cat( "Plate with", @@ -721,14 +719,13 @@ Plate <- R6Class( #' #' @param include_names If `include_names` parameter is `TRUE`, apart from count of control samples, provides also their names.By default `FALSE` summary = function(..., include_names = FALSE) { - positive_control_samples_list <- self$get_sample_by_type("POSITIVE CONTROL") negative_control_samples_list <- self$get_sample_by_type("NEGATIVE CONTROL") blank_samples_num <- length(self$get_sample_by_type("BLANK")) positive_control_num <- length(positive_control_samples_list) - negative_control_num <-length(negative_control_samples_list) + negative_control_num <- length(negative_control_samples_list) positive_control_names <- "" @@ -757,7 +754,6 @@ Plate <- R6Class( "Number of negative control samples: ", negative_control_num, negative_control_names, "\n", - sep = "" ) @@ -765,8 +761,8 @@ Plate <- R6Class( }, #' @description - #' function adds block of information to the current plate - #' + #' function adds block of information to the current plate + #' #' #' the analysis type should be added after the analysis, otherwise there is no enough information about samples add_results_block = function(data_type, parsed_block) { @@ -775,17 +771,17 @@ Plate <- R6Class( return(0) } if (data_type == "Audit Logs") { - self$audit_logs = parsed_block + self$audit_logs <- parsed_block return(0) } if (data_type == "Warnings/Errors") { # warnings should be at the end of the file, thus we assume that there exists samples in given locations - for (warning in parsed_block){ + for (warning in parsed_block) { location <- SampleLocation$parse_sample_location(warning$Location) sample <- self$get_sample(location) - if (warning$Status == "Warning"){ + if (warning$Status == "Warning") { sample$warnings <- c(sample$warnings, warning$Message) - } else if (warning$Status == "Error"){ + } else if (warning$Status == "Error") { sample$errors <- c(sample$errors, warning$Message) } } @@ -798,8 +794,9 @@ Plate <- R6Class( new_samples <- parsed_block for (i in seq_along(new_samples)) { new_sample <- new_samples[[i]] - if (new_sample$id %in% sapply(self$samples, function(x) - x$id)) { + if (new_sample$id %in% sapply(self$samples, function(x) { + x$id + })) { # add new data to the existing sample self$samples[[new_sample$id]]$join(new_sample) } else { @@ -817,8 +814,9 @@ Plate <- R6Class( new_analytes <- parsed_block for (i in seq_along(new_analytes)) { new_analyte <- new_analytes[[i]] - if (new_analyte$id %in% sapply(self$analytes, function(x) - x$id)) { + if (new_analyte$id %in% sapply(self$analytes, function(x) { + x$id + })) { # add new data to the existing sample self$analytes[[as.character(new_analyte$id)]]$join(new_analyte) } else { @@ -826,23 +824,23 @@ Plate <- R6Class( } } return(0) - } if (data_type == "analyte_types") { parsed_block <- Filter(Negate(is.null), parsed_block) - if (length(self$analytes) == 0 || length(parsed_block)) - return(0) # nothing to add + if (length(self$analytes) == 0 || length(parsed_block)) { + return(0) + } # nothing to add for (analyte_name in names(parsed_block)) { analyte_type <- parsed_block[[analyte_name]] analyte_id <- self$get_analyte_id(analyte_name) - if(length(analyte_id) > 0) + if (length(analyte_id) > 0) { self$analytes[[analyte_id]]$analysis_type <- analyte_type + } } return(0) - } }, @@ -866,9 +864,9 @@ Plate <- R6Class( #' @description #' checks analyte consistency - verifies if all of the analytes contained within the samples are listed in the `analytes` list of the plate object check_analyte_consistency = function() { - additional_column_names = c("Total Events") + additional_column_names <- c("Total Events") - is_consistent = TRUE + is_consistent <- TRUE analytes_in_plate <- self$analyte_names @@ -892,7 +890,7 @@ Plate <- R6Class( sample$warnings <- append(sample$warnings, warning_message) # Raise an error - is_consistent = FALSE + is_consistent <- FALSE # stop("Error: There are analytes in the sample not defined in the plate.") } } @@ -906,16 +904,15 @@ Plate <- R6Class( #' @param min_events_per_bead lower bound of acceptable number of events. By default equals to `min_events_per_bead` parameter saved in the plate object #' check_beads_number = function(min_events_per_bead = self$min_events_per_bead) { - below_min_list <- list() - below_min_flag = FALSE + below_min_flag <- FALSE for (sample in self$samples) { - if("Count" %in% row.names(sample$data)){ #TODO there should be option for lowercase + if ("Count" %in% row.names(sample$data)) { # TODO there should be option for lowercase below_min <- which(sample$data["Count", ] < min_events_per_bead, arr.ind = TRUE) - if(length(below_min) > 0) { - below_min_analytes <- names(sample$data["Count", ] )[below_min[, "col"]] + if (length(below_min) > 0) { + below_min_analytes <- names(sample$data["Count", ])[below_min[, "col"]] new_warnings <- paste0("An analyte ", below_min_analytes, " did not reach the specified count in the given sample") sample$warnings <- c(sample$warnings, new_warnings) } @@ -928,25 +925,27 @@ Plate <- R6Class( #' #' @param sample sample name or its id #' @returns sample object of given sample name or id - get_sample = function(sample) { + get_sample = function(sample) { # get the sample by its name, id or location - if ("SampleLocation" %in% class(sample)){ + if ("SampleLocation" %in% class(sample)) { sample <- - which(sapply(self$samples, function(x) - x$sample_location$location_name) == sample$location_name) - } - else if (is.numeric(sample)) { + which(sapply(self$samples, function(x) { + x$sample_location$location_name + }) == sample$location_name) + } else if (is.numeric(sample)) { if (sample < 0 || sample > plate$number_of_samples) { stop("Sample ID out of range") } } else { sample <- - which(sapply(self$samples, function(x) - x$sample_name) == sample) + which(sapply(self$samples, function(x) { + x$sample_name + }) == sample) sample_by_loc <- - which(sapply(self$samples, function(x) - x$sample_location$location_name) == sample) + which(sapply(self$samples, function(x) { + x$sample_location$location_name + }) == sample) sample <- c(sample, sample_by_loc) @@ -957,43 +956,43 @@ Plate <- R6Class( return(self$samples[[sample]]) }, - #' @description - #' Function returns list of samples filtered by the type - #' - #' @param sample_type type of the sample to be filtered. Possible values are: - #' SampleType$valid_sample_types - #' - #' - #' @param exclude If `FALSE` returns list of samples with given `sample_type`, - #' otherwise returns all samples except for the specified `sample_type` - #' - get_sample_by_type = function(sample_type, exclude = FALSE) { - stopifnot(sample_type %in% SampleType$valid_sample_types) - - samples_by_type <- list() - for (sample in self$samples) { - if (sample$sample_type$sample_type == sample_type && !exclude){ - samples_by_type = append(samples_by_type, sample) - } else if (sample$sample_type$sample_type != sample_type && exclude) { - samples_by_type = append(samples_by_type, sample) - } - } - - return(samples_by_type) - }, + #' @description + #' Function returns list of samples filtered by the type + #' + #' @param sample_type type of the sample to be filtered. Possible values are: + #' SampleType$valid_sample_types + #' + #' + #' @param exclude If `FALSE` returns list of samples with given `sample_type`, + #' otherwise returns all samples except for the specified `sample_type` + #' + get_sample_by_type = function(sample_type, exclude = FALSE) { + stopifnot(sample_type %in% SampleType$valid_sample_types) + + samples_by_type <- list() + for (sample in self$samples) { + if (sample$sample_type$sample_type == sample_type && !exclude) { + samples_by_type <- append(samples_by_type, sample) + } else if (sample$sample_type$sample_type != sample_type && exclude) { + samples_by_type <- append(samples_by_type, sample) + } + } + + return(samples_by_type) + }, #' #' @returns analyte id of given name get_analyte_id = function(analyte_name) { analyte_id <- - which(sapply(self$analytes, function(x) - x$analyte_name) == analyte_name) - + which(sapply(self$analytes, function(x) { + x$analyte_name + }) == analyte_name) }, #' @description - #' Function returns data for a specific analyte and sample. - #' + #' Function returns data for a specific analyte and sample. + #' #' @param analyte An analyte name or its id of which data we want to extract #' #' @param sample sample name or id @@ -1025,7 +1024,7 @@ Plate <- R6Class( if (is.null(data_type)) { return(sample$data[analyte_name]) } else { - if (! data_type %in% row.names(sample$data[analyte_name])){ + if (!data_type %in% row.names(sample$data[analyte_name])) { stop(paste0("Incorrect value for `data_type`: ", data_type)) } return(sample$data[data_type, analyte_name]) @@ -1035,96 +1034,89 @@ Plate <- R6Class( return(sample) }, - #' @description - #' Function adjusts the values of test samples substracting values from BLANK samples to remove background light - #' In short it substracts the values from data in all samples, except from Blanks. It does not substract values from - #' `Count` values - #' - #' @param method How the values of different blanks should be aggregated. By default `avg`. For now it is the only available method - #' @param inplace Whether the method should produce new plate with adjusted values or not, By default `TRUE` - operates on the current plate. - blank_adjustment = function(method = "avg", inplace = "TRUE") { - if (private$blank_already_adjusted) { - stop("Blank values have been already adjusted in this plate, if you want to try doing it using different method consider reversing this process") - } - - private$blank_already_adjusted = TRUE - available_methods = c("avg") - if (!method %in% available_methods){ - stop(paste0(method, "not available for now, consider using one of the following: ", available_methods)) - } - - if (inplace == FALSE){ - newplate <- self$copy() - } - else { - newplate <- self - } - - blank_samples <- self$get_sample_by_type("BLANK") # these values will be substracted - non_blank_samples <- self$get_sample_by_type("BLANK", exclude = TRUE) # from these values - - # aggregate blank values - - if (method == "avg"){ - agg_dataframe <- NULL - for (sample in blank_samples){ - if (is.null(agg_dataframe)){ - agg_dataframe <- sample$data - } - else { - agg_dataframe <- agg_dataframe + sample$data - } - } - if ("Count" %in% rownames(agg_dataframe)){ - agg_dataframe["Count", ] = 0 # count row is ommited - } - if ("Total Events" %in% colnames(agg_dataframe)){ - agg_dataframe["Total Events"] = 0 - } + #' @description + #' Function adjusts the values of test samples substracting values from BLANK samples to remove background light + #' In short it substracts the values from data in all samples, except from Blanks. It does not substract values from + #' `Count` values + #' + #' @param method How the values of different blanks should be aggregated. By default `avg`. For now it is the only available method + #' @param inplace Whether the method should produce new plate with adjusted values or not, By default `TRUE` - operates on the current plate. + blank_adjustment = function(method = "avg", inplace = "TRUE") { + if (private$blank_already_adjusted) { + stop("Blank values have been already adjusted in this plate, if you want to try doing it using different method consider reversing this process") + } - agg_dataframe = agg_dataframe / length(blank_samples) # average the results + private$blank_already_adjusted <- TRUE + available_methods <- c("avg") + if (!method %in% available_methods) { + stop(paste0(method, "not available for now, consider using one of the following: ", available_methods)) + } + if (inplace == FALSE) { + newplate <- self$copy() + } else { + newplate <- self + } - for (sample in non_blank_samples){ - sample$data = sample$data - agg_dataframe # substract the aggregated values - } + blank_samples <- self$get_sample_by_type("BLANK") # these values will be substracted + non_blank_samples <- self$get_sample_by_type("BLANK", exclude = TRUE) # from these values + # aggregate blank values - } + if (method == "avg") { + agg_dataframe <- NULL + for (sample in blank_samples) { + if (is.null(agg_dataframe)) { + agg_dataframe <- sample$data + } else { + agg_dataframe <- agg_dataframe + sample$data + } + } + if ("Count" %in% rownames(agg_dataframe)) { + agg_dataframe["Count", ] <- 0 # count row is ommited + } + if ("Total Events" %in% colnames(agg_dataframe)) { + agg_dataframe["Total Events"] <- 0 + } - }, + agg_dataframe <- agg_dataframe / length(blank_samples) # average the results - #' @description - #' Function verifies if there are any MFI values below zero after blank removal - check_MFI_after_adjustment = function() { - if (!self$check_if_blanks_already_adjusted){ - stop("Consider adjusting the blanks first") - } + for (sample in non_blank_samples) { + sample$data <- sample$data - agg_dataframe # substract the aggregated values + } + } + }, - below_zero_list <- list() - below_zero_flag = FALSE + #' @description + #' Function verifies if there are any MFI values below zero after blank removal + check_MFI_after_adjustment = function() { + if (!self$check_if_blanks_already_adjusted) { + stop("Consider adjusting the blanks first") + } - for (sample in self$get_sample_by_type("BLANK", exclude = TRUE)) { - below_min <- which(sample$data < 0, arr.ind = TRUE) - if(length(below_min) > 0) { - below_min_analytes <- names(sample$data)[below_min[, "col"]] - new_warnings <- paste0("An analyte ", below_min_analytes, " has value below 0 after blank adjustment") - sample$warnings <- c(sample$warnings, new_warnings) + below_zero_list <- list() - } - } + below_zero_flag <- FALSE - return(below_zero_flag) + for (sample in self$get_sample_by_type("BLANK", exclude = TRUE)) { + below_min <- which(sample$data < 0, arr.ind = TRUE) + if (length(below_min) > 0) { + below_min_analytes <- names(sample$data)[below_min[, "col"]] + new_warnings <- paste0("An analyte ", below_min_analytes, " has value below 0 after blank adjustment") + sample$warnings <- c(sample$warnings, new_warnings) + } + } - }, + return(below_zero_flag) + }, - #' @description performs copy of the plate - copy = function() { - stop("Not implemented yet") - } + #' @description performs copy of the plate + copy = function() { + stop("Not implemented yet") + } ), private = list( blank_already_adjusted = FALSE @@ -1160,7 +1152,7 @@ Plate <- R6Class( #' @field batch_name Metdata: batch name batch_name = function() { - if (!is.null(self$batch_info$batch_name) && !is.na(self$batch_info$batch_name)){ + if (!is.null(self$batch_info$batch_name) && !is.na(self$batch_info$batch_name)) { return(self$batch_info$batch_name) } return("___") @@ -1168,13 +1160,13 @@ Plate <- R6Class( #' @field min_events_per_bead minimal number of events that is valid for one bead - sample and analyte min_events_per_bead = function() { - if (is.null(self$batch_info$min_events_per_bead)){ + if (is.null(self$batch_info$min_events_per_bead)) { return(50) } return(self$batch_info$min_events_per_bead) }, #' @field check_if_blanks_already_adjusted flag that specifies if the blanks were already adjusted and its MFI values subtracted from remaining samples - check_if_blanks_already_adjusted = function(){ + check_if_blanks_already_adjusted = function() { return(private$blank_already_adjusted) }, diff --git a/R/read_data.R b/R/read_data.R index 2d1f3dcd..1530cb66 100644 --- a/R/read_data.R +++ b/R/read_data.R @@ -9,17 +9,19 @@ #' @param check_plate if TRUE veryfies the plate - checks the consistency etc. #' @param verbose if TRUE, print out the progress of the function #' @param colorize if TRUE, colorize the output +#' @param ... additional arguments passed down #' -#'@examples -#' plate <- read_data("path/to/file.csv") +#' @examples +#' plate_file <- system.file("extdata", "random.csv", package = "PvSTATEM") +#' plate <- read_data(plate_file) #' #' @export read_data <- function(file_path, - layout_file_path = NULL, - check_plate = TRUE, - ..., - verbose = TRUE, - colorize = !isTRUE(getOption('knitr.in.progress'))) { + layout_file_path = NULL, + check_plate = TRUE, + verbose = TRUE, + colorize = !isTRUE(getOption("knitr.in.progress")), + ...) { # firstly we read the raw csv file as a dataframe # data <- read.csv(filepath, header = FALSE, sep = ",", stringsAsFactors = FALSE) @@ -37,7 +39,9 @@ read_data <- function(file_path, verbose_cat( color_codes$green_start, "Reading MBA plate csv file...\n\n", - color_codes$green_end, verbose = verbose) + color_codes$green_end, + verbose = verbose + ) blocks <- extract_blocks(file_path) @@ -51,22 +55,25 @@ read_data <- function(file_path, parse_results_blocks(results_blocks, verbose = verbose) results_plate <- - parse_header_blocks(results_plate, header_blocks, check_consistency=check_plate, verbose = verbose) + parse_header_blocks(results_plate, header_blocks, check_consistency = check_plate, verbose = verbose) # merging the layout file if (!is.null(layout_file_path)) { verbose_cat( - "",color_codes$green_start, + "", color_codes$green_start, "reading layout file...\n\n", - color_codes$green_end, verbose = verbose) + color_codes$green_end, + verbose = verbose + ) results_plate <- - read_layout_data(layout_file_path, results_plate, check_plate=check_plate, verbose = verbose) + read_layout_data(layout_file_path, results_plate, check_plate = check_plate, verbose = verbose) } verbose_cat( color_codes$green_start, "New plate object has been created!\n", - color_codes$green_end,"\n", - verbose = verbose) + color_codes$green_end, "\n", + verbose = verbose + ) # consistency and validation checks if (check_plate) { @@ -95,7 +102,8 @@ read_data <- function(file_path, ")", "\nPlate contains at least one region that did not reach the specified bead count - ", results_plate$min_events_per_bead, "\n", verbose = verbose - ) } + ) + } } # add name from filepath @@ -104,7 +112,6 @@ read_data <- function(file_path, results_plate$plate_name <- filename_without_extension return(results_plate) - } read_layout_data <- function(layout_file_path, @@ -114,13 +121,14 @@ read_layout_data <- function(layout_file_path, verbose = TRUE) { # function modifies the results_plate object by adding the location information from the layout file - ext = tools::file_ext(layout_file_path) + ext <- tools::file_ext(layout_file_path) stopifnot(ext %in% c("csv", "xlsx")) location_data <- switch(ext, - csv = read_location_data_csv(layout_file_path), - xlsx = read_location_data_xlsx(layout_file_path)) + csv = read_location_data_csv(layout_file_path), + xlsx = read_location_data_xlsx(layout_file_path) + ) for (sample in results_plate$samples) { row <- sample$sample_location$row @@ -141,8 +149,9 @@ read_location_data_csv <- function(location_file_path, ..., verbose_cat("not tested implementation location csv file\n", verbose = verbose) location_data <- read.csv(location_file_path, - header = TRUE, - stringsAsFactors = FALSE) + header = TRUE, + stringsAsFactors = FALSE + ) return(location_data) } @@ -189,8 +198,9 @@ extract_blocks <- function(file_path) { blocks[[length(blocks) + 1]] <- current_block # remove empty blocks - blocks <- blocks[sapply(blocks, function(x) - length(x) > 0)] + blocks <- blocks[sapply(blocks, function(x) { + length(x) > 0 + })] return(blocks) } @@ -205,20 +215,22 @@ parse_lines <- function(lines, csv_delim = ",") { # remove trailing delims leading_trailing_delim_regex <- - paste0("^", csv_delim, "*|", csv_delim , "*$") + paste0("^", csv_delim, "*|", csv_delim, "*$") lines <- - lapply(lines, function(line) - gsub(leading_trailing_delim_regex, "", line)) + lapply(lines, function(line) { + gsub(leading_trailing_delim_regex, "", line) + }) replace_delim_in_brackets <- paste0("(?:\\G(?!^)|\\()[^)(", csv_delim, "]*\\K,(?=[^)()*])") lines <- - lapply(lines, function(line) - gsub(replace_delim_in_brackets, replacement_delim, line, perl = TRUE)) + lapply(lines, function(line) { + gsub(replace_delim_in_brackets, replacement_delim, line, perl = TRUE) + }) delim_regex <- paste0( - '(?:^|', + "(?:^|", csv_delim, ')(?=[^"]|(")?)"?((?(1)(?:[^"]|"")*|[^', csv_delim, @@ -229,12 +241,14 @@ parse_lines <- function(lines, csv_delim = ",") { regmatches(lines, gregexpr(delim_regex, lines, perl = TRUE)) # remove trailing delims matches <- - lapply(matches, function(line) - gsub(leading_trailing_delim_regex, "", line)) + lapply(matches, function(line) { + gsub(leading_trailing_delim_regex, "", line) + }) # remove empty strings - matches <- lapply(matches, function(line) - line[nzchar(line)]) + matches <- lapply(matches, function(line) { + line[nzchar(line)] + }) return(matches) } @@ -247,7 +261,7 @@ divide_blocks <- function(blocks) { for (i in seq_len(length(blocks))) { # Check if the block contains the keyword "results" if (any(grepl("^Results", blocks[[i]], ignore.case = TRUE))) { - results_block_index = i + results_block_index <- i } } @@ -272,8 +286,9 @@ parse_header_blocks <- verbose = TRUE) { # this function parses the header blocks and writes the metadata into the results plate - if (length(header_blocks) < 5) + if (length(header_blocks) < 5) { stop("Improper data formatting - there are no enough blocks in the header section") + } batch_info <- parse_batch_info(header_blocks[[1]], header_blocks[[2]]) @@ -287,7 +302,7 @@ parse_header_blocks <- sample_info <- parse_sample_info(header_blocks[[5]]) if (check_consistency) { - if (results_plate$number_of_samples != sample_info$samples_count) + if (results_plate$number_of_samples != sample_info$samples_count) { stop( "According to plate metadata there are ", sample_info$samples_count, @@ -295,6 +310,7 @@ parse_header_blocks <- results_plate$number_of_samples, "found" ) + } # TODO what is min events ? batch_info$min_events_per_bead <- @@ -305,7 +321,6 @@ parse_header_blocks <- results_plate$batch_info <- batch_info return(results_plate) - } @@ -329,7 +344,6 @@ parse_header <- function(header_blocks) { sample_info = sample_info ) ) - } parse_date <- function(date) { @@ -380,7 +394,6 @@ parse_calibration_info <- function(cal_block1, cal_block2) { # TODO return(list()) - } parse_sample_info <- function(sample_block) { @@ -441,20 +454,22 @@ parse_single_results_block <- "NOTE", color_codes$yellow_end, ")\n", - "CRC block found, omiting it for now\n", verbose = verbose) + "CRC block found, omiting it for now\n", + verbose = verbose + ) return(list(data_type = "CRC", list())) } return(list(data_type = "CRC", list())) } - data_type = vector_data_type[2] + data_type <- vector_data_type[2] block_header <- results_block[[2]] # fields and antigen names if (length(results_block) <= 2) { # there are no data stored - return(list(data_type=data_type, list())) + return(list(data_type = data_type, list())) } results_df <- @@ -464,11 +479,11 @@ parse_single_results_block <- warning_datatypes <- c("Warnings/Errors", "Audit Logs") if (data_type %in% warning_datatypes) { - warnings <- list() - if (nrow(results_df) == 0) + if (nrow(results_df) == 0) { return(list(data_type = data_type, list())) + } for (row in 1:nrow(results_df)) { @@ -487,7 +502,7 @@ parse_single_results_block <- if (data_type %in% analysis_datatypes) { rownames(results_df) <- results_df[1:nrow(results_df), 1] results_df <- - results_df[,-1] # remove the first column - it should have the known format + results_df[, -1] # remove the first column - it should have the known format # replace values null_values <- c("None", "Alysis Types", "Analysis Types") for (val in null_values) { @@ -503,8 +518,9 @@ parse_single_results_block <- ")", "\nThe datatype ", data_type, - " contains NA values\n" - , verbose = verbose) + " contains NA values\n", + verbose = verbose + ) } analyte_types <- list() @@ -513,8 +529,9 @@ parse_single_results_block <- analysis_type <- results_df[1, col] - if (is.na(analysis_type)) - analysis_type = NULL + if (is.na(analysis_type)) { + analysis_type <- NULL + } analyte_types[[analyte_name]] <- analysis_type } @@ -527,7 +544,7 @@ parse_single_results_block <- if (data_type %in% beads_datatypes) { rownames(results_df) <- results_df[1:nrow(results_df), 1] results_df <- - results_df[,-1] # remove the first column - it should have the known format + results_df[, -1] # remove the first column - it should have the known format # replace values null_values <- c("None", "Units", "Units:") for (val in null_values) { @@ -552,7 +569,7 @@ parse_single_results_block <- analytes <- list() for (col in 1:ncol(results_df)) { analyte_name <- colnames(results_df)[col] - id = NA + id <- NA per_bead_count <- NA analyte_units <- NULL analysis_type <- NULL @@ -560,16 +577,16 @@ parse_single_results_block <- if (data_type == "Units") { analyte_units <- results_df[2, col] - } - else if (data_type == "Per Bead Count") { + } else if (data_type == "Per Bead Count") { per_bead_count <- as.numeric(results_df[2, col]) } id <- as.numeric(results_df[1, col]) - if (!is.null(analyte_units) && is.na(analyte_units)) - analyte_units = NULL + if (!is.null(analyte_units) && is.na(analyte_units)) { + analyte_units <- NULL + } analyte <- Analyte$new( @@ -581,7 +598,6 @@ parse_single_results_block <- ) analytes[[as.character(id)]] <- analyte - } return(list(data_type = "analytes", analytes)) } @@ -595,7 +611,7 @@ parse_single_results_block <- # check if the dataframe contains a column with name or Location - first_analyte_col_index = 1 + first_analyte_col_index <- 1 if ("Location" %in% names(results_df)) { first_analyte_col_index <- first_analyte_col_index + 1 } @@ -611,9 +627,11 @@ parse_single_results_block <- id <- row # TODO better labeling if (is.null(results_df[row, "Sample"])) { - stop(paste0("No name specified for the sample of id: ", - row, - " - omiting it")) + stop(paste0( + "No name specified for the sample of id: ", + row, + " - omiting it" + )) next } @@ -629,8 +647,7 @@ parse_single_results_block <- if (data_type == "Dilution Factor") { dilution_factor <- as.numeric(results_df[row, "Dilution Factor"]) - } - else { + } else { sample_df <- results_df[row, first_analyte_col_index:length(results_df)] @@ -653,7 +670,6 @@ parse_single_results_block <- data = sample_df ) samples[[row]] <- sample - } return(list(data_type = "samples", samples)) diff --git a/R/standard_curves.R b/R/standard_curves.R index 4e88efc0..7fae1366 100644 --- a/R/standard_curves.R +++ b/R/standard_curves.R @@ -12,24 +12,25 @@ library(ggplot2) #' @param log_scale Which elements on the plot should be displayed in log scale. By default c("dilutions"). If `NULL` no log scale is used, if "all" or c("dilutions", "MFI") all elements are displayed in log scale. #' @param verbose If `TRUE` print messages, `TRUE` by default #' +#' @import ggplot2 +#' #' @export -plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Median", file_path = NULL, decreasing_dilution_order = TRUE, log_scale = c("all"), verbose = TRUE) { - +plot_standard_curve_antibody <- function(plates, antibody_name, data_type = "Median", file_path = NULL, decreasing_dilution_order = TRUE, log_scale = c("all"), verbose = TRUE) { if (inherits(plates, "Plate")) { # an instance of Plate plates <- list(plates) } - if (!inherits(plates, "list")){ + if (!inherits(plates, "list")) { stop("plates object should be a plate or a list of plates") } - for( plate in plates ){ - if (!inherits(plate, "Plate")){ + for (plate in plates) { + if (!inherits(plate, "Plate")) { stop("plates object should be a plate or a list of plates") } } # check if log_scale is a character vector and contains element from set available_log_scale_values <- c("all", "dilutions", "MFI") - if (!is.null(log_scale) && !all(log_scale %in% available_log_scale_values)){ + if (!is.null(log_scale) && !all(log_scale %in% available_log_scale_values)) { stop("log_scale should be a character vector containing elements from set: ", paste(available_log_scale_values, collapse = ", ")) } @@ -38,7 +39,7 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi standard_curve_values_list <- list() - for (plate in plates){ + for (plate in plates) { if (!plate$check_if_blanks_already_adjusted) { verbose_cat( "(", @@ -52,7 +53,7 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi } standard_curves <- plate$get_sample_by_type("STANDARD CURVE") - if (length(standard_curves) == 0){ + if (length(standard_curves) == 0) { verbose_cat( "(", color_codes$red_start, @@ -65,12 +66,12 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi standard_curves <- plate$get_sample_by_type("POSITIVE CONTROL") } if (is.null(standard_curve_num_samples)) { - standard_curve_num_samples = length(standard_curves) + standard_curve_num_samples <- length(standard_curves) } else if (standard_curve_num_samples != length(standard_curves)) { stop("Inconsistent number of positive control or standard curve samples accross plates") } - if (!antibody_name %in% plate$analyte_names){ + if (!antibody_name %in% plate$analyte_names) { stop("Antibody ", antibody_name, " not present in the plate") } @@ -87,31 +88,28 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi if (is.null(dilutions_numeric_base)) { dilutions_numeric_base <- dilutions_numeric - } - else if(!all.equal(dilutions_numeric_base, dilutions_numeric)) { + } else if (!all.equal(dilutions_numeric_base, dilutions_numeric)) { stop("Inconsistent dilutions accross plates") } curve_values <- sapply(standard_curves, function(sample) sample$data[data_type, antibody_name]) - if (any(is.na(curve_values))){ + if (any(is.na(curve_values))) { stop(data_type, " not present in the dataframe") } - standard_curve_values_list = append(standard_curve_values_list, list(curve_values)) - + standard_curve_values_list <- append(standard_curve_values_list, list(curve_values)) } - plot_name <- paste0("Standard curve for analyte: ", antibody_name) - if (length(plates) >= 3){ + if (length(plates) >= 3) { colors <- RColorBrewer::brewer.pal(length(plates), "Set1") - }else { + } else { colors <- c("red", "blue") } - par(mfrow=c(1,1)) + par(mfrow = c(1, 1)) log_if_needed_mfi <- function(x) { if ("MFI" %in% log_scale || "all" %in% log_scale) { @@ -151,17 +149,19 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi x_ticks <- c(log_if_needed_dilutions(dilutions_numeric), max(log_if_needed_dilutions(dilutions_numeric)) + 1) x_labels <- c(dilutions, "") - legend_position <- c(0.8, 0.2) # Automatically position the legend + legend_position <- c(0.8, 0.2) # Automatically position the legend if (decreasing_dilution_order) { - if (x_log_scale && !y_log_scale) + if (x_log_scale && !y_log_scale) { legend_position <- c(0.8, 0.8) - else + } else { legend_position <- c(0.2, 0.2) + } } else { - if (x_log_scale && !y_log_scale) + if (x_log_scale && !y_log_scale) { legend_position <- c(0.2, 0.8) - else + } else { legend_position <- c(0.8, 0.2) + } } p <- ggplot(plot_data, aes(x = dilutions, y = mfi, color = plate)) + @@ -172,22 +172,21 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi scale_x_continuous(breaks = x_ticks, labels = x_labels, trans = if (decreasing_dilution_order) "reverse" else "identity") + scale_y_continuous() + theme_minimal() + - theme(axis.line = element_line(colour = "black"), - axis.text.x = element_text(size = 9), - axis.text.y = element_text(size = 9), - legend.position = legend_position, # Automatically position the legend - legend.background = element_rect(fill = "white", color = "black")) + theme( + axis.line = element_line(colour = "black"), + axis.text.x = element_text(size = 9), + axis.text.y = element_text(size = 9), + legend.position = legend_position, # Automatically position the legend + legend.background = element_rect(fill = "white", color = "black") + ) - if (!is.null(file_path)){ + if (!is.null(file_path)) { ggsave(file_path, plot = p, width = 10, height = 7, units = "in", dpi = 300) } else { print(p) } - - - } @@ -206,4 +205,3 @@ color_codes <- green_start = "\033[32m", green_end = "\033[39m" ) - diff --git a/README.md b/README.md index d8652d53..a8ee60ef 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,9 @@ # PvSTATEM - an R package for automated analysis of serological data + + +[![R-CMD-check](https://github.com/ZetrextJG/PvSTATEM/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ZetrextJG/PvSTATEM/actions/workflows/R-CMD-check.yaml) + + *pre-release* version ## Overview This package is a simple tool, which handles the raw data of various formats, produced in Multiplex Bead Assay (MBA). In short, it reads the unstructured, raw data from e.g. Luminex device and output normalized and well-structured data which can be later used in more advanced, downstream analysis. diff --git a/inst/extdata/random.csv b/inst/extdata/random.csv new file mode 100644 index 00000000..6bb15fe2 --- /dev/null +++ b/inst/extdata/random.csv @@ -0,0 +1,62 @@ +Program,xPONENT,,MAGPIX,,,,,,,,,, +Build,3.1.871.0,,,,,,,,,,,, +Date,01/07/2024,00:00,,,,,,,,,,, +,,,,,,,,,,,,, +SN,PvSTATEM1234,,,,,,,,,,,, +Batch,test_plate,,,,,,,,,,,, +Version,1,,,,,,,,,,,, +Operator,,,,,,,,,,,,, +ComputerName,test,,,,,,,,,,,, +Country Code,616,,,,,,,,,,,, +ProtocolName,test,,,,,,,,,,,, +ProtocolVersion,1,,,,,,,,,,,, +,,,,,,,,,,,,, +Most Recent Calibration and Verification Results:,,,,,,,,,,,,, +Last VER Calibration,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +Last CAL Calibration,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +Last Fluidics Test,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +,,,,,,,,,,,,, +CALInfo:,,,,,,,,,,,,, +Calibrator,,,,,,,,,,,,, +Lot,ExpirationDate,CalibrationTime,DDTemp,CL1Temp,CL2Temp,DDVolts,CL1Volts,CL2Volts,DDRVal,CL1RVal,CL2RVal,Result,MachineSerialNo +,,,,,,,,,,,,, +,,,,,,,,,,,,, +Samples,8,Min Events,50,Per Bead,,,,,,,,, +,,,,,,,,,,,,, +Results,,,,,,,,,,,,, +,,,,,,,,,,,,, +DataType:,Median,,,,,,,,,,,, +Location,Sample,Etramp5_ag1,Etramp4_ag1,AMA1,,,,,,,,, +1(1,A1),BLANK,20,21,23,,,,,,,, +2(1,A2),test1,1292,1389,1414,,,,,,,, +3(1,A3),test2,1430,1435,867,,,,,,,, +4(1,A4),test3,657,2031,1300,,,,,,,, +5(1,B1),CP3 1/50,1486,1437,1893,,,,,,,, +6(1,B2),CP3 1/250,130,87,90,,,,,,,, +7(1,B3),CP3 1/1250,78,67,64,,,,,,,, +8(1,B4),CP3 1/6250,65,60,64,,,,,,,, +,,,,,,,,,,,,, +DataType:,Count,,,,,,,,,,,, +Location,Sample,Etramp5_ag1,Etramp4_ag1,AMA1,,,,,,,,, +1(1,A1),BLANK,100,121,143,,,,,,,, +2(1,A2),test1,102,104,105,,,,,,,, +3(1,A3),test2,130,135,67,,,,,,,, +4(1,A4),test3,57,231,100,,,,,,,, +5(1,B1),CP3 1/50,86,37,193,,,,,,,, +6(1,B2),CP3 1/250,130,87,90,,,,,,,, +7(1,B3),CP3 1/1250,53,47,65,,,,,,,, +8(1,B4),CP3 1/6250,85,50,54,,,,,,,, +,,,,,,,,,,,,, +DataType:,Per Bead Count,,,,,,,,,,,, +Alyte:,Etramp5_ag1,Etramp4_ag1,AMA1,,,,,,,,,, +BeadID:,73,75,34,,,,,,,,,, +Per Bead:,50,50,50,,,,,,,,,, +,,,,,,,,,,,,, +DataType:,Audit Logs,,,,,,,,,,,, +UserId,Date,Message,,,,,,,,,,, +,,,,,,,,,,,,, +DataType:,Warnings/Errors,,,,,,,,,,,, +Location,Status,Message,,,,,,,,,,, +,,,,,,,,,,,,, +-- CRC --,,,,,,,,,,,,, +CRC32: 85C2BFCD,,,,,,,,,,,,, diff --git a/inst/extdata/random2.csv b/inst/extdata/random2.csv new file mode 100644 index 00000000..b93b8160 --- /dev/null +++ b/inst/extdata/random2.csv @@ -0,0 +1,62 @@ +Program,xPONENT,,MAGPIX,,,,,,,,,, +Build,3.1.872.0,,,,,,,,,,,, +Date,02/07/2024,00:00,,,,,,,,,,, +,,,,,,,,,,,,, +SN,PvSTATEM1234,,,,,,,,,,,, +Batch,test_plate,,,,,,,,,,,, +Version,1,,,,,,,,,,,, +Operator,,,,,,,,,,,,, +ComputerName,test,,,,,,,,,,,, +Country Code,48,,,,,,,,,,,, +ProtocolName,test,,,,,,,,,,,, +ProtocolVersion,1,,,,,,,,,,,, +,,,,,,,,,,,,, +Most Recent Calibration and Verification Results:,,,,,,,,,,,,, +Last VER Calibration,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +Last CAL Calibration,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +Last Fluidics Test,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +,,,,,,,,,,,,, +CALInfo:,,,,,,,,,,,,, +Calibrator,,,,,,,,,,,,, +Lot,ExpirationDate,CalibrationTime,DDTemp,CL1Temp,CL2Temp,DDVolts,CL1Volts,CL2Volts,DDRVal,CL1RVal,CL2RVal,Result,MachineSerialNo +,,,,,,,,,,,,, +,,,,,,,,,,,,, +Samples,8,Min Events,50,Per Bead,,,,,,,,, +,,,,,,,,,,,,, +Results,,,,,,,,,,,,, +,,,,,,,,,,,,, +DataType:,Median,,,,,,,,,,,, +Location,Sample,Etramp5_ag1,Etramp4_ag1,AMA1,GEXP18,,,,,,,, +1(1,A1),BLANK,19,22,22,40,,,,,,, +2(1,B1),test1,1300,1389,1454,70,,,,,,, +3(1,C1),test2,1433,1435,880,1500,,,,,,, +4(1,D1),test3,660,2031,1200,415,,,,,,, +5(1,E1),CP3 1/250,1500,1437,1903,400,,,,,,, +6(1,F1),CP3 1/1250,135,87,93,50,,,,,,, +7(1,G1),CP3 1/6250,80,67,66,43,,,,,,, +8(1,H1),CP3 1/31250,66,60,66,42,,,,,,, +,,,,,,,,,,,,, +DataType:,Count,,,,,,,,,,,, +Location,Sample,Etramp5_ag1,Etramp4_ag1,AMA1,GEXP18,,,,,,,, +1(1,A1),BLANK,102,126,141,140,,,,,,, +2(1,B1),test1,101,100,100,80,,,,,,, +3(1,C1),test2,120,133,69,160,,,,,,, +4(1,D1),test3,70,233,103,100,,,,,,, +5(1,E1),CP3 1/250,90,38,190,170,,,,,,, +6(1,F1),CP3 1/1250,131,88,85,160,,,,,,, +7(1,G1),CP3 1/6250,57,46,70,180,,,,,,, +8(1,H1),CP3 1/31250,83,52,59,185,,,,,,, +,,,,,,,,,,,,, +DataType:,Per Bead Count,,,,,,,,,,,, +Alyte:,Etramp5_ag1,Etramp4_ag1,AMA1,GEXP18,,,,,,,,, +BeadID:,73,75,34,36,,,,,,,,, +Per Bead:,50,50,50,50,,,,,,,,, +,,,,,,,,,,,,, +DataType:,Audit Logs,,,,,,,,,,,, +UserId,Date,Message,,,,,,,,,,, +,,,,,,,,,,,,, +DataType:,Warnings/Errors,,,,,,,,,,,, +Location,Status,Message,,,,,,,,,,, +,,,,,,,,,,,,, +-- CRC --,,,,,,,,,,,,, +CRC32: 85C2BFCD,,,,,,,,,,,,, diff --git a/inst/extdata/random_broken_colB.csv b/inst/extdata/random_broken_colB.csv new file mode 100644 index 00000000..0a26af17 --- /dev/null +++ b/inst/extdata/random_broken_colB.csv @@ -0,0 +1,62 @@ +Program,,,MAGPIX,,,,,,,,,, +Build,xPONENT,,,,,,,,,,,, +Date,3.1.871.0,00:00,,,,,,,,,,, +,01/07/2024,,,,,,,,,,,, +SN,,,,,,,,,,,,, +Batch,PvSTATEM1234,,,,,,,,,,,, +Version,test_plate,,,,,,,,,,,, +Operator,1,,,,,,,,,,,, +ComputerName,,,,,,,,,,,,, +Country Code,test,,,,,,,,,,,, +ProtocolName,616,,,,,,,,,,,, +ProtocolVersion,test,,,,,,,,,,,, +,1,,,,,,,,,,,, +Most Recent Calibration and Verification Results:,,,,,,,,,,,,, +Last VER Calibration,,,,,,,,,,,,, +Last CAL Calibration,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +Last Fluidics Test,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +CALInfo:,,,,,,,,,,,,, +Calibrator,,,,,,,,,,,,, +Lot,,CalibrationTime,DDTemp,CL1Temp,CL2Temp,DDVolts,CL1Volts,CL2Volts,DDRVal,CL1RVal,CL2RVal,Result,MachineSerialNo +,ExpirationDate,,,,,,,,,,,, +,,,,,,,,,,,,, +Samples,,Min Events,50,Per Bead,,,,,,,,, +,8,,,,,,,,,,,, +Results,,,,,,,,,,,,, +,,,,,,,,,,,,, +DataType:,,,,,,,,,,,,, +Location,Median,Etramp5_ag1,Etramp4_ag1,AMA1,,,,,,,,, +1(1,Sample,BLANK,20,21,23,,,,,,,, +2(1,A1),test1,1292,1389,1414,,,,,,,, +3(1,A2),test2,1430,1435,867,,,,,,,, +4(1,A3),test3,657,2031,1300,,,,,,,, +5(1,A4),CP3 1/50,1486,1437,1893,,,,,,,, +6(1,B1),CP3 1/250,130,87,90,,,,,,,, +7(1,B2),CP3 1/1250,78,67,64,,,,,,,, +8(1,B3),CP3 1/6250,65,60,64,,,,,,,, +,B4),,,,,,,,,,,, +DataType:,,,,,,,,,,,,, +Location,Count,Etramp5_ag1,Etramp4_ag1,AMA1,,,,,,,,, +1(1,Sample,BLANK,100,121,143,,,,,,,, +2(1,A1),test1,102,104,105,,,,,,,, +3(1,A2),test2,130,135,67,,,,,,,, +4(1,A3),test3,57,231,100,,,,,,,, +5(1,A4),CP3 1/50,86,37,193,,,,,,,, +6(1,B1),CP3 1/250,130,87,90,,,,,,,, +7(1,B2),CP3 1/1250,53,47,65,,,,,,,, +8(1,B3),CP3 1/6250,85,50,54,,,,,,,, +,B4),,,,,,,,,,,, +DataType:,,,,,,,,,,,,, +Alyte:,Per Bead Count,Etramp4_ag1,AMA1,,,,,,,,,, +BeadID:,Etramp5_ag1,75,34,,,,,,,,,, +Per Bead:,73,50,50,,,,,,,,,, +,50,,,,,,,,,,,, +DataType:,,,,,,,,,,,,, +UserId,Audit Logs,Message,,,,,,,,,,, +,Date,,,,,,,,,,,, +DataType:,,,,,,,,,,,,, +Location,Warnings/Errors,Message,,,,,,,,,,, +,Status,,,,,,,,,,,, +-- CRC --,,,,,,,,,,,,, +CRC32: 85C2BFCD,,,,,,,,,,,,, diff --git a/inst/extdata/random_layout.xlsx b/inst/extdata/random_layout.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..95d0958324308f2efb096579ecd3d2127119ed87 GIT binary patch literal 6129 zcmaJ_1z1$w)~35tkuCx04(V=?E{P!}1{kC}MOwOL=o~^oT1KQnQUoN15|9B2LHdH< z{jXf_|9$H`=ggev*=y~y*IxTwYrR^k$S5QT7#J7`6%m_;2zLbg=D)eSjT4BA^Y&T& zs$1hdH%{c9-%GX=fdwgyf|52hr8YW#>UWGT4;rHjd%_Nm<%o%J;_BbWy9EYaSrsp; zxv(#CGpjz0)-#Gm>R<*0l=ci=x%qGjj(`Ju356A(0A<Z;?2UjaMT2;x#v?q+&kyjRvQHo}c|Vu{@B^ueElP};-QogD>SlxZk^CHPhh=#%L@ z`&HBm{Y!=0iEwQqv3wS70$PTa_EPZf<8Ujf6Q3%?8nQuCBEkYe=|JWy1v1B%B>iqq zrJG$|az+j>kH^9oN1;MM+q^WPug1O~Evm-YY+>9M)McjRGH}2#oQ`TZ!!D^ZRJEGc zmb`Yy_J@nl#73e+!1vA0dY(#x?1;Vj_Ry^P0v9RmqN$P%rIOk6CZ2cLCWsasmn;}1 zfg=%CR8Z&zek7-A(tCxkeH0{~v}lZqR=`K&hE2w4GEl%wVqf+KA0*lnYBPE<{Rj@} z9r6wo7G$;Qt6G~t5-{GHOy3b(Q=)QX%`|WgccqR!Pm!eMf5MzfXZOvU#I?R_wR(#H z*ht7_OqbsF#hw)qx#3-{Ag8@{{c>kcTKxO)@#tZ)R)oZdSbYy4kVMOBGD|HORYS4C zZLbZr4#I8Q;7ZMd@8h|I$7Y)xT$2hl^pK~-D%Hpf*={5gSnJyAhgV(u4&w4h6ee1# zruuuK>UoMvV~Jxw;#H0vk9XXZ_(>*$bJ^?z~!o6R6Tvm~e_9 zv(f3rU^xxOC~1Ol4$hqYLiv*p73F85*Gvytd69F2bJ8-r_~~sW7c~nVc(WD4?5X0> zEZ^qhNW}IgOSh`)WSQ10Z)|B!q{^C&C3{?OP@F067JMZ`EBI9Y3a$mL_|PvMgFJA!Tl?8Kps9$HlW+Mfg5WlWD64ncA1BG1L+cN-m@xC zKUw$SD&1~PWy{^|MvO8P5aiX!KAOo#M@F>z>N%;O-huW8$@XHz-=F%y3$LqZ=2~C* z$Vg#MFpZ~YHDN}Be9(i4bC~51s}0dqId<6;zIIpqx9;W~L74^zQv2r|y%XAmhK;Vx zPtv-4pQgIFg;b=s_o(>@nvy70s3!U8v2qWxA&^dC`%4~R$z`+~S+5uQ_Z5N1 zGbsapitF^+Z;OhWLiyy^g@$XTrhcS>Aegl|Kw+`oXj5my(Mrguot$1GCcmc0I+XC# z7gnPm*b$Ohfb`846Ku_aaZjj>_UQG9H0~n3cKQ6L{p}smK9)(AHAiXwnLw#)V^}Hg z7QAiX%_lro&wKu6E6aU9Gy{T(=u$kHc%L^~if|>R7}ln?x@H|l$(C=&mP2POw>_^X zM9DX~F?Xr)%Y>Qk*Y+o0Id{cbMsa^DVA!{Wte#i}0t6dTm96?ccRT@2XRrCqKuP7w zMS23RCdtW!DWJ&BD0i3(L3QnX=HdZYl>p~g5@3-EgHmTi;Tlsd?8$*l?wfnGU!2gpY)E=!5&%nK6Bz;)eh4VzMrQ35c_nCRbYV zb+8F`3)DqJqKDy3m&2;9GuLC|`Qja1dFQH<-pAQG+UMdYak&>b;g?+24Z5q>GL2i4 zx_%eo?!<8PVdzKW#qtRkkQ(6+y^y8sQH6s}gS=kb zBfN}#olB5W9EAsxe5dx&60OA+`AY}p2_E()1dj-x){FUt{s^Y;`o@nX^h#R}MXG|Z z6JgaX)g%b(X%;W*u^%@*hTcKP#acpEWaBny|X!RnpM zF{MGGn$K{Cq4S1V+$wi0eGGU6Jb@Y3Q({7L+R2d>_kecTCqBkLVPl0tkHnp{*1r68}JFY>2sM0<@|wtzDPo~=bMjfAz8ctD7W&k4{et-%DlLn)1eec zmLva5_HDV^?DIZB6)1Q72=%*+`{4y6$c5v(bvy`of|`bsBkkiC@W#VduM5q@b;(DiFN{ z&YTb|2W_*XL2v*URHPd45J`k#(H>(?4(Ph zI&vvpoYL82{viMS!0k+hQEiHXj1q7APS}?V-ZMwnq8+9s%VFFk+L5S@_`{tBn{^o8 zLl)pTPN|i(s!`}$VVjToc|5W70<6&-r3=T^C`nUO_az#Z2Be!!h*o%TPcSuFG@!1r zZ|x(u|LF9NnrEEBu@0`_mH>$8%hK9u+B%8-r2sma!M@DLiZN&Mj*#uGNcZu(``SI7*2H{sUZS1w3=a8Yjoyn!dSxXo zGEe>;=-XbpZ^}Z)S$+1d%TP%lNg>Y*`~d;zllrQ#RFQHuakWfh&+;Qcdu^Y?2ke$g z3pII@(BdSWQdqTyKE6MuR-9L-q|9fuLfuzqTIvwa>q&QTUgOZNll3@~g^X?1rOmTb zdy+2I7i}|cHhibcD5)8<%kt@tX^C?paITv$bnh z#U4YO3^h|p&g#qxRRwP|7CPOZ%JiMd0gaDmw$3|h47M0eGXdo-rn*Cf$a~OuB2qN1 zYL8%iVKQP}#plc_o2bSQsbbM&rcEzX&w5#gio@h5T=PL$B-IWHo{TSDeVeK(GG7~v zm;7WbKN~*HK2oZvo)qSoNyRd2wkj>#)Fp*ifZ>glU(c8pz(szYMPJRjiVRYYM$>mZ zA?6cYDra`8PrIHrm(6tKCUtlrI@Vb#gnNS{KAYptWaLU2%eD1PsQVM(Ex?VH+eFV8vfWd#+gb$4E@2GGT z45Zq8fC9oGBVea9lqQg>jkK6-SeFidACRU2-B zDvi)h5gO+wZM9cI>jPI-&Y6lsCejS|X=!iTXzz)ou@QyWSfY=PTd;w8Oubz??0`ew4f zF>^-Fn5Nm5F0lK?*FfK=f*PWk??9U=)DMxao_*(iCZar)NeVf43DIYx>53|bS zBuJ3e+Q z>}0-x*3EUfG`D_;_jDU}D2eY8`X23Gw|3xE+Vo*P({5gT-bIXR$!)GCqoFt@pQgT| z<1)b3n&tgYQahmIL}%yeP_BLBS92Fc^3v%%h(kXKzM5XbCeLN(`>im;;<0?fI;9`f zHuknDZjl(|aeXZQr&$V;Uf~81!q^uDbMXN$R)6L(A4I5e3N1_$YxmtJOpkM_);$ah z=VzFpYP2cR%Va~0ec{wBv!9}gIV7o~_ml1=b5w%kGyd1wMN5J&u!}s|@t!NhRj&H- zxEXhhP+a2QVm@ic30e&*0s`TKf5LodI&r0)2ug}WqGIQgKA4u}=* zh?Rz1{zUB6UK#5VeD$gEI*{{$36{;ySn}A+VW-lMZT`6J7d^5#Xcb_Ti0%)b7#$&( zyKvZj{SoluvOQBmEe%m+JMrgWa%C>#hC^W^PAh1lm|4+)ibna&Kx-IWn``Jmc`^ra zgjA>61o@52BdsgNtWg=$FskkBd3tPMyRz{v^R_`4JZ{`jPYLy7;;zJ=JH1 zK}cBs6G1P`X>(iZJzs1f=6tnQ;LZi(;X!kzd^Kco!5`_?Gn(H|eJ5_#&Ge6L8p*$l zZF@_18*3d8cLx`{Up05!c()6b8wYLqJdggM<1Gi0GYjrq;@e#)HSIHTigGuV955U{ zu-s!aIDTw*jsER2A%7|r{PrCces=mvy2cMQ`Lg;G?m2ULErfb`IG;hd!?u3m|Hy zCkjrJuQ;mEBs?92gp-s0{i0VgXd?C= z{4O7kOkylRUi;D?a#%qT=*Z;70<&c&8UoM-mzlZ7N9B8xB_EUbt0)9?TQAr!8MksN zU=yKIO31)>3G2@`$}hhQ(fPz?`)On)bvDqg8Il3Uz6*xryqUzmE0AOSZX2^%-fQ5`JP5Vcfw2 zwJ#NkttMKB1=<|Ya7^eVJjG{4Oora^X+MFEke#<8I3{u$d66cwuq zP+?oGpIAW1Zagk_FzbMvOKgFxsU;{Y_H~#DFyuv>8Ru_us!=F5ki*r+@kyOQxZQqH zRSWN-TaMK|$})=WG1K5pIn&*Rpn}GYsPml?%DAuEPeS=QKvI~UCB>dq91$pIo9n95 zd9eyX(%Le@>g}tK(oPvedEZ+oBQu+8{)f; z1Uv_M0G)sNaYy%o09p;d;c1z=4bRkOO#GEUer;uxvpc{(2paz0GsRlmYd{0AdU~t)oaO$+P0m zqszGIA^=VXdEk>gw*HzzGW%8V>yBu?rzPpygV?{#f^Kx* zn{+{!jBoAn5yXIS)cB)({&MA4Fm03B2r^oYe&^T4^^`W-**dqXTeLtgp4mH=NJ|Y= zFi6?KPCenspB-QS;Zv}x48&ceqFqQRTv$J(2+t9jO&80V>4&AGsuhdL1X2IGx4(Rb zK6Q#_z?#(L4jxu(MSy!3W?!{SaN$}LJm!fAxOXa4n`V$vU-{vv4BR)^@dN&sV)Ic~ zDM9fXqF%F$8Vg+?T3Ty0vX5U0))(B~%IbeTHeXv%SL*94g9yq6!FsbpvQ z+qd~o`bM8aI%L(!BP`wtm<%9vj~c%IqKEMG(_5puA{nZ2uRxQq2gV(Am$C422hZymnt`sM8{SlfT~d zdKq%IGexi@iXK=YXd(hxpFQ9h(cITo$TIgm!@S+36|rV~>zlxvx=Gbqs)$G=2*1hI zyL{p;x%%I7hid&(@h+WoOE>|C#rHddokp+|AFo^~rB}bMrknAMiKT%Aa=brnlQ{{#!=z{#wYNx&2QIcOmvR z4gQvy8@K*XQv6f>F0kC<@NY@IvGyPJf1~oB+INrdE%^MFMWQ>}zoF<)19wB~zYR3q ay#7CDUP~1f?RFNdn=AMxtC*19zWocPq;>QF literal 0 HcmV?d00001 diff --git a/man/Analyte.Rd b/man/Analyte.Rd index b08d6d2d..dc7e4eec 100644 --- a/man/Analyte.Rd +++ b/man/Analyte.Rd @@ -7,7 +7,7 @@ Analyte class is an object that gathers information about one specific Analyte on the Luminex plate } \examples{ -etramp <- Analyte$new(id = 73, analyte_name = "Etramp5_ag1", bead_count=50) +etramp <- Analyte$new(id = 73, analyte_name = "Etramp5_ag1", bead_count = 50) print(etramp) } diff --git a/man/Plate.Rd b/man/Plate.Rd index 9ad4dda9..84f0c0c6 100644 --- a/man/Plate.Rd +++ b/man/Plate.Rd @@ -12,7 +12,7 @@ analyte id of given name A class to represent the luminex plate. It contains information about the samples and analytes that were examined on the plate as well as some additional metadata and batch info } \examples{ -#TODO +# TODO } \section{Public fields}{ @@ -27,6 +27,10 @@ A class to represent the luminex plate. It contains information about the sample \item{\code{calibration_info}}{a list containing calibration logs read from Luminex file} \item{\code{audit_logs}}{a list containing audit logs read from Luminex file} + +\item{\code{plate_name}}{\itemize{ +\item plate name obtained from filename +}} } \if{html}{\out{}} } @@ -84,7 +88,8 @@ creates a new \code{Plate} object samples = list(), batch_info = list(), calibration_info = list(), - audit_logs = list() + audit_logs = list(), + plate_name = "" )}\if{html}{\out{}} } diff --git a/man/Sample.Rd b/man/Sample.Rd index 45c3ffec..0d36063a 100644 --- a/man/Sample.Rd +++ b/man/Sample.Rd @@ -7,7 +7,8 @@ A class to represent the sample. It contains all the necessary information about the sample } \examples{ -#TODO +# TODO + } \section{Public fields}{ \if{html}{\out{
}} diff --git a/man/SampleLocation.Rd b/man/SampleLocation.Rd index 4cd4ae44..53d78363 100644 --- a/man/SampleLocation.Rd +++ b/man/SampleLocation.Rd @@ -15,6 +15,7 @@ sample_location$location_name sample_location <- SampleLocation$parse_sample_location("65(1,F5)") sample_location$location_name + } \section{Public fields}{ \if{html}{\out{
}} diff --git a/man/plot_standard_curve_antibody.Rd b/man/plot_standard_curve_antibody.Rd index f6f0f727..812185ec 100644 --- a/man/plot_standard_curve_antibody.Rd +++ b/man/plot_standard_curve_antibody.Rd @@ -10,7 +10,7 @@ plot_standard_curve_antibody( data_type = "Median", file_path = NULL, decreasing_dilution_order = TRUE, - log_scale = c("dilutions"), + log_scale = c("all"), verbose = TRUE ) } diff --git a/man/read_data.Rd b/man/read_data.Rd index 58b748d8..da7ebd65 100644 --- a/man/read_data.Rd +++ b/man/read_data.Rd @@ -8,9 +8,9 @@ read_data( file_path, layout_file_path = NULL, check_plate = TRUE, - ..., verbose = TRUE, - colorize = !isTRUE(getOption("knitr.in.progress")) + colorize = !isTRUE(getOption("knitr.in.progress")), + ... ) } \arguments{ @@ -23,12 +23,15 @@ read_data( \item{verbose}{if TRUE, print out the progress of the function} \item{colorize}{if TRUE, colorize the output} + +\item{...}{additional arguments passed down} } \description{ file structure based on the https://genome.med.harvard.edu/documents/luminex/IS2.3_SW_110_eng_us.pdf page 97 and Kenya screening csv files } \examples{ -plate <- read_data("path/to/file.csv") +plate_file <- system.file("extdata", "random.csv", package = "PvSTATEM") +plate <- read_data(plate_file) } diff --git a/tests/testthat/test-initial-preprocessing.R b/tests/testthat/test-initial-preprocessing.R index c6195f52..1d7b0f2d 100644 --- a/tests/testthat/test-initial-preprocessing.R +++ b/tests/testthat/test-initial-preprocessing.R @@ -1,35 +1,45 @@ -test_that("blank adjustment works", { - kenya_filepath <- system.file("extdata", "kenya.csv", package = "PvSTATEM", mustWork = TRUE) +library(testthat) - plate <- read_data(kenya_filepath, verbose=FALSE) +test_that("blank adjustment", { + plate_file <- system.file("extdata", "random.csv", package = "PvSTATEM", mustWork = TRUE) - initial_mfi_value = plate$samples[[1]]$data["Net MFI", "Etramp5_ag1"] - expect_equal(initial_mfi_value, 5595.5) + plate <- read_data(plate_file, verbose = FALSE) + + initial_mfi_value <- plate$samples[[2]]$data["Median", "Etramp5_ag1"] + expect_equal(initial_mfi_value, 1292) plate$blank_adjustment(inplace = T, method = "avg") - recalculated_mfi_value = plate$samples[[1]]$data["Net MFI", "Etramp5_ag1"] - expect_equal(recalculated_mfi_value, 5545.3333) + recalculated_mfi_value <- plate$samples[[2]]$data["Median", "Etramp5_ag1"] + expect_equal(recalculated_mfi_value, 1272) }) test_that("plotting standard curves work", { + plate_file1 <- system.file("extdata", "random.csv", package = "PvSTATEM", mustWork = TRUE) + plate_file2 <- system.file("extdata", "random.csv", package = "PvSTATEM", mustWork = TRUE) - kenya_filepath1 <- system.file("extdata", "kenya.csv", package = "PvSTATEM", mustWork = TRUE) - kenya_filepath6 <- system.file("extdata", "kenya_P6.csv", package = "PvSTATEM", mustWork = TRUE) - kenya_filepath4 <- system.file("extdata", "kenya_P4.csv", package = "PvSTATEM", mustWork = TRUE) - - plate1 <- read_data(kenya_filepath1, verbose=FALSE) - plate6 <- read_data(kenya_filepath6, verbose=FALSE) - plate4 <- read_data(kenya_filepath4, verbose=FALSE) + plate1 <- read_data(plate_file1, verbose = FALSE) + plate2 <- read_data(plate_file2, verbose = FALSE) plate1$blank_adjustment() - plate4$blank_adjustment() - plate6$blank_adjustment() + plate2$blank_adjustment() + + plates <- list(plate1, plate2) + + expect_error(plot_standard_curve_antibody(plates, antibody_name = "Etramp5_ag1"), NA) +}) +test_that("plotting standard curves fail - different dilutions", { + plate_file1 <- system.file("extdata", "random.csv", package = "PvSTATEM", mustWork = TRUE) + plate_file2 <- system.file("extdata", "random2.csv", package = "PvSTATEM", mustWork = TRUE) + plate1 <- read_data(plate_file1, verbose = FALSE) + plate2 <- read_data(plate_file2, verbose = FALSE) - plates <- list(plate1, plate4, plate6) + plate1$blank_adjustment() + plate2$blank_adjustment() + plates <- list(plate1, plate2) - expect_error(plot_standard_curve_antibody(plates, antibody_name = "Etramp5_ag1"), NA) + expect_error(plot_standard_curve_antibody(plates, antibody_name = "Etramp5_ag1")) }) diff --git a/tests/testthat/test-read-data.R b/tests/testthat/test-read-data.R index 6e57e6a8..baa2cc21 100644 --- a/tests/testthat/test-read-data.R +++ b/tests/testthat/test-read-data.R @@ -1,23 +1,20 @@ -test_that("reading file works", { - kenya_filepath <- system.file("extdata", "kenya.csv", package = "PvSTATEM", mustWork = TRUE) +test_that("reading file", { + plate_filepath <- system.file("extdata", "random.csv", package = "PvSTATEM", mustWork = TRUE) - plate <- read_data(kenya_filepath, verbose=FALSE) - expect_equal(length(plate$samples), 96) + plate <- read_data(plate_filepath, verbose = FALSE) + expect_equal(length(plate$samples), 8) }) -test_that("reading file works", { - oise_filepath <- system.file("extdata", "OISE.csv", package = "PvSTATEM", mustWork = TRUE) - oise_layout_filepath <- system.file("extdata", "OISE_layout.xlsx", package = "PvSTATEM", mustWork = TRUE) - - plate <- read_data(oise_filepath, layout_file_path = oise_layout_filepath, check_plate = FALSE, verbose=FALSE) - expect_equal(length(plate$samples), 96) +test_that("reading file with layout", { + plate_filepath <- system.file("extdata", "random.csv", package = "PvSTATEM", mustWork = TRUE) + layout_filepath <- system.file("extdata", "random_layout.xlsx", package = "PvSTATEM", mustWork = TRUE) + plate <- read_data(plate_filepath, layout_file_path = layout_filepath, check_plate = FALSE, verbose = FALSE) + expect_equal(length(plate$samples), 8) }) test_that("incorrect format detection", { # add better handling of this error - kenya_filepath <- system.file("extdata", "kenya_P3_shifted.csv", package = "PvSTATEM", mustWork = TRUE) + kenya_filepath <- system.file("extdata", "random_broken_colB.csv", package = "PvSTATEM", mustWork = TRUE) - expect_warning( - expect_error(read_data(kenya_filepath, verbose=FALSE)) - ) + expect_error(read_data(kenya_filepath, verbose = FALSE)) }) diff --git a/vignettes/.gitignore b/vignettes/.gitignore new file mode 100644 index 00000000..097b2416 --- /dev/null +++ b/vignettes/.gitignore @@ -0,0 +1,2 @@ +*.html +*.R diff --git a/vignettes/example_script.Rmd b/vignettes/example_script.Rmd index 1ac5db3d..0197d753 100644 --- a/vignettes/example_script.Rmd +++ b/vignettes/example_script.Rmd @@ -7,6 +7,7 @@ vignette: > %\VignetteIndexEntry{Simple example of basic PvSTATEM package pre-release version functionalities} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} + %\VignetteDepends{ggplot2} --- ```{r setup, include=FALSE} @@ -24,18 +25,15 @@ The basic functionality of the `PvSTATEM` package is reading raw MBA data. To pr ```{r} library(PvSTATEM) -plate_filepath <- system.file("extdata", "artificial_plate.csv", package = "PvSTATEM", mustWork = TRUE) # get the filepath of the csv dataset +plate_filepath <- system.file("extdata", "random.csv", package = "PvSTATEM", mustWork = TRUE) # get the filepath of the csv dataset plate <- read_data(plate_filepath) # read the data - ``` After the plate is successfully loaded, we can look into some basic information about the plate. ```{r} - - plate$summary() plate$get_sample(1) @@ -50,7 +48,6 @@ plate$analyte_names ## Warnings Our scripts are designed to catch potential errors in the data. If there are any warnings, they will be stored in the `warnings` field of the plate object. The warnings can be accessed by the `$warnings` field of the plate object or by the `$warnings` field of the sample object. -```{r} ```{r} ## warnings ---------------------------------------------------------------- @@ -72,6 +69,4 @@ plot_standard_curve_antibody(plate, antibody_name = "AMA1") plate$blank_adjustment() plot_standard_curve_antibody(plate, antibody_name = "AMA1") - - ``` From 4917c6104c7d165d294a830f79696d3782f96098 Mon Sep 17 00:00:00 2001 From: Jakub Grzywaczewski <44850303+ZetrextJG@users.noreply.github.com> Date: Mon, 8 Jul 2024 21:14:24 +0200 Subject: [PATCH 3/3] Add dev to GHA for R check and test coverage (#19) --- .github/workflows/R-CMD-check.yaml | 4 ++-- .github/workflows/test-coverage.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml index 0f2fe080..c3274143 100644 --- a/.github/workflows/R-CMD-check.yaml +++ b/.github/workflows/R-CMD-check.yaml @@ -2,9 +2,9 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: [main, dev] pull_request: - branches: [main, master] + branches: [main, dev] name: R-CMD-check diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml index c0f85efd..11a5807c 100644 --- a/.github/workflows/test-coverage.yaml +++ b/.github/workflows/test-coverage.yaml @@ -2,9 +2,9 @@ # Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help on: push: - branches: [main, master] + branches: [main, dev] pull_request: - branches: [main, master] + branches: [main, dev] name: test-coverage