diff --git a/.Rbuildignore b/.Rbuildignore index 91114bf2..31e37542 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -1,2 +1,4 @@ ^.*\.Rproj$ ^\.Rproj\.user$ +^LICENSE\.md$ +^\.github$ diff --git a/.github/.gitignore b/.github/.gitignore new file mode 100644 index 00000000..2d19fc76 --- /dev/null +++ b/.github/.gitignore @@ -0,0 +1 @@ +*.html diff --git a/.github/workflows/R-CMD-check.yaml b/.github/workflows/R-CMD-check.yaml new file mode 100644 index 00000000..0f2fe080 --- /dev/null +++ b/.github/workflows/R-CMD-check.yaml @@ -0,0 +1,52 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: R-CMD-check + +permissions: read-all + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: macos-latest, r: 'release'} + - {os: windows-latest, r: 'release'} + - {os: ubuntu-latest, r: 'devel', http-user-agent: 'release'} + - {os: ubuntu-latest, r: 'release'} + - {os: ubuntu-latest, r: 'oldrel-1'} + + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + R_KEEP_PKG_SOURCE: yes + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-pandoc@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + http-user-agent: ${{ matrix.config.http-user-agent }} + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::rcmdcheck + needs: check + + - uses: r-lib/actions/check-r-package@v2 + with: + upload-snapshots: true + build_args: 'c("--no-manual","--compact-vignettes=gs+qpdf")' diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 00000000..f5d92097 --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,34 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, dev] + pull_request: + branches: [main, dev] + +name: lint + +permissions: read-all + +jobs: + lint: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::lintr, local::. + needs: lint + + - name: Lint + run: lintr::lint_package() + shell: Rscript {0} + env: + LINTR_ERROR_ON_LINT: false diff --git a/.github/workflows/pkgdown.yaml b/.github/workflows/pkgdown.yaml index 05540872..19c96e79 100644 --- a/.github/workflows/pkgdown.yaml +++ b/.github/workflows/pkgdown.yaml @@ -47,4 +47,4 @@ jobs: with: clean: false branch: gh-pages - folder: docs \ No newline at end of file + folder: docs diff --git a/.github/workflows/style.yaml b/.github/workflows/style.yaml new file mode 100644 index 00000000..be9b34aa --- /dev/null +++ b/.github/workflows/style.yaml @@ -0,0 +1,78 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + paths: ["**.[rR]", "**.[qrR]md", "**.[rR]markdown", "**.[rR]nw", "**.[rR]profile"] + branches: [main, dev] + +name: Style + +permissions: read-all + +jobs: + style: + runs-on: ubuntu-latest + permissions: + contents: write + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + steps: + - name: Checkout repo + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Setup R + uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - name: Install dependencies + uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::styler, any::roxygen2 + needs: styler + + - name: Enable styler cache + run: styler::cache_activate() + shell: Rscript {0} + + - name: Determine cache location + id: styler-location + run: | + cat( + "location=", + styler::cache_info(format = "tabular")$location, + "\n", + file = Sys.getenv("GITHUB_OUTPUT"), + append = TRUE, + sep = "" + ) + shell: Rscript {0} + + - name: Cache styler + uses: actions/cache@v4 + with: + path: ${{ steps.styler-location.outputs.location }} + key: ${{ runner.os }}-styler-${{ github.sha }} + restore-keys: | + ${{ runner.os }}-styler- + ${{ runner.os }}- + + - name: Style + run: styler::style_pkg() + shell: Rscript {0} + + - name: Commit and push changes + run: | + if FILES_TO_COMMIT=($(git diff-index --name-only ${{ github.sha }} \ + | egrep --ignore-case '\.(R|[qR]md|Rmarkdown|Rnw|Rprofile)$')) + then + git config --local user.name "$GITHUB_ACTOR" + git config --local user.email "$GITHUB_ACTOR@users.noreply.github.com" + git commit ${FILES_TO_COMMIT[*]} -m "Style code (GHA)" + git pull --ff-only + git push origin + else + echo "No changes to commit." + fi diff --git a/.github/workflows/test-coverage.yaml b/.github/workflows/test-coverage.yaml new file mode 100644 index 00000000..c0f85efd --- /dev/null +++ b/.github/workflows/test-coverage.yaml @@ -0,0 +1,53 @@ +# Workflow derived from https://github.com/r-lib/actions/tree/v2/examples +# Need help debugging build failures? Start at https://github.com/r-lib/actions#where-to-find-help +on: + push: + branches: [main, master] + pull_request: + branches: [main, master] + +name: test-coverage + +permissions: read-all + +jobs: + test-coverage: + runs-on: ubuntu-latest + env: + GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + + steps: + - uses: actions/checkout@v4 + + - uses: r-lib/actions/setup-r@v2 + with: + use-public-rspm: true + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + extra-packages: any::covr, any::xml2 + needs: coverage + + - name: Test coverage + run: | + cov <- covr::package_coverage( + quiet = FALSE, + clean = FALSE, + install_path = file.path(normalizePath(Sys.getenv("RUNNER_TEMP"), winslash = "/"), "package") + ) + covr::to_cobertura(cov) + shell: Rscript {0} + + - name: Show testthat output + if: always() + run: | + ## -------------------------------------------------------------------- + find '${{ runner.temp }}/package' -name 'testthat.Rout*' -exec cat '{}' \; || true + shell: bash + + - name: Upload test results + if: failure() + uses: actions/upload-artifact@v4 + with: + name: coverage-test-failures + path: ${{ runner.temp }}/package diff --git a/.gitignore b/.gitignore index 5b6a0652..c833a2c6 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ .Rhistory .RData .Ruserdata +inst/doc diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 00000000..49d2591f --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,19 @@ +# See https://pre-commit.com for more information +# See https://pre-commit.com/hooks.html for more hooks +repos: +- repo: https://github.com/pre-commit/pre-commit-hooks + rev: v3.2.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-added-large-files + +- repo: local + hooks: + - id: styler + name: Style R code with styler + entry: Rscript -e "errors <- styler::style_pkg(); print(errors); quit(save = 'no', status = sum(errors[[2]]))" + language: system + files: '\.Rproj$' + always_run: true diff --git a/DESCRIPTION b/DESCRIPTION index bf89ee92..8ed390cd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,18 +1,24 @@ Package: PvSTATEM Type: Package Title: Reading, quality control and preprocessin of MBA assay data +Description: Reading, quality control and preprocessin of MBA assay data Version: 0.0.1 Authors@R: person("Tymoteusz", "Kwiecinski", email = "tymoteuszkwiecinski@gmail.com", role = c("aut", "cre")) LazyData: true -RoxygenNote: 7.3.1 +RoxygenNote: 7.3.2 Imports: dplyr, R6, tools, + qpdf, readxl, RColorBrewer, + knitr, + rmarkdown, ggplot2 -Suggests: +Suggests: testthat (>= 3.0.0) -Config/testthat/edition: 3 +Config/testfhat/edition: 3 Roxygen: list(markdown = TRUE, r6 = TRUE) +License: BSD 3 +VignetteBuilder: knitr diff --git a/LICENSE b/LICENSE.md similarity index 99% rename from LICENSE rename to LICENSE.md index 599355e9..8696e19f 100644 --- a/LICENSE +++ b/LICENSE.md @@ -26,3 +26,4 @@ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + diff --git a/NAMESPACE b/NAMESPACE index 1b1abc7b..68d38f58 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,5 +1,10 @@ # Generated by roxygen2: do not edit by hand +export(Analyte) export(Plate) +export(Sample) +export(SampleLocation) +export(SampleType) export(plot_standard_curve_antibody) export(read_data) +import(ggplot2) diff --git a/R/classes.R b/R/classes.R index f34dc046..8440d7bd 100644 --- a/R/classes.R +++ b/R/classes.R @@ -1,7 +1,7 @@ library(R6) -verify_numeric_join = function(x, y) { +verify_numeric_join <- function(x, y) { # check if two numeric values are equal if (is.na(x) || is.na(y)) { return(TRUE) @@ -9,7 +9,7 @@ verify_numeric_join = function(x, y) { return(x == y) } -verify_character_join = function(x, y) { +verify_character_join <- function(x, y) { # check if two character values are equal if (is.null(x) || is.null(y)) { return(TRUE) @@ -17,7 +17,7 @@ verify_character_join = function(x, y) { return(x == y) } -get_join_value = function(x, y) { +get_join_value <- function(x, y) { if (is.na(x) || is.null(x)) { return(y) } @@ -48,9 +48,10 @@ get_join_value = function(x, y) { #' @field units Units of the analyte in which the results are expressed #' #' @examples -#' etramp <- Analyte$new(id = 73, analyte_name = "Etramp5_ag1", bead_count=50) +#' etramp <- Analyte$new(id = 73, analyte_name = "Etramp5_ag1", bead_count = 50) #' print(etramp) #' +#' @export Analyte <- R6Class( "Analyte", list( @@ -86,14 +87,14 @@ Analyte <- R6Class( stopifnot(length(id) == 1 && is.numeric(id)) stopifnot(length(analyte_name) == 1 && - is.character(analyte_name)) + is.character(analyte_name)) stopifnot(length(bead_count) == 1 && - (is.na(bead_count) || is.numeric(bead_count))) + (is.na(bead_count) || is.numeric(bead_count))) stopifnot(length(analysis_type) == 0 || - (is.character(analysis_type) && - length(analysis_type) == 1)) + (is.character(analysis_type) && + length(analysis_type) == 1)) stopifnot(length(units) == 0 || - (is.character(units) && length(units) == 1)) + (is.character(units) && length(units) == 1)) self$id <- id self$analyte_name <- analyte_name @@ -144,7 +145,6 @@ Analyte <- R6Class( get_join_value(self$analysis_type, new_analyte$analysis_type) self$units <- get_join_value(self$units, new_analyte$units) } - ) ) @@ -169,6 +169,8 @@ Analyte <- R6Class( #' #' sample_location <- SampleLocation$parse_sample_location("65(1,F5)") #' sample_location$location_name +#' +#' @export SampleLocation <- R6Class( "SampleLocation", public = list( @@ -188,7 +190,6 @@ SampleLocation <- R6Class( self$col <- col self$row <- row - }, #' @description @@ -206,14 +207,13 @@ SampleLocation <- R6Class( # join the data of two samples if (!verify_numeric_join(self$row, new_location$row) || - !verify_numeric_join(self$col, new_location$col)) { + !verify_numeric_join(self$col, new_location$col)) { stop("Cannot join samples of different locations") } self$col <- get_join_value(self$col, new_location$col) self$row <- get_join_value(self$row, new_location$row) } - ), active = list( #' @description @@ -224,7 +224,7 @@ SampleLocation <- R6Class( #' @description #' Returns the location of the sample in the format `(row_letter, column)` - location_name = function() { + location_name = function() { return(paste0("(", self$row_letter, ", ", self$col, ")")) } ) @@ -239,17 +239,16 @@ SampleLocation <- R6Class( #' #' @returns New SampleLocation object with parsed location information. #' -SampleLocation$parse_sample_location = function(location_string) { +SampleLocation$parse_sample_location <- function(location_string) { cleaned_string <- gsub("\\\"", "", location_string) cleaned_string <- unlist(strsplit(cleaned_string, "[()]")) - if (length(cleaned_string) > 1){ + if (length(cleaned_string) > 1) { id <- as.numeric(cleaned_string[1]) cleaned_string <- unlist(strsplit(cleaned_string[2], "[;,]")) plate_id <- as.numeric(cleaned_string[1]) location <- cleaned_string[2] - } - else{ + } else { cleaned_string <- unlist(strsplit(cleaned_string[1], "[;,]")) plate_id <- as.numeric(cleaned_string[1]) location <- cleaned_string[2] @@ -283,6 +282,8 @@ SampleLocation$parse_sample_location = function(location_string) { #' #' @field dilution_factor A numeric value that represents the dilution factor of the sample. Used only in the case of control samples #' +#' +#' @export SampleType <- R6Class( "SampleType", public = list( @@ -302,24 +303,23 @@ SampleType <- R6Class( # for now there should passed a sample type only # check for valid input stopifnot(length(sample_type) == 1 && - is.character(sample_type)) + is.character(sample_type)) stopifnot(length(dilution_factor) == 1 && - (is.na(dilution_factor) || - is.numeric(dilution_factor))) + (is.na(dilution_factor) || + is.numeric(dilution_factor))) SampleType$validate_sample_type(sample_type) # allow for lazy loading - dont verify the dilution factor after creation, but wait until checks - if (validate_dilution){ + if (validate_dilution) { SampleType$validate_dilution_factor(sample_type, dilution_factor) - private$is_validated = TRUE - }else { - private$is_validated = FALSE + private$is_validated <- TRUE + } else { + private$is_validated <- FALSE } self$sample_type <- sample_type self$dilution_factor <- dilution_factor - }, #' @description @@ -372,18 +372,18 @@ SampleType <- R6Class( #' Possible types for the samples #' SampleType$valid_sample_types <- - c("BLANK", + c( + "BLANK", "TEST", "NEGATIVE CONTROL", "STANDARD CURVE", - "POSITIVE CONTROL") - + "POSITIVE CONTROL" + ) #' #' @description #' Validates the sample type using five possible values: `"BLANK"`, `"POSITIVE CONTROL"`, `"NEGATIVE CONTROL"`, `"TEST"`. #' -SampleType$validate_sample_type = function(sample_type) { - +SampleType$validate_sample_type <- function(sample_type) { if (!(sample_type %in% SampleType$valid_sample_types)) { stop("Invalid sample type") } @@ -391,18 +391,17 @@ SampleType$validate_sample_type = function(sample_type) { #' @description #' Validates the dilution factor based on the sample type -SampleType$validate_dilution_factor = function(sample_type, dilution_factor) { +SampleType$validate_dilution_factor <- function(sample_type, dilution_factor) { if (sample_type == "POSITIVE CONTROL" && is.na(dilution_factor)) { stop("Positive control samples must have a dilution factor") } if (sample_type == "STANDARD CURVE" && is.na(dilution_factor)) { stop("Standard curve samples must have a dilution factor") } - if (sample_type %in% c("POSITIVE CONTROL", "STANDARD CURVE") && - !is.na(dilution_factor)) { + if (sample_type %in% c("POSITIVE CONTROL", "STANDARD CURVE") && + !is.na(dilution_factor)) { stop("Only positive control or standard curve samples should have a dilution factor") } - } #' @description @@ -426,9 +425,9 @@ SampleType$validate_dilution_factor = function(sample_type, dilution_factor) { #' #' @param dilution_factor (`numeric(1)`)\cr #' The dilution factor of the sample from the base luminex file. This parameter is ignored for now -SampleType$parse_sample_type = function(sample_name, - sample_name_loc = "", - dilution_factor = 1) { +SampleType$parse_sample_type <- function(sample_name, + sample_name_loc = "", + dilution_factor = 1) { if (is.null(sample_name_loc) || is.na(sample_name_loc)) { sample_name_loc <- "" } @@ -437,8 +436,8 @@ SampleType$parse_sample_type = function(sample_name, blank_pattern <- "^B..$" if (sample_name %in% blank_types || - grepl(blank_pattern, sample_name) || - grepl(blank_pattern, sample_name_loc)) { + grepl(blank_pattern, sample_name) || + grepl(blank_pattern, sample_name_loc)) { return(SampleType$new("BLANK")) } @@ -446,8 +445,8 @@ SampleType$parse_sample_type = function(sample_name, standard_curve_pattern <- "^(S_|S|S\\s|)(1/\\d+)$" standard_curve_loc_pattern <- "(1/\\d+)" if (sample_name %in% standard_curve_types || - grepl(standard_curve_pattern, sample_name) || - grepl(standard_curve_loc_pattern, sample_name_loc)) { + grepl(standard_curve_pattern, sample_name) || + grepl(standard_curve_loc_pattern, sample_name_loc)) { dilution_factor_pattern <- "1/\\d+" match <- "" if (!is.null(sample_name_loc) && sample_name_loc != "" || !is.na(sample_name_loc) && sample_name_loc != "") { @@ -458,7 +457,7 @@ SampleType$parse_sample_type = function(sample_name, dilution_factor <- eval(parse(text = match)) if (is.null(dilution_factor)) { - dilution_factor = NA # this value needs to be updated later + dilution_factor <- NA # this value needs to be updated later } return(SampleType$new("STANDARD CURVE", dilution_factor = dilution_factor, validate_dilution = FALSE)) @@ -469,8 +468,8 @@ SampleType$parse_sample_type = function(sample_name, "^(N..|.*\\bNEG\\b)" # check if it starts with N or contains NEG string if (sample_name %in% negative_types || - grepl(negative_pattern, sample_name) || - grepl(negative_pattern, sample_name_loc)) { + grepl(negative_pattern, sample_name) || + grepl(negative_pattern, sample_name_loc)) { return(SampleType$new("NEGATIVE CONTROL")) } @@ -478,7 +477,7 @@ SampleType$parse_sample_type = function(sample_name, positive_control_pattern <- c("^(P.+|POS.+|CP.+)(1/\\d+)$") if (grepl(positive_control_pattern, sample_name) || - grepl(positive_control_pattern, sample_name_loc)) { + grepl(positive_control_pattern, sample_name_loc)) { dilution_factor_pattern <- "1/\\d+" match <- "" if (!is.null(sample_name_loc) && sample_name_loc != "" || !is.na(sample_name_loc) && sample_name_loc != "") { @@ -489,7 +488,7 @@ SampleType$parse_sample_type = function(sample_name, dilution_factor <- eval(parse(text = match)) if (is.null(dilution_factor)) { - dilution_factor = NA # this value needs to be updated later + dilution_factor <- NA # this value needs to be updated later } return(SampleType$new("POSITIVE CONTROL", dilution_factor = dilution_factor, validate_dilution = FALSE)) } @@ -504,7 +503,9 @@ SampleType$parse_sample_type = function(sample_name, #' @description #' A class to represent the sample. It contains all the necessary information about the sample #' @examples -#' #TODO +#' # TODO +#' +#' @export Sample <- R6Class( "Sample", list( @@ -558,17 +559,17 @@ Sample <- R6Class( sample_type = NULL, sample_location = NULL, warnings = list(), - errors= list(), + errors = list(), data = data.frame()) { # check for valid input stopifnot(length(id) == 1 && is.numeric(id)) stopifnot(length(sample_name) == 1 && - is.character(sample_name)) + is.character(sample_name)) stopifnot(is.null(sample_type) || - "SampleType" %in% class(sample_type)) + "SampleType" %in% class(sample_type)) stopifnot(is.null(sample_location) || - "SampleLocation" %in% class(sample_location)) + "SampleLocation" %in% class(sample_location)) stopifnot(is.data.frame(data)) self$id <- id @@ -631,7 +632,6 @@ Sample <- R6Class( self$data <- dplyr::bind_rows(self$data, new_sample$data) } - ) ) @@ -644,7 +644,7 @@ Sample <- R6Class( #' #' #' @examples -#' #TODO +#' # TODO #' #' @export Plate <- R6Class( @@ -700,9 +700,7 @@ Plate <- R6Class( self$calibration_info <- calibration_info self$audit_logs <- audit_logs self$plate_name <- plate_name - }, - print = function(...) { cat( "Plate with", @@ -721,14 +719,13 @@ Plate <- R6Class( #' #' @param include_names If `include_names` parameter is `TRUE`, apart from count of control samples, provides also their names.By default `FALSE` summary = function(..., include_names = FALSE) { - positive_control_samples_list <- self$get_sample_by_type("POSITIVE CONTROL") negative_control_samples_list <- self$get_sample_by_type("NEGATIVE CONTROL") blank_samples_num <- length(self$get_sample_by_type("BLANK")) positive_control_num <- length(positive_control_samples_list) - negative_control_num <-length(negative_control_samples_list) + negative_control_num <- length(negative_control_samples_list) positive_control_names <- "" @@ -757,7 +754,6 @@ Plate <- R6Class( "Number of negative control samples: ", negative_control_num, negative_control_names, "\n", - sep = "" ) @@ -765,8 +761,8 @@ Plate <- R6Class( }, #' @description - #' function adds block of information to the current plate - #' + #' function adds block of information to the current plate + #' #' #' the analysis type should be added after the analysis, otherwise there is no enough information about samples add_results_block = function(data_type, parsed_block) { @@ -775,17 +771,17 @@ Plate <- R6Class( return(0) } if (data_type == "Audit Logs") { - self$audit_logs = parsed_block + self$audit_logs <- parsed_block return(0) } if (data_type == "Warnings/Errors") { # warnings should be at the end of the file, thus we assume that there exists samples in given locations - for (warning in parsed_block){ + for (warning in parsed_block) { location <- SampleLocation$parse_sample_location(warning$Location) sample <- self$get_sample(location) - if (warning$Status == "Warning"){ + if (warning$Status == "Warning") { sample$warnings <- c(sample$warnings, warning$Message) - } else if (warning$Status == "Error"){ + } else if (warning$Status == "Error") { sample$errors <- c(sample$errors, warning$Message) } } @@ -798,8 +794,9 @@ Plate <- R6Class( new_samples <- parsed_block for (i in seq_along(new_samples)) { new_sample <- new_samples[[i]] - if (new_sample$id %in% sapply(self$samples, function(x) - x$id)) { + if (new_sample$id %in% sapply(self$samples, function(x) { + x$id + })) { # add new data to the existing sample self$samples[[new_sample$id]]$join(new_sample) } else { @@ -817,8 +814,9 @@ Plate <- R6Class( new_analytes <- parsed_block for (i in seq_along(new_analytes)) { new_analyte <- new_analytes[[i]] - if (new_analyte$id %in% sapply(self$analytes, function(x) - x$id)) { + if (new_analyte$id %in% sapply(self$analytes, function(x) { + x$id + })) { # add new data to the existing sample self$analytes[[as.character(new_analyte$id)]]$join(new_analyte) } else { @@ -826,23 +824,23 @@ Plate <- R6Class( } } return(0) - } if (data_type == "analyte_types") { parsed_block <- Filter(Negate(is.null), parsed_block) - if (length(self$analytes) == 0 || length(parsed_block)) - return(0) # nothing to add + if (length(self$analytes) == 0 || length(parsed_block)) { + return(0) + } # nothing to add for (analyte_name in names(parsed_block)) { analyte_type <- parsed_block[[analyte_name]] analyte_id <- self$get_analyte_id(analyte_name) - if(length(analyte_id) > 0) + if (length(analyte_id) > 0) { self$analytes[[analyte_id]]$analysis_type <- analyte_type + } } return(0) - } }, @@ -866,9 +864,9 @@ Plate <- R6Class( #' @description #' checks analyte consistency - verifies if all of the analytes contained within the samples are listed in the `analytes` list of the plate object check_analyte_consistency = function() { - additional_column_names = c("Total Events") + additional_column_names <- c("Total Events") - is_consistent = TRUE + is_consistent <- TRUE analytes_in_plate <- self$analyte_names @@ -892,7 +890,7 @@ Plate <- R6Class( sample$warnings <- append(sample$warnings, warning_message) # Raise an error - is_consistent = FALSE + is_consistent <- FALSE # stop("Error: There are analytes in the sample not defined in the plate.") } } @@ -906,16 +904,15 @@ Plate <- R6Class( #' @param min_events_per_bead lower bound of acceptable number of events. By default equals to `min_events_per_bead` parameter saved in the plate object #' check_beads_number = function(min_events_per_bead = self$min_events_per_bead) { - below_min_list <- list() - below_min_flag = FALSE + below_min_flag <- FALSE for (sample in self$samples) { - if("Count" %in% row.names(sample$data)){ #TODO there should be option for lowercase + if ("Count" %in% row.names(sample$data)) { # TODO there should be option for lowercase below_min <- which(sample$data["Count", ] < min_events_per_bead, arr.ind = TRUE) - if(length(below_min) > 0) { - below_min_analytes <- names(sample$data["Count", ] )[below_min[, "col"]] + if (length(below_min) > 0) { + below_min_analytes <- names(sample$data["Count", ])[below_min[, "col"]] new_warnings <- paste0("An analyte ", below_min_analytes, " did not reach the specified count in the given sample") sample$warnings <- c(sample$warnings, new_warnings) } @@ -928,25 +925,27 @@ Plate <- R6Class( #' #' @param sample sample name or its id #' @returns sample object of given sample name or id - get_sample = function(sample) { + get_sample = function(sample) { # get the sample by its name, id or location - if ("SampleLocation" %in% class(sample)){ + if ("SampleLocation" %in% class(sample)) { sample <- - which(sapply(self$samples, function(x) - x$sample_location$location_name) == sample$location_name) - } - else if (is.numeric(sample)) { + which(sapply(self$samples, function(x) { + x$sample_location$location_name + }) == sample$location_name) + } else if (is.numeric(sample)) { if (sample < 0 || sample > plate$number_of_samples) { stop("Sample ID out of range") } } else { sample <- - which(sapply(self$samples, function(x) - x$sample_name) == sample) + which(sapply(self$samples, function(x) { + x$sample_name + }) == sample) sample_by_loc <- - which(sapply(self$samples, function(x) - x$sample_location$location_name) == sample) + which(sapply(self$samples, function(x) { + x$sample_location$location_name + }) == sample) sample <- c(sample, sample_by_loc) @@ -957,43 +956,43 @@ Plate <- R6Class( return(self$samples[[sample]]) }, - #' @description - #' Function returns list of samples filtered by the type - #' - #' @param sample_type type of the sample to be filtered. Possible values are: - #' SampleType$valid_sample_types - #' - #' - #' @param exclude If `FALSE` returns list of samples with given `sample_type`, - #' otherwise returns all samples except for the specified `sample_type` - #' - get_sample_by_type = function(sample_type, exclude = FALSE) { - stopifnot(sample_type %in% SampleType$valid_sample_types) - - samples_by_type <- list() - for (sample in self$samples) { - if (sample$sample_type$sample_type == sample_type && !exclude){ - samples_by_type = append(samples_by_type, sample) - } else if (sample$sample_type$sample_type != sample_type && exclude) { - samples_by_type = append(samples_by_type, sample) - } - } - - return(samples_by_type) - }, + #' @description + #' Function returns list of samples filtered by the type + #' + #' @param sample_type type of the sample to be filtered. Possible values are: + #' SampleType$valid_sample_types + #' + #' + #' @param exclude If `FALSE` returns list of samples with given `sample_type`, + #' otherwise returns all samples except for the specified `sample_type` + #' + get_sample_by_type = function(sample_type, exclude = FALSE) { + stopifnot(sample_type %in% SampleType$valid_sample_types) + + samples_by_type <- list() + for (sample in self$samples) { + if (sample$sample_type$sample_type == sample_type && !exclude) { + samples_by_type <- append(samples_by_type, sample) + } else if (sample$sample_type$sample_type != sample_type && exclude) { + samples_by_type <- append(samples_by_type, sample) + } + } + + return(samples_by_type) + }, #' #' @returns analyte id of given name get_analyte_id = function(analyte_name) { analyte_id <- - which(sapply(self$analytes, function(x) - x$analyte_name) == analyte_name) - + which(sapply(self$analytes, function(x) { + x$analyte_name + }) == analyte_name) }, #' @description - #' Function returns data for a specific analyte and sample. - #' + #' Function returns data for a specific analyte and sample. + #' #' @param analyte An analyte name or its id of which data we want to extract #' #' @param sample sample name or id @@ -1025,7 +1024,7 @@ Plate <- R6Class( if (is.null(data_type)) { return(sample$data[analyte_name]) } else { - if (! data_type %in% row.names(sample$data[analyte_name])){ + if (!data_type %in% row.names(sample$data[analyte_name])) { stop(paste0("Incorrect value for `data_type`: ", data_type)) } return(sample$data[data_type, analyte_name]) @@ -1035,96 +1034,89 @@ Plate <- R6Class( return(sample) }, - #' @description - #' Function adjusts the values of test samples substracting values from BLANK samples to remove background light - #' In short it substracts the values from data in all samples, except from Blanks. It does not substract values from - #' `Count` values - #' - #' @param method How the values of different blanks should be aggregated. By default `avg`. For now it is the only available method - #' @param inplace Whether the method should produce new plate with adjusted values or not, By default `TRUE` - operates on the current plate. - blank_adjustment = function(method = "avg", inplace = "TRUE") { - if (private$blank_already_adjusted) { - stop("Blank values have been already adjusted in this plate, if you want to try doing it using different method consider reversing this process") - } - - private$blank_already_adjusted = TRUE - available_methods = c("avg") - if (!method %in% available_methods){ - stop(paste0(method, "not available for now, consider using one of the following: ", available_methods)) - } - - if (inplace == FALSE){ - newplate <- self$copy() - } - else { - newplate <- self - } - - blank_samples <- self$get_sample_by_type("BLANK") # these values will be substracted - non_blank_samples <- self$get_sample_by_type("BLANK", exclude = TRUE) # from these values - - # aggregate blank values - - if (method == "avg"){ - agg_dataframe <- NULL - for (sample in blank_samples){ - if (is.null(agg_dataframe)){ - agg_dataframe <- sample$data - } - else { - agg_dataframe <- agg_dataframe + sample$data - } - } - if ("Count" %in% rownames(agg_dataframe)){ - agg_dataframe["Count", ] = 0 # count row is ommited - } - if ("Total Events" %in% colnames(agg_dataframe)){ - agg_dataframe["Total Events"] = 0 - } + #' @description + #' Function adjusts the values of test samples substracting values from BLANK samples to remove background light + #' In short it substracts the values from data in all samples, except from Blanks. It does not substract values from + #' `Count` values + #' + #' @param method How the values of different blanks should be aggregated. By default `avg`. For now it is the only available method + #' @param inplace Whether the method should produce new plate with adjusted values or not, By default `TRUE` - operates on the current plate. + blank_adjustment = function(method = "avg", inplace = "TRUE") { + if (private$blank_already_adjusted) { + stop("Blank values have been already adjusted in this plate, if you want to try doing it using different method consider reversing this process") + } - agg_dataframe = agg_dataframe / length(blank_samples) # average the results + private$blank_already_adjusted <- TRUE + available_methods <- c("avg") + if (!method %in% available_methods) { + stop(paste0(method, "not available for now, consider using one of the following: ", available_methods)) + } + if (inplace == FALSE) { + newplate <- self$copy() + } else { + newplate <- self + } - for (sample in non_blank_samples){ - sample$data = sample$data - agg_dataframe # substract the aggregated values - } + blank_samples <- self$get_sample_by_type("BLANK") # these values will be substracted + non_blank_samples <- self$get_sample_by_type("BLANK", exclude = TRUE) # from these values + # aggregate blank values - } + if (method == "avg") { + agg_dataframe <- NULL + for (sample in blank_samples) { + if (is.null(agg_dataframe)) { + agg_dataframe <- sample$data + } else { + agg_dataframe <- agg_dataframe + sample$data + } + } + if ("Count" %in% rownames(agg_dataframe)) { + agg_dataframe["Count", ] <- 0 # count row is ommited + } + if ("Total Events" %in% colnames(agg_dataframe)) { + agg_dataframe["Total Events"] <- 0 + } - }, + agg_dataframe <- agg_dataframe / length(blank_samples) # average the results - #' @description - #' Function verifies if there are any MFI values below zero after blank removal - check_MFI_after_adjustment = function() { - if (!self$check_if_blanks_already_adjusted){ - stop("Consider adjusting the blanks first") - } + for (sample in non_blank_samples) { + sample$data <- sample$data - agg_dataframe # substract the aggregated values + } + } + }, - below_zero_list <- list() - below_zero_flag = FALSE + #' @description + #' Function verifies if there are any MFI values below zero after blank removal + check_MFI_after_adjustment = function() { + if (!self$check_if_blanks_already_adjusted) { + stop("Consider adjusting the blanks first") + } - for (sample in self$get_sample_by_type("BLANK", exclude = TRUE)) { - below_min <- which(sample$data < 0, arr.ind = TRUE) - if(length(below_min) > 0) { - below_min_analytes <- names(sample$data)[below_min[, "col"]] - new_warnings <- paste0("An analyte ", below_min_analytes, " has value below 0 after blank adjustment") - sample$warnings <- c(sample$warnings, new_warnings) + below_zero_list <- list() - } - } + below_zero_flag <- FALSE - return(below_zero_flag) + for (sample in self$get_sample_by_type("BLANK", exclude = TRUE)) { + below_min <- which(sample$data < 0, arr.ind = TRUE) + if (length(below_min) > 0) { + below_min_analytes <- names(sample$data)[below_min[, "col"]] + new_warnings <- paste0("An analyte ", below_min_analytes, " has value below 0 after blank adjustment") + sample$warnings <- c(sample$warnings, new_warnings) + } + } - }, + return(below_zero_flag) + }, - #' @description performs copy of the plate - copy = function() { - stop("Not implemented yet") - } + #' @description performs copy of the plate + copy = function() { + stop("Not implemented yet") + } ), private = list( blank_already_adjusted = FALSE @@ -1160,7 +1152,7 @@ Plate <- R6Class( #' @field batch_name Metdata: batch name batch_name = function() { - if (!is.null(self$batch_info$batch_name) && !is.na(self$batch_info$batch_name)){ + if (!is.null(self$batch_info$batch_name) && !is.na(self$batch_info$batch_name)) { return(self$batch_info$batch_name) } return("___") @@ -1168,13 +1160,13 @@ Plate <- R6Class( #' @field min_events_per_bead minimal number of events that is valid for one bead - sample and analyte min_events_per_bead = function() { - if (is.null(self$batch_info$min_events_per_bead)){ + if (is.null(self$batch_info$min_events_per_bead)) { return(50) } return(self$batch_info$min_events_per_bead) }, #' @field check_if_blanks_already_adjusted flag that specifies if the blanks were already adjusted and its MFI values subtracted from remaining samples - check_if_blanks_already_adjusted = function(){ + check_if_blanks_already_adjusted = function() { return(private$blank_already_adjusted) }, diff --git a/R/read_data.R b/R/read_data.R index 2d1f3dcd..1530cb66 100644 --- a/R/read_data.R +++ b/R/read_data.R @@ -9,17 +9,19 @@ #' @param check_plate if TRUE veryfies the plate - checks the consistency etc. #' @param verbose if TRUE, print out the progress of the function #' @param colorize if TRUE, colorize the output +#' @param ... additional arguments passed down #' -#'@examples -#' plate <- read_data("path/to/file.csv") +#' @examples +#' plate_file <- system.file("extdata", "random.csv", package = "PvSTATEM") +#' plate <- read_data(plate_file) #' #' @export read_data <- function(file_path, - layout_file_path = NULL, - check_plate = TRUE, - ..., - verbose = TRUE, - colorize = !isTRUE(getOption('knitr.in.progress'))) { + layout_file_path = NULL, + check_plate = TRUE, + verbose = TRUE, + colorize = !isTRUE(getOption("knitr.in.progress")), + ...) { # firstly we read the raw csv file as a dataframe # data <- read.csv(filepath, header = FALSE, sep = ",", stringsAsFactors = FALSE) @@ -37,7 +39,9 @@ read_data <- function(file_path, verbose_cat( color_codes$green_start, "Reading MBA plate csv file...\n\n", - color_codes$green_end, verbose = verbose) + color_codes$green_end, + verbose = verbose + ) blocks <- extract_blocks(file_path) @@ -51,22 +55,25 @@ read_data <- function(file_path, parse_results_blocks(results_blocks, verbose = verbose) results_plate <- - parse_header_blocks(results_plate, header_blocks, check_consistency=check_plate, verbose = verbose) + parse_header_blocks(results_plate, header_blocks, check_consistency = check_plate, verbose = verbose) # merging the layout file if (!is.null(layout_file_path)) { verbose_cat( - "",color_codes$green_start, + "", color_codes$green_start, "reading layout file...\n\n", - color_codes$green_end, verbose = verbose) + color_codes$green_end, + verbose = verbose + ) results_plate <- - read_layout_data(layout_file_path, results_plate, check_plate=check_plate, verbose = verbose) + read_layout_data(layout_file_path, results_plate, check_plate = check_plate, verbose = verbose) } verbose_cat( color_codes$green_start, "New plate object has been created!\n", - color_codes$green_end,"\n", - verbose = verbose) + color_codes$green_end, "\n", + verbose = verbose + ) # consistency and validation checks if (check_plate) { @@ -95,7 +102,8 @@ read_data <- function(file_path, ")", "\nPlate contains at least one region that did not reach the specified bead count - ", results_plate$min_events_per_bead, "\n", verbose = verbose - ) } + ) + } } # add name from filepath @@ -104,7 +112,6 @@ read_data <- function(file_path, results_plate$plate_name <- filename_without_extension return(results_plate) - } read_layout_data <- function(layout_file_path, @@ -114,13 +121,14 @@ read_layout_data <- function(layout_file_path, verbose = TRUE) { # function modifies the results_plate object by adding the location information from the layout file - ext = tools::file_ext(layout_file_path) + ext <- tools::file_ext(layout_file_path) stopifnot(ext %in% c("csv", "xlsx")) location_data <- switch(ext, - csv = read_location_data_csv(layout_file_path), - xlsx = read_location_data_xlsx(layout_file_path)) + csv = read_location_data_csv(layout_file_path), + xlsx = read_location_data_xlsx(layout_file_path) + ) for (sample in results_plate$samples) { row <- sample$sample_location$row @@ -141,8 +149,9 @@ read_location_data_csv <- function(location_file_path, ..., verbose_cat("not tested implementation location csv file\n", verbose = verbose) location_data <- read.csv(location_file_path, - header = TRUE, - stringsAsFactors = FALSE) + header = TRUE, + stringsAsFactors = FALSE + ) return(location_data) } @@ -189,8 +198,9 @@ extract_blocks <- function(file_path) { blocks[[length(blocks) + 1]] <- current_block # remove empty blocks - blocks <- blocks[sapply(blocks, function(x) - length(x) > 0)] + blocks <- blocks[sapply(blocks, function(x) { + length(x) > 0 + })] return(blocks) } @@ -205,20 +215,22 @@ parse_lines <- function(lines, csv_delim = ",") { # remove trailing delims leading_trailing_delim_regex <- - paste0("^", csv_delim, "*|", csv_delim , "*$") + paste0("^", csv_delim, "*|", csv_delim, "*$") lines <- - lapply(lines, function(line) - gsub(leading_trailing_delim_regex, "", line)) + lapply(lines, function(line) { + gsub(leading_trailing_delim_regex, "", line) + }) replace_delim_in_brackets <- paste0("(?:\\G(?!^)|\\()[^)(", csv_delim, "]*\\K,(?=[^)()*])") lines <- - lapply(lines, function(line) - gsub(replace_delim_in_brackets, replacement_delim, line, perl = TRUE)) + lapply(lines, function(line) { + gsub(replace_delim_in_brackets, replacement_delim, line, perl = TRUE) + }) delim_regex <- paste0( - '(?:^|', + "(?:^|", csv_delim, ')(?=[^"]|(")?)"?((?(1)(?:[^"]|"")*|[^', csv_delim, @@ -229,12 +241,14 @@ parse_lines <- function(lines, csv_delim = ",") { regmatches(lines, gregexpr(delim_regex, lines, perl = TRUE)) # remove trailing delims matches <- - lapply(matches, function(line) - gsub(leading_trailing_delim_regex, "", line)) + lapply(matches, function(line) { + gsub(leading_trailing_delim_regex, "", line) + }) # remove empty strings - matches <- lapply(matches, function(line) - line[nzchar(line)]) + matches <- lapply(matches, function(line) { + line[nzchar(line)] + }) return(matches) } @@ -247,7 +261,7 @@ divide_blocks <- function(blocks) { for (i in seq_len(length(blocks))) { # Check if the block contains the keyword "results" if (any(grepl("^Results", blocks[[i]], ignore.case = TRUE))) { - results_block_index = i + results_block_index <- i } } @@ -272,8 +286,9 @@ parse_header_blocks <- verbose = TRUE) { # this function parses the header blocks and writes the metadata into the results plate - if (length(header_blocks) < 5) + if (length(header_blocks) < 5) { stop("Improper data formatting - there are no enough blocks in the header section") + } batch_info <- parse_batch_info(header_blocks[[1]], header_blocks[[2]]) @@ -287,7 +302,7 @@ parse_header_blocks <- sample_info <- parse_sample_info(header_blocks[[5]]) if (check_consistency) { - if (results_plate$number_of_samples != sample_info$samples_count) + if (results_plate$number_of_samples != sample_info$samples_count) { stop( "According to plate metadata there are ", sample_info$samples_count, @@ -295,6 +310,7 @@ parse_header_blocks <- results_plate$number_of_samples, "found" ) + } # TODO what is min events ? batch_info$min_events_per_bead <- @@ -305,7 +321,6 @@ parse_header_blocks <- results_plate$batch_info <- batch_info return(results_plate) - } @@ -329,7 +344,6 @@ parse_header <- function(header_blocks) { sample_info = sample_info ) ) - } parse_date <- function(date) { @@ -380,7 +394,6 @@ parse_calibration_info <- function(cal_block1, cal_block2) { # TODO return(list()) - } parse_sample_info <- function(sample_block) { @@ -441,20 +454,22 @@ parse_single_results_block <- "NOTE", color_codes$yellow_end, ")\n", - "CRC block found, omiting it for now\n", verbose = verbose) + "CRC block found, omiting it for now\n", + verbose = verbose + ) return(list(data_type = "CRC", list())) } return(list(data_type = "CRC", list())) } - data_type = vector_data_type[2] + data_type <- vector_data_type[2] block_header <- results_block[[2]] # fields and antigen names if (length(results_block) <= 2) { # there are no data stored - return(list(data_type=data_type, list())) + return(list(data_type = data_type, list())) } results_df <- @@ -464,11 +479,11 @@ parse_single_results_block <- warning_datatypes <- c("Warnings/Errors", "Audit Logs") if (data_type %in% warning_datatypes) { - warnings <- list() - if (nrow(results_df) == 0) + if (nrow(results_df) == 0) { return(list(data_type = data_type, list())) + } for (row in 1:nrow(results_df)) { @@ -487,7 +502,7 @@ parse_single_results_block <- if (data_type %in% analysis_datatypes) { rownames(results_df) <- results_df[1:nrow(results_df), 1] results_df <- - results_df[,-1] # remove the first column - it should have the known format + results_df[, -1] # remove the first column - it should have the known format # replace values null_values <- c("None", "Alysis Types", "Analysis Types") for (val in null_values) { @@ -503,8 +518,9 @@ parse_single_results_block <- ")", "\nThe datatype ", data_type, - " contains NA values\n" - , verbose = verbose) + " contains NA values\n", + verbose = verbose + ) } analyte_types <- list() @@ -513,8 +529,9 @@ parse_single_results_block <- analysis_type <- results_df[1, col] - if (is.na(analysis_type)) - analysis_type = NULL + if (is.na(analysis_type)) { + analysis_type <- NULL + } analyte_types[[analyte_name]] <- analysis_type } @@ -527,7 +544,7 @@ parse_single_results_block <- if (data_type %in% beads_datatypes) { rownames(results_df) <- results_df[1:nrow(results_df), 1] results_df <- - results_df[,-1] # remove the first column - it should have the known format + results_df[, -1] # remove the first column - it should have the known format # replace values null_values <- c("None", "Units", "Units:") for (val in null_values) { @@ -552,7 +569,7 @@ parse_single_results_block <- analytes <- list() for (col in 1:ncol(results_df)) { analyte_name <- colnames(results_df)[col] - id = NA + id <- NA per_bead_count <- NA analyte_units <- NULL analysis_type <- NULL @@ -560,16 +577,16 @@ parse_single_results_block <- if (data_type == "Units") { analyte_units <- results_df[2, col] - } - else if (data_type == "Per Bead Count") { + } else if (data_type == "Per Bead Count") { per_bead_count <- as.numeric(results_df[2, col]) } id <- as.numeric(results_df[1, col]) - if (!is.null(analyte_units) && is.na(analyte_units)) - analyte_units = NULL + if (!is.null(analyte_units) && is.na(analyte_units)) { + analyte_units <- NULL + } analyte <- Analyte$new( @@ -581,7 +598,6 @@ parse_single_results_block <- ) analytes[[as.character(id)]] <- analyte - } return(list(data_type = "analytes", analytes)) } @@ -595,7 +611,7 @@ parse_single_results_block <- # check if the dataframe contains a column with name or Location - first_analyte_col_index = 1 + first_analyte_col_index <- 1 if ("Location" %in% names(results_df)) { first_analyte_col_index <- first_analyte_col_index + 1 } @@ -611,9 +627,11 @@ parse_single_results_block <- id <- row # TODO better labeling if (is.null(results_df[row, "Sample"])) { - stop(paste0("No name specified for the sample of id: ", - row, - " - omiting it")) + stop(paste0( + "No name specified for the sample of id: ", + row, + " - omiting it" + )) next } @@ -629,8 +647,7 @@ parse_single_results_block <- if (data_type == "Dilution Factor") { dilution_factor <- as.numeric(results_df[row, "Dilution Factor"]) - } - else { + } else { sample_df <- results_df[row, first_analyte_col_index:length(results_df)] @@ -653,7 +670,6 @@ parse_single_results_block <- data = sample_df ) samples[[row]] <- sample - } return(list(data_type = "samples", samples)) diff --git a/R/standard_curves.R b/R/standard_curves.R index 4e88efc0..7fae1366 100644 --- a/R/standard_curves.R +++ b/R/standard_curves.R @@ -12,24 +12,25 @@ library(ggplot2) #' @param log_scale Which elements on the plot should be displayed in log scale. By default c("dilutions"). If `NULL` no log scale is used, if "all" or c("dilutions", "MFI") all elements are displayed in log scale. #' @param verbose If `TRUE` print messages, `TRUE` by default #' +#' @import ggplot2 +#' #' @export -plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Median", file_path = NULL, decreasing_dilution_order = TRUE, log_scale = c("all"), verbose = TRUE) { - +plot_standard_curve_antibody <- function(plates, antibody_name, data_type = "Median", file_path = NULL, decreasing_dilution_order = TRUE, log_scale = c("all"), verbose = TRUE) { if (inherits(plates, "Plate")) { # an instance of Plate plates <- list(plates) } - if (!inherits(plates, "list")){ + if (!inherits(plates, "list")) { stop("plates object should be a plate or a list of plates") } - for( plate in plates ){ - if (!inherits(plate, "Plate")){ + for (plate in plates) { + if (!inherits(plate, "Plate")) { stop("plates object should be a plate or a list of plates") } } # check if log_scale is a character vector and contains element from set available_log_scale_values <- c("all", "dilutions", "MFI") - if (!is.null(log_scale) && !all(log_scale %in% available_log_scale_values)){ + if (!is.null(log_scale) && !all(log_scale %in% available_log_scale_values)) { stop("log_scale should be a character vector containing elements from set: ", paste(available_log_scale_values, collapse = ", ")) } @@ -38,7 +39,7 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi standard_curve_values_list <- list() - for (plate in plates){ + for (plate in plates) { if (!plate$check_if_blanks_already_adjusted) { verbose_cat( "(", @@ -52,7 +53,7 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi } standard_curves <- plate$get_sample_by_type("STANDARD CURVE") - if (length(standard_curves) == 0){ + if (length(standard_curves) == 0) { verbose_cat( "(", color_codes$red_start, @@ -65,12 +66,12 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi standard_curves <- plate$get_sample_by_type("POSITIVE CONTROL") } if (is.null(standard_curve_num_samples)) { - standard_curve_num_samples = length(standard_curves) + standard_curve_num_samples <- length(standard_curves) } else if (standard_curve_num_samples != length(standard_curves)) { stop("Inconsistent number of positive control or standard curve samples accross plates") } - if (!antibody_name %in% plate$analyte_names){ + if (!antibody_name %in% plate$analyte_names) { stop("Antibody ", antibody_name, " not present in the plate") } @@ -87,31 +88,28 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi if (is.null(dilutions_numeric_base)) { dilutions_numeric_base <- dilutions_numeric - } - else if(!all.equal(dilutions_numeric_base, dilutions_numeric)) { + } else if (!all.equal(dilutions_numeric_base, dilutions_numeric)) { stop("Inconsistent dilutions accross plates") } curve_values <- sapply(standard_curves, function(sample) sample$data[data_type, antibody_name]) - if (any(is.na(curve_values))){ + if (any(is.na(curve_values))) { stop(data_type, " not present in the dataframe") } - standard_curve_values_list = append(standard_curve_values_list, list(curve_values)) - + standard_curve_values_list <- append(standard_curve_values_list, list(curve_values)) } - plot_name <- paste0("Standard curve for analyte: ", antibody_name) - if (length(plates) >= 3){ + if (length(plates) >= 3) { colors <- RColorBrewer::brewer.pal(length(plates), "Set1") - }else { + } else { colors <- c("red", "blue") } - par(mfrow=c(1,1)) + par(mfrow = c(1, 1)) log_if_needed_mfi <- function(x) { if ("MFI" %in% log_scale || "all" %in% log_scale) { @@ -151,17 +149,19 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi x_ticks <- c(log_if_needed_dilutions(dilutions_numeric), max(log_if_needed_dilutions(dilutions_numeric)) + 1) x_labels <- c(dilutions, "") - legend_position <- c(0.8, 0.2) # Automatically position the legend + legend_position <- c(0.8, 0.2) # Automatically position the legend if (decreasing_dilution_order) { - if (x_log_scale && !y_log_scale) + if (x_log_scale && !y_log_scale) { legend_position <- c(0.8, 0.8) - else + } else { legend_position <- c(0.2, 0.2) + } } else { - if (x_log_scale && !y_log_scale) + if (x_log_scale && !y_log_scale) { legend_position <- c(0.2, 0.8) - else + } else { legend_position <- c(0.8, 0.2) + } } p <- ggplot(plot_data, aes(x = dilutions, y = mfi, color = plate)) + @@ -172,22 +172,21 @@ plot_standard_curve_antibody = function(plates, antibody_name, data_type = "Medi scale_x_continuous(breaks = x_ticks, labels = x_labels, trans = if (decreasing_dilution_order) "reverse" else "identity") + scale_y_continuous() + theme_minimal() + - theme(axis.line = element_line(colour = "black"), - axis.text.x = element_text(size = 9), - axis.text.y = element_text(size = 9), - legend.position = legend_position, # Automatically position the legend - legend.background = element_rect(fill = "white", color = "black")) + theme( + axis.line = element_line(colour = "black"), + axis.text.x = element_text(size = 9), + axis.text.y = element_text(size = 9), + legend.position = legend_position, # Automatically position the legend + legend.background = element_rect(fill = "white", color = "black") + ) - if (!is.null(file_path)){ + if (!is.null(file_path)) { ggsave(file_path, plot = p, width = 10, height = 7, units = "in", dpi = 300) } else { print(p) } - - - } @@ -206,4 +205,3 @@ color_codes <- green_start = "\033[32m", green_end = "\033[39m" ) - diff --git a/README.md b/README.md index d8652d53..a8ee60ef 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,9 @@ # PvSTATEM - an R package for automated analysis of serological data + + +[![R-CMD-check](https://github.com/ZetrextJG/PvSTATEM/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/ZetrextJG/PvSTATEM/actions/workflows/R-CMD-check.yaml) + + *pre-release* version ## Overview This package is a simple tool, which handles the raw data of various formats, produced in Multiplex Bead Assay (MBA). In short, it reads the unstructured, raw data from e.g. Luminex device and output normalized and well-structured data which can be later used in more advanced, downstream analysis. diff --git a/inst/extdata/random.csv b/inst/extdata/random.csv new file mode 100644 index 00000000..6bb15fe2 --- /dev/null +++ b/inst/extdata/random.csv @@ -0,0 +1,62 @@ +Program,xPONENT,,MAGPIX,,,,,,,,,, +Build,3.1.871.0,,,,,,,,,,,, +Date,01/07/2024,00:00,,,,,,,,,,, +,,,,,,,,,,,,, +SN,PvSTATEM1234,,,,,,,,,,,, +Batch,test_plate,,,,,,,,,,,, +Version,1,,,,,,,,,,,, +Operator,,,,,,,,,,,,, +ComputerName,test,,,,,,,,,,,, +Country Code,616,,,,,,,,,,,, +ProtocolName,test,,,,,,,,,,,, +ProtocolVersion,1,,,,,,,,,,,, +,,,,,,,,,,,,, +Most Recent Calibration and Verification Results:,,,,,,,,,,,,, +Last VER Calibration,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +Last CAL Calibration,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +Last Fluidics Test,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +,,,,,,,,,,,,, +CALInfo:,,,,,,,,,,,,, +Calibrator,,,,,,,,,,,,, +Lot,ExpirationDate,CalibrationTime,DDTemp,CL1Temp,CL2Temp,DDVolts,CL1Volts,CL2Volts,DDRVal,CL1RVal,CL2RVal,Result,MachineSerialNo +,,,,,,,,,,,,, +,,,,,,,,,,,,, +Samples,8,Min Events,50,Per Bead,,,,,,,,, +,,,,,,,,,,,,, +Results,,,,,,,,,,,,, +,,,,,,,,,,,,, +DataType:,Median,,,,,,,,,,,, +Location,Sample,Etramp5_ag1,Etramp4_ag1,AMA1,,,,,,,,, +1(1,A1),BLANK,20,21,23,,,,,,,, +2(1,A2),test1,1292,1389,1414,,,,,,,, +3(1,A3),test2,1430,1435,867,,,,,,,, +4(1,A4),test3,657,2031,1300,,,,,,,, +5(1,B1),CP3 1/50,1486,1437,1893,,,,,,,, +6(1,B2),CP3 1/250,130,87,90,,,,,,,, +7(1,B3),CP3 1/1250,78,67,64,,,,,,,, +8(1,B4),CP3 1/6250,65,60,64,,,,,,,, +,,,,,,,,,,,,, +DataType:,Count,,,,,,,,,,,, +Location,Sample,Etramp5_ag1,Etramp4_ag1,AMA1,,,,,,,,, +1(1,A1),BLANK,100,121,143,,,,,,,, +2(1,A2),test1,102,104,105,,,,,,,, +3(1,A3),test2,130,135,67,,,,,,,, +4(1,A4),test3,57,231,100,,,,,,,, +5(1,B1),CP3 1/50,86,37,193,,,,,,,, +6(1,B2),CP3 1/250,130,87,90,,,,,,,, +7(1,B3),CP3 1/1250,53,47,65,,,,,,,, +8(1,B4),CP3 1/6250,85,50,54,,,,,,,, +,,,,,,,,,,,,, +DataType:,Per Bead Count,,,,,,,,,,,, +Alyte:,Etramp5_ag1,Etramp4_ag1,AMA1,,,,,,,,,, +BeadID:,73,75,34,,,,,,,,,, +Per Bead:,50,50,50,,,,,,,,,, +,,,,,,,,,,,,, +DataType:,Audit Logs,,,,,,,,,,,, +UserId,Date,Message,,,,,,,,,,, +,,,,,,,,,,,,, +DataType:,Warnings/Errors,,,,,,,,,,,, +Location,Status,Message,,,,,,,,,,, +,,,,,,,,,,,,, +-- CRC --,,,,,,,,,,,,, +CRC32: 85C2BFCD,,,,,,,,,,,,, diff --git a/inst/extdata/random2.csv b/inst/extdata/random2.csv new file mode 100644 index 00000000..b93b8160 --- /dev/null +++ b/inst/extdata/random2.csv @@ -0,0 +1,62 @@ +Program,xPONENT,,MAGPIX,,,,,,,,,, +Build,3.1.872.0,,,,,,,,,,,, +Date,02/07/2024,00:00,,,,,,,,,,, +,,,,,,,,,,,,, +SN,PvSTATEM1234,,,,,,,,,,,, +Batch,test_plate,,,,,,,,,,,, +Version,1,,,,,,,,,,,, +Operator,,,,,,,,,,,,, +ComputerName,test,,,,,,,,,,,, +Country Code,48,,,,,,,,,,,, +ProtocolName,test,,,,,,,,,,,, +ProtocolVersion,1,,,,,,,,,,,, +,,,,,,,,,,,,, +Most Recent Calibration and Verification Results:,,,,,,,,,,,,, +Last VER Calibration,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +Last CAL Calibration,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +Last Fluidics Test,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +,,,,,,,,,,,,, +CALInfo:,,,,,,,,,,,,, +Calibrator,,,,,,,,,,,,, +Lot,ExpirationDate,CalibrationTime,DDTemp,CL1Temp,CL2Temp,DDVolts,CL1Volts,CL2Volts,DDRVal,CL1RVal,CL2RVal,Result,MachineSerialNo +,,,,,,,,,,,,, +,,,,,,,,,,,,, +Samples,8,Min Events,50,Per Bead,,,,,,,,, +,,,,,,,,,,,,, +Results,,,,,,,,,,,,, +,,,,,,,,,,,,, +DataType:,Median,,,,,,,,,,,, +Location,Sample,Etramp5_ag1,Etramp4_ag1,AMA1,GEXP18,,,,,,,, +1(1,A1),BLANK,19,22,22,40,,,,,,, +2(1,B1),test1,1300,1389,1454,70,,,,,,, +3(1,C1),test2,1433,1435,880,1500,,,,,,, +4(1,D1),test3,660,2031,1200,415,,,,,,, +5(1,E1),CP3 1/250,1500,1437,1903,400,,,,,,, +6(1,F1),CP3 1/1250,135,87,93,50,,,,,,, +7(1,G1),CP3 1/6250,80,67,66,43,,,,,,, +8(1,H1),CP3 1/31250,66,60,66,42,,,,,,, +,,,,,,,,,,,,, +DataType:,Count,,,,,,,,,,,, +Location,Sample,Etramp5_ag1,Etramp4_ag1,AMA1,GEXP18,,,,,,,, +1(1,A1),BLANK,102,126,141,140,,,,,,, +2(1,B1),test1,101,100,100,80,,,,,,, +3(1,C1),test2,120,133,69,160,,,,,,, +4(1,D1),test3,70,233,103,100,,,,,,, +5(1,E1),CP3 1/250,90,38,190,170,,,,,,, +6(1,F1),CP3 1/1250,131,88,85,160,,,,,,, +7(1,G1),CP3 1/6250,57,46,70,180,,,,,,, +8(1,H1),CP3 1/31250,83,52,59,185,,,,,,, +,,,,,,,,,,,,, +DataType:,Per Bead Count,,,,,,,,,,,, +Alyte:,Etramp5_ag1,Etramp4_ag1,AMA1,GEXP18,,,,,,,,, +BeadID:,73,75,34,36,,,,,,,,, +Per Bead:,50,50,50,50,,,,,,,,, +,,,,,,,,,,,,, +DataType:,Audit Logs,,,,,,,,,,,, +UserId,Date,Message,,,,,,,,,,, +,,,,,,,,,,,,, +DataType:,Warnings/Errors,,,,,,,,,,,, +Location,Status,Message,,,,,,,,,,, +,,,,,,,,,,,,, +-- CRC --,,,,,,,,,,,,, +CRC32: 85C2BFCD,,,,,,,,,,,,, diff --git a/inst/extdata/random_broken_colB.csv b/inst/extdata/random_broken_colB.csv new file mode 100644 index 00000000..0a26af17 --- /dev/null +++ b/inst/extdata/random_broken_colB.csv @@ -0,0 +1,62 @@ +Program,,,MAGPIX,,,,,,,,,, +Build,xPONENT,,,,,,,,,,,, +Date,3.1.871.0,00:00,,,,,,,,,,, +,01/07/2024,,,,,,,,,,,, +SN,,,,,,,,,,,,, +Batch,PvSTATEM1234,,,,,,,,,,,, +Version,test_plate,,,,,,,,,,,, +Operator,1,,,,,,,,,,,, +ComputerName,,,,,,,,,,,,, +Country Code,test,,,,,,,,,,,, +ProtocolName,616,,,,,,,,,,,, +ProtocolVersion,test,,,,,,,,,,,, +,1,,,,,,,,,,,, +Most Recent Calibration and Verification Results:,,,,,,,,,,,,, +Last VER Calibration,,,,,,,,,,,,, +Last CAL Calibration,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +Last Fluidics Test,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +,Passed 01/07/2024 00:00:00,,,,,,,,,,,, +CALInfo:,,,,,,,,,,,,, +Calibrator,,,,,,,,,,,,, +Lot,,CalibrationTime,DDTemp,CL1Temp,CL2Temp,DDVolts,CL1Volts,CL2Volts,DDRVal,CL1RVal,CL2RVal,Result,MachineSerialNo +,ExpirationDate,,,,,,,,,,,, +,,,,,,,,,,,,, +Samples,,Min Events,50,Per Bead,,,,,,,,, +,8,,,,,,,,,,,, +Results,,,,,,,,,,,,, +,,,,,,,,,,,,, +DataType:,,,,,,,,,,,,, +Location,Median,Etramp5_ag1,Etramp4_ag1,AMA1,,,,,,,,, +1(1,Sample,BLANK,20,21,23,,,,,,,, +2(1,A1),test1,1292,1389,1414,,,,,,,, +3(1,A2),test2,1430,1435,867,,,,,,,, +4(1,A3),test3,657,2031,1300,,,,,,,, +5(1,A4),CP3 1/50,1486,1437,1893,,,,,,,, +6(1,B1),CP3 1/250,130,87,90,,,,,,,, +7(1,B2),CP3 1/1250,78,67,64,,,,,,,, +8(1,B3),CP3 1/6250,65,60,64,,,,,,,, +,B4),,,,,,,,,,,, +DataType:,,,,,,,,,,,,, +Location,Count,Etramp5_ag1,Etramp4_ag1,AMA1,,,,,,,,, +1(1,Sample,BLANK,100,121,143,,,,,,,, +2(1,A1),test1,102,104,105,,,,,,,, +3(1,A2),test2,130,135,67,,,,,,,, +4(1,A3),test3,57,231,100,,,,,,,, +5(1,A4),CP3 1/50,86,37,193,,,,,,,, +6(1,B1),CP3 1/250,130,87,90,,,,,,,, +7(1,B2),CP3 1/1250,53,47,65,,,,,,,, +8(1,B3),CP3 1/6250,85,50,54,,,,,,,, +,B4),,,,,,,,,,,, +DataType:,,,,,,,,,,,,, +Alyte:,Per Bead Count,Etramp4_ag1,AMA1,,,,,,,,,, +BeadID:,Etramp5_ag1,75,34,,,,,,,,,, +Per Bead:,73,50,50,,,,,,,,,, +,50,,,,,,,,,,,, +DataType:,,,,,,,,,,,,, +UserId,Audit Logs,Message,,,,,,,,,,, +,Date,,,,,,,,,,,, +DataType:,,,,,,,,,,,,, +Location,Warnings/Errors,Message,,,,,,,,,,, +,Status,,,,,,,,,,,, +-- CRC --,,,,,,,,,,,,, +CRC32: 85C2BFCD,,,,,,,,,,,,, diff --git a/inst/extdata/random_layout.xlsx b/inst/extdata/random_layout.xlsx new file mode 100644 index 00000000..95d09583 Binary files /dev/null and b/inst/extdata/random_layout.xlsx differ diff --git a/man/Analyte.Rd b/man/Analyte.Rd index b08d6d2d..dc7e4eec 100644 --- a/man/Analyte.Rd +++ b/man/Analyte.Rd @@ -7,7 +7,7 @@ Analyte class is an object that gathers information about one specific Analyte on the Luminex plate } \examples{ -etramp <- Analyte$new(id = 73, analyte_name = "Etramp5_ag1", bead_count=50) +etramp <- Analyte$new(id = 73, analyte_name = "Etramp5_ag1", bead_count = 50) print(etramp) } diff --git a/man/Plate.Rd b/man/Plate.Rd index 9ad4dda9..84f0c0c6 100644 --- a/man/Plate.Rd +++ b/man/Plate.Rd @@ -12,7 +12,7 @@ analyte id of given name A class to represent the luminex plate. It contains information about the samples and analytes that were examined on the plate as well as some additional metadata and batch info } \examples{ -#TODO +# TODO } \section{Public fields}{ @@ -27,6 +27,10 @@ A class to represent the luminex plate. It contains information about the sample \item{\code{calibration_info}}{a list containing calibration logs read from Luminex file} \item{\code{audit_logs}}{a list containing audit logs read from Luminex file} + +\item{\code{plate_name}}{\itemize{ +\item plate name obtained from filename +}} } \if{html}{\out{}} } @@ -84,7 +88,8 @@ creates a new \code{Plate} object samples = list(), batch_info = list(), calibration_info = list(), - audit_logs = list() + audit_logs = list(), + plate_name = "" )}\if{html}{\out{}} } diff --git a/man/Sample.Rd b/man/Sample.Rd index 45c3ffec..0d36063a 100644 --- a/man/Sample.Rd +++ b/man/Sample.Rd @@ -7,7 +7,8 @@ A class to represent the sample. It contains all the necessary information about the sample } \examples{ -#TODO +# TODO + } \section{Public fields}{ \if{html}{\out{