-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
74 changed files
with
1,352 additions
and
367 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,3 +13,4 @@ llfs_rnaseq_data/* | |
plots/ | ||
inst/CITATION.bib | ||
CITATION.cff | ||
tmp/ |
Empty file.
Empty file.
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,3 +9,5 @@ llfs_rnaseq_data/* | |
llfs_rnaseq_data_*/ | ||
*.tar.gz | ||
*.csv | ||
tmp/* | ||
data/ |
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Use an official, version-pinned R base image
FROM rocker/r-ver:4.3.1

# Set maintainer label
LABEL maintainer="[email protected]"

# Install required system libraries.
# The apt package lists are removed in the same layer so that they are not
# baked into the image.
RUN apt-get update && apt-get install -y \
    libcurl4-openssl-dev \
    libssl-dev \
    libxml2-dev \
    libblas-dev \
    liblapack-dev \
    libgd-dev \
    gfortran \
    gzip \
    bzip2 \
    xz-utils \
    p7zip-full \
    libsqlite3-dev \
    libhdf5-dev \
    libbz2-dev \
    zlib1g-dev \
    libcairo2-dev \
    libxt-dev \
    && rm -rf /var/lib/apt/lists/*

# Install renv & dependencies
RUN R -e "install.packages('renv')"

# Create a directory for your project
WORKDIR /usr/local/src/my_project

# Copy your project files to the container (assuming they are in your current directory)
# This would typically include an renv.lock and renv/ directory if you have already snapshotted your environment
COPY . .

# Restore the renv environment based on your lockfile
# RUN R -e 'renv::restore()'

# Expose port for RStudio or Shiny apps, if needed
# EXPOSE 8787

# Default action: start an interactive R session
CMD ["R"]
Empty file.
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
library(tximport) | ||
library(vroom) | ||
|
||
#' Import RSEM isoform quantifications with tximport and save the result
#'
#' Reads a sample sheet, builds a named vector of isoform quantification
#' files and passes it to `tximport::tximport()` with `type = "rsem"`. The
#' resulting object is written with `saveRDS()` to `txi_isoform.rds`
#' (transcript-level output) or `txi_gene.rds` (gene-level output); both
#' paths are relative, so the file lands in the current working directory.
#'
#' @param quant_df_path Path to a delimited file readable by
#'   `vroom::vroom()`. It must contain the columns `isoform_file` and
#'   `library_id`.
#' @param TX_QUANTS If `TRUE` (the default) transcript-level output is kept
#'   (`txOut = TRUE`). If `FALSE` the quantifications are summarized with
#'   `tx2gene` (`txOut = FALSE`).
#' @param tx2gene Path to a transcript-to-gene table readable by
#'   `vroom::vroom()`. Required, and must exist, when `TX_QUANTS` is `FALSE`;
#'   ignored otherwise.
#'
#' @return `NULL`, invisibly. The function is called for its side effect of
#'   writing an `.rds` file.
process_tximport <- function(quant_df_path, TX_QUANTS = TRUE, tx2gene = NULL) {
  # The transcript-to-gene map is only needed for gene-level summarization.
  if (!TX_QUANTS) {
    if (is.null(tx2gene)) {
      stop("tx2gene cannot be null if TX_QUANTS is FALSE")
    } else if (!file.exists(tx2gene)) {
      stop("tx2gene file does not exist")
    }
  }

  if (!file.exists(quant_df_path)) {
    stop("quant_df_path does not exist")
  }

  quant_df <- vroom::vroom(quant_df_path)

  missing_cols <- setdiff(c("isoform_file", "library_id"), colnames(quant_df))
  if (length(missing_cols) > 0) {
    # paste0 keeps the backticks tight around the column names
    stop(paste0("`", paste(missing_cols, collapse = "`, `"),
                "` must be columns in `quant_df`"))
  }

  # Name each quantification file after its library so the columns of the
  # tximport matrices are labelled `library_<library_id>`.
  isoform_list <- setNames(quant_df$isoform_file,
                           paste0("library_", quant_df$library_id))

  if (TX_QUANTS) {
    txi_isoform <- tximport::tximport(files = isoform_list,
                                      type = "rsem",
                                      txIn = TRUE,
                                      txOut = TRUE,
                                      importer = vroom::vroom)
    saveRDS(txi_isoform, file.path("txi_isoform.rds"))
  } else {
    tx2gene_df <- vroom::vroom(tx2gene)
    txi_gene <- tximport::tximport(files = isoform_list,
                                   type = "rsem",
                                   txIn = TRUE,
                                   txOut = FALSE,
                                   tx2gene = tx2gene_df,
                                   importer = vroom::vroom)
    saveRDS(txi_gene, file.path("txi_gene.rds"))
  }

  # Return nothing, to avoid cluttering output
  invisible(NULL)
}
|
||
# # Define your parameters | ||
# params_list <- list(quant_df_path = "isoforms_df_20231009.csv", | ||
# TX_QUANTS = FALSE, | ||
# tx2gene="gencode38_tx2gene_20210919.csv") | ||
# | ||
# # Specify required packages | ||
# required_pkgs <- c("tximport", "vroom") | ||
# | ||
# # Set your SLURM options | ||
# # --mem-per-cpu=10G -N 1 -n 5 | ||
# slurm_opts <- list(time = '00:60:00', | ||
# nodes = 1, | ||
# mem='80G') | ||
# | ||
# | ||
# rslurm::slurm_call( | ||
# process_tximport, | ||
# params = params_list, | ||
# jobname='tximport', | ||
# pkgs=required_pkgs, | ||
# slurm_options = slurm_opts, | ||
# submit = FALSE | ||
# ) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
#' Summarize an all-by-all compiled metrics data frame for database upload
#'
#' Comparisons with fewer than `min_total_variants` total variants are
#' dropped. The remaining rows are ordered by decreasing `match_ratio` and
#' collapsed to one row per `library_id` describing:
#' \itemize{
#'   \item the labelled comparison, i.e. the first row in which
#'     `rna_subject == dna_subject` (`labelled_*` columns; `NA` when no such
#'     row exists),
#'   \item the best and second-best comparisons by `match_ratio`
#'     (`emperical_best_*` and `emperical_next_*` columns),
#'   \item chi-squared p-values comparing labelled vs. best
#'     (`chisq_labelled`) and best vs. second-best (`chisq_emperical`)
#'     mismatch/match counts.
#' }
#' A p-value is `NA` when one of the two comparisons it needs is missing,
#' for example when a library has a single remaining row or no labelled row.
#'
#' @param all_by_all_df a wgs all by all compiled metrics dataframe with the
#'   columns 'library_id', 'rna_subject', 'dna_subject', 'total_variants',
#'   'matching_variants', 'match_ratio'
#' @param min_total_variants minimum `total_variants` a comparison must have
#'   to be retained. Defaults to 100.
#'
#' @import dplyr
#'
#' @return a dataframe suitable for upload to the full_wgs_compare table,
#'   with one row per `library_id`
#'
#' @export
parse_all_by_all_compiled_results <- function(all_by_all_df,
                                              min_total_variants = 100) {
  required_cols <- c("library_id", "rna_subject", "dna_subject",
                     "total_variants", "matching_variants", "match_ratio")
  stopifnot(all(required_cols %in% colnames(all_by_all_df)))

  # First value belonging to the labelled comparison. `which()` drops NA
  # comparisons so a missing subject can never be picked up as "labelled";
  # `[1]` yields NA when there is no labelled row at all.
  first_labelled <- function(values, rna, dna) {
    values[which(rna == dna)][1]
  }

  # p-value of a chi-squared test on the 2x2 table
  #   rows: comparison a / comparison b, columns: mismatches / matches.
  # Returns NA instead of failing when either comparison is unavailable.
  chisq_p_value <- function(total_a, match_a, total_b, match_b) {
    counts <- c(total_a - match_a, total_b - match_b, match_a, match_b)
    if (length(counts) != 4 || anyNA(counts)) {
      return(NA_real_)
    }
    stats::chisq.test(matrix(counts, ncol = 2))$p.value
  }

  all_by_all_df %>%
    filter(total_variants >= !!min_total_variants) %>%
    group_by(library_id) %>%
    # sort so that first()/nth() below pick the best and second-best matches
    arrange(desc(match_ratio)) %>%
    summarize(
      labelled_dna_subject = first(rna_subject),
      labelled_total_variants =
        first_labelled(total_variants, rna_subject, dna_subject),
      labelled_match_variants =
        first_labelled(matching_variants, rna_subject, dna_subject),
      labelled_match_ratio =
        first_labelled(match_ratio, rna_subject, dna_subject),
      emperical_best_subject = first(dna_subject),
      emperical_best_total_variants = first(total_variants),
      emperical_best_match_variants = first(matching_variants),
      emperical_best_match_ratio = first(match_ratio),
      emperical_next_subject = nth(dna_subject, 2),
      emperical_next_total_variants = nth(total_variants, 2),
      emperical_next_match_variants = nth(matching_variants, 2),
      emperical_next_match_ratio = nth(match_ratio, 2),
      chisq_labelled = chisq_p_value(
        first_labelled(total_variants, rna_subject, dna_subject),
        first_labelled(matching_variants, rna_subject, dna_subject),
        first(total_variants),
        first(matching_variants)
      ),
      chisq_emperical = chisq_p_value(
        first(total_variants),
        first(matching_variants),
        nth(total_variants, 2),
        nth(matching_variants, 2)
      )
    )
}
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
library(tidyverse) | ||
library(here) | ||
|
||
# Open the project SQLite database.
con <- RSQLite::dbConnect(
  RSQLite::SQLite(),
  here("llfs_rnaseq_data/llfs_rnaseq_database.sqlite")
)

# Sample table, with its primary key exposed as `library_id`.
sample_df <- tbl(con, "sample") %>%
  collect() %>%
  dplyr::rename(library_id = pk) %>%
  select(library_id, fastq_id, visit, batch_id)

# Batch table, with its primary key exposed as `batch_id`.
batch_df <- tbl(con, "batch") %>%
  collect() %>%
  dplyr::rename(batch_id = pk)

# Attach batch information to every sample. The join keys are whatever
# columns the two tables share (no `by` is given).
sample_info_df <- left_join(sample_df, batch_df) %>%
  select(fastq_id, visit, data_dir, library_id, batch_id) %>%
  mutate(fastq_id = as.character(fastq_id))

# Every compiled metrics file found below the comparison scratch directory.
compile_files <- list.files(
  path = "/mnt/scratch/llfs_rna_dna_compare_test",
  pattern = "compiled_metrics.csv",
  recursive = TRUE,
  full.names = TRUE
)

# Only the 7th file is processed; it is named after its parent directory so
# that the name can serve as `data_dir` below.
compile_files_new <- setNames(
  compile_files[7],
  basename(dirname(compile_files[7]))
)

# Read the selected file(s), recording the list name in `data_dir`, and
# align the RNA sample columns with the sample table.
compiled_wgs_compare <- compile_files_new %>%
  map(read_csv) %>%
  bind_rows(.id = "data_dir") %>%
  dplyr::rename(
    fastq_id = rna_sample,
    visit = rna_visit
  )

# Rename to the upload column names, compute the match ratio and look up
# `library_id` through the sample information.
compiled_wgs_compare_upload <- compiled_wgs_compare %>%
  mutate(across(c(fastq_id, visit), as.character)) %>%
  dplyr::rename(
    dna_subject = dna_sample,
    homo_expr_cand = homo_expr_cand_fltr,
    total_variants = overlap_fltr,
    matching_variants = n_match_fltr
  ) %>%
  mutate(match_ratio = matching_variants / total_variants) %>%
  left_join(sample_info_df) %>%
  select(
    library_id,
    dna_subject,
    chr,
    total_variants,
    matching_variants,
    homo_expr_cand,
    match_ratio
  )

# dbAppendTable(con, 'wgs_compare', compiled_wgs_compare_upload)
|
||
|
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Oops, something went wrong.