Skip to content

Commit

Permalink
Fixed non-C.e. plot_gene error
Browse files Browse the repository at this point in the history
  • Loading branch information
msauria committed Jul 12, 2024
1 parent a93c0a1 commit 3d8bd5d
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 11 deletions.
11 changes: 7 additions & 4 deletions bin/Fix_Isotype_names_bulk.R
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,9 @@ process_phenotypes <- function(df,
# get strain isotypes
strain_isotypes_db <- data.table::fread(args[3])
# identify strains that were phenotyped, but are not part of an isotype
strains_in <- df$strain %in% strain_isotypes_db$strain
message(glue::glue("{df$strain} "))
message(glue::glue("{strains_in} "))
non_isotype_strains <- dplyr::filter(df,
!(strain %in% c(strain_isotypes_db$strain, "PD1074")),
!(strain %in% strain_isotypes_db$isotype))
Expand All @@ -233,7 +236,7 @@ process_phenotypes <- function(df,

strains_to_remove <- unique(non_isotype_strains$strain)

message(glue::glue("WARNING: Removing strain(s) {strains_to_remove} because they do not fall into a defined isotype."))
message(glue::glue("WARNING: Removing strain(s) {strains_to_remove} because they do not fall into a defined isotype.\n"))

df_non_isotypes_removed <- dplyr::filter( df, !( strain %in% strains_to_remove) )
} else {
Expand Down Expand Up @@ -271,7 +274,7 @@ process_phenotypes <- function(df,
fix <- df %>%
dplyr::mutate(strain = isotype) %>%
dplyr::select(-ref_strain, -num)
message(glue::glue("WARNING: Non-isotype reference strain {df$strain[1]} renamed to isotype {i}."))
message(glue::glue("WARNING: Non-isotype reference strain {df$strain[1]} renamed to isotype {i}.\n"))
} else {
# remove non-isotype strains
fix <- df %>%
Expand All @@ -280,10 +283,10 @@ process_phenotypes <- function(df,

# warn the user
if(sum(df$ref_strain) > 0) {
message(glue::glue("WARNING: Non-isotype reference strain(s) {paste(df %>% dplyr::filter(!ref_strain) %>% dplyr::pull(strain) %>% unique(), collapse = ', ')} from isotype group {i} removed."))
message(glue::glue("WARNING: Non-isotype reference strain(s) {paste(df %>% dplyr::filter(!ref_strain) %>% dplyr::pull(strain) %>% unique(), collapse = ', ')} from isotype group {i} removed.\n"))
}
else {
message(glue::glue("WARNING: Non-isotype reference strain(s) {paste(df %>% dplyr::filter(!ref_strain) %>% dplyr::pull(strain) %>% unique(), collapse = ', ')} from isotype group {i} removed. To include this isotype in the analysis, you can (1) phenotype {i} or (2) evaluate the similarity of these strains and choose one representative for the group."))
message(glue::glue("WARNING: Non-isotype reference strain(s) {paste(df %>% dplyr::filter(!ref_strain) %>% dplyr::pull(strain) %>% unique(), collapse = ', ')} from isotype group {i} removed. To include this isotype in the analysis, you can (1) phenotype {i} or (2) evaluate the similarity of these strains and choose one representative for the group.\n"))
}
}
# add to data
Expand Down
21 changes: 18 additions & 3 deletions bin/plot_genes.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#!/usr/bin/env Rscript
library(dplyr)
library(tibble)
library(tidyr)
library(ggplot2)
library(stringr)
Expand All @@ -12,6 +13,8 @@ library(purrr)
# 2 = phenotype file
# 3 = gene file
# 4 = annotation file
# 5 = algorithm
# 6 = species

args <- commandArgs(trailingOnly = TRUE)

Expand Down Expand Up @@ -52,6 +55,8 @@ query_regions <- pr_trait_ld %>%

query_regions

# query_regions == CHROM, start_pos, end_pos

# update 20210330 KSE: use impute vcf for genotypes and hard vcf annotation file for annotations
# this could be bcsq or snpeff
annotations <- data.table::fread(args[4])
Expand All @@ -63,6 +68,8 @@ if("CONSEQUENCE" %in% names(annotations)) {
ann_type <- "snpeff"
}

species = args[6]

# do this for each QTL separately, then combine
annotation_out <- list()
for(r in 1:nrow(query_regions)){
Expand All @@ -71,6 +78,8 @@ for(r in 1:nrow(query_regions)){
eq <- query_regions$end_pos[r]




# a function to check how many strains in the mapping set are in the column "Strains" of the annotation file

alt_strain_check=function(df){
Expand All @@ -93,6 +102,8 @@ for(r in 1:nrow(query_regions)){
dplyr::select(CHROM=CHR,POS,strains_used=strains) %>%
dplyr::distinct()

# strain_list == CHROM, POS, strains_used

# count and filter
annotations_region <- annotations %>%
dplyr::filter(CHROM == cq,
Expand All @@ -115,7 +126,6 @@ for(r in 1:nrow(query_regions)){
}%>%
tidyr::unite(marker, CHROM, POS, sep = "_")


# pull variants from finemap impute -- don't need this?
# impute_vcf <- data.table::fread(args[4]) %>%
# dplyr::select(marker:POS, REF:strains)
Expand All @@ -142,7 +152,12 @@ for(r in 1:nrow(query_regions)){
}

# combine annotations for regions
annotation_df <- dplyr::bind_rows(annotation_out) %>%
annotation_out = dplyr::bind_rows(annotation_out)
if(ann_type == "bcsq" && species != "c_elegans") {
annotation_out = tibble::add_column(annotation_out, dplyr::select(annotation_out, GENE) %>% dplyr::rename(WORMBASE_ID = GENE))
}

annotation_df <- annotation_out %>%
dplyr::left_join(pr_trait_ld, ., by = c("marker", "REF", "ALT")) %>% {
if(ann_type == "bcsq") dplyr::rename(., gene_id = WORMBASE_ID) else dplyr::select(., -gene_name)
}
Expand Down Expand Up @@ -186,7 +201,7 @@ tidy_genes_in_region <- if(ann_type == "bcsq") {
# path = glue::glue("{analysis_trait}_{cq}_{sq}-{eq}_{ann_type}_genes_{args[5]}.tsv"))

write_tsv(tidy_genes_in_region,
path = glue::glue("{analysis_trait}_{cq}_{output_sq}-{output_eq}_{ann_type}_genes_{args[5]}.tsv"))
file = glue::glue("{analysis_trait}_{cq}_{output_sq}-{output_eq}_{ann_type}_genes_{args[5]}.tsv"))


for(r in 1:length(unique(ugly_genes_in_region$start_pos))){
Expand Down
5 changes: 3 additions & 2 deletions main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ if(params.debug) {
println "WARNING: Using snpeff annotation. To use BCSQ annotation, please use a newer vcf (2021 or later)"
ann_file = Channel.fromPath("${params.dataDir}/${params.species}/WI/variation/${params.vcf}/vcf/WI.${params.vcf}.strain-annotation.snpeff.tsv")
} else {
ann_file = Channel.fromPath("${parms.dataDir}/${params.species}/WI/variation/${params.vcf}/vcf/WI.${params.vcf}.strain-annotation.tsv")
ann_file = Channel.fromPath("${params.dataDir}/${params.species}/WI/variation/${params.vcf}/vcf/WI.${params.vcf}.strain-annotation.tsv")
}
} else {
// check that vcf file exists, if it does, use it. If not, throw error
Expand Down Expand Up @@ -433,7 +433,8 @@ workflow {
.combine(ann_file)
.combine(Channel.fromPath("${params.genes}"))
.combine(Channel.fromPath("${params.bin_dir}/Finemap_QTL_Intervals.R"))
.combine(Channel.fromPath("${params.bin_dir}/plot_genes.R")) | gcta_fine_maps
.combine(Channel.fromPath("${params.bin_dir}/plot_genes.R"))
.combine(Channel.of("${params.species}")) | gcta_fine_maps


// divergent regions and haplotypes
Expand Down
4 changes: 2 additions & 2 deletions modules/post-mapping.nf
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ process gcta_fine_maps {
file(formatted_traits), file(trait_bed), file(trait_bim), file(trait_fam), \
file(trait_map), file(trait_nosex), file(trait_ped), file(grm_bin), file(grm_id), \
file(grm_nbin), file(grm_bin_inbred), file(grm_id_inbred), \
file(annotation), file(genefile), file(finemap_qtl_intervals), file(plot_genes)
file(annotation), file(genefile), file(finemap_qtl_intervals), file(plot_genes), val(species)

output:
tuple file("*.fastGWA"), val(TRAIT), file("*.prLD_df*.tsv"), file("*.pdf"), file("*_genes*.tsv"), val(algorithm)
Expand Down Expand Up @@ -248,7 +248,7 @@ process gcta_fine_maps {
Rscript --vanilla ${finemap_qtl_intervals} ${TRAIT}.\$chr.\$start.\$stop.finemap_inbred.${algorithm}.fastGWA \$i ${TRAIT}.\$chr.\$start.\$stop.LD_${algorithm}.tsv ${algorithm}
Rscript --vanilla ${plot_genes} ${TRAIT}.\$chr.\$start.\$stop.prLD_df_${algorithm}.tsv ${pheno} ${genefile} ${annotation} ${algorithm}
Rscript --vanilla ${plot_genes} ${TRAIT}.\$chr.\$start.\$stop.prLD_df_${algorithm}.tsv ${pheno} ${genefile} ${annotation} ${algorithm} ${species}
done
"""
}
Expand Down

0 comments on commit 3d8bd5d

Please sign in to comment.