diff --git a/R/Gene_Centric_Coding_Info.R b/R/Gene_Centric_Coding_Info.R index 71c265b..febdb32 100644 --- a/R/Gene_Centric_Coding_Info.R +++ b/R/Gene_Centric_Coding_Info.R @@ -87,14 +87,14 @@ Gene_Centric_Coding_Info <- function(category=c("plof","plof_ds","missense","dis if(category=="ptv") { - results <- info_synonymous(chr=chr,genofile=genofile,obj_nullmodel=obj_nullmodel,gene_name=gene_name,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff, + results <- info_ptv(chr=chr,genofile=genofile,obj_nullmodel=obj_nullmodel,gene_name=gene_name,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff, method_cond=method_cond,QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,Annotation_name=Annotation_name) } if(category=="ptv_ds") { - results <- info_synonymous(chr=chr,genofile=genofile,obj_nullmodel=obj_nullmodel,gene_name=gene_name,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff, + results <- info_ptv_ds(chr=chr,genofile=genofile,obj_nullmodel=obj_nullmodel,gene_name=gene_name,known_loci=known_loci,rare_maf_cutoff=rare_maf_cutoff, method_cond=method_cond,QC_label=QC_label,variant_type=variant_type,geno_missing_imputation=geno_missing_imputation, Annotation_dir=Annotation_dir,Annotation_name_catalog=Annotation_name_catalog,Annotation_name=Annotation_name) } diff --git a/R/Gene_Centric_Coding_Results_Summary.R b/R/Gene_Centric_Coding_Results_Summary.R index c640cde..d382955 100644 --- a/R/Gene_Centric_Coding_Results_Summary.R +++ b/R/Gene_Centric_Coding_Results_Summary.R @@ -166,6 +166,158 @@ Gene_Centric_Coding_Results_Summary <- function(agds_dir,gene_centric_coding_job # synonymous results_synonymous_genome <- results_synonymous_genome[results_synonymous_genome[,"cMAC"]>cMAC_cutoff,] + ### recalculate missense pvalue + if(cMAC_cutoff > 0) + { + genes_name_disruptive_missense <- as.vector(unlist(results_disruptive_missense_genome[,1])) + genes_name_missense <- as.vector(unlist(results_missense_genome[,1])) + + recal_id <- (1:dim(results_missense_genome)[1])[(!genes_name_missense%in%genes_name_disruptive_missense)] + + for(kk in recal_id) + { + print(kk) + results_m <- results_missense_genome[kk,] + + if(use_SPA) + { + ## disruptive missense cMAC < cut_off, set p-value to 1 + results_missense_genome[kk,"Burden(1,25)-Disruptive"] <- 1 + results_missense_genome[kk,"Burden(1,1)-Disruptive"] <- 1 + + apc_num <- (length(results_m)-10)/2 + p_seq <- c(1:apc_num,1:apc_num+(apc_num+1)) + + ## calculate STAAR-B + pvalues_sub <- as.numeric(results_m[6:length(results_m)][p_seq]) + + if(sum(is.na(pvalues_sub))>0) + { + if(sum(is.na(pvalues_sub))==length(pvalues_sub)) + { + results_m["STAAR-B"] <- 1 + }else + { + ## not all NAs + pvalues_sub <- pvalues_sub[!is.na(pvalues_sub)] + if(sum(pvalues_sub[pvalues_sub<1])>0) + { + ## not all ones + results_m["STAAR-B"] <- CCT(pvalues_sub[pvalues_sub<1]) + + }else + { + results_m["STAAR-B"] <- 1 + + } + } + }else + { + if(sum(pvalues_sub[pvalues_sub<1])>0) + { + results_m["STAAR-B"] <- CCT(pvalues_sub[pvalues_sub<1]) + }else + { + results_m["STAAR-B"] <- 1 + } + } + + results_missense_genome[kk,"STAAR-B"] <- results_m["STAAR-B"] + + ## calculate STAAR-B(1,25) + pvalues_sub <- as.numeric(results_m[6:length(results_m)][c(1:apc_num)]) + if(sum(is.na(pvalues_sub))>0) + { + if(sum(is.na(pvalues_sub))==length(pvalues_sub)) + { + results_m["STAAR-B(1,25)"] <- 1 + }else + { + ## not all NAs + pvalues_sub <- pvalues_sub[!is.na(pvalues_sub)] + if(sum(pvalues_sub[pvalues_sub<1])>0) + { + ## not all ones + results_m["STAAR-B(1,25)"] <- CCT(pvalues_sub[pvalues_sub<1]) + + }else + { + results_m["STAAR-B(1,25)"] <- 1 + + } + } + }else + { + if(sum(pvalues_sub[pvalues_sub<1])>0) + { + results_m["STAAR-B(1,25)"] <- CCT(pvalues_sub[pvalues_sub<1]) + }else + { + results_m["STAAR-B(1,25)"] <- 1 + } + } + + results_missense_genome[kk,"STAAR-B(1,25)"] <- results_m["STAAR-B(1,25)"] + + ## calculate STAAR-B(1,1) + pvalues_sub <- as.numeric(results_m[6:length(results_m)][c(1:apc_num+(apc_num+1))]) + if(sum(is.na(pvalues_sub))>0) + { + if(sum(is.na(pvalues_sub))==length(pvalues_sub)) + { + results_m["STAAR-B(1,1)"] <- 1 + }else + { + ## not all NAs + pvalues_sub <- pvalues_sub[!is.na(pvalues_sub)] + if(sum(pvalues_sub[pvalues_sub<1])>0) + { + ## not all ones + results_m["STAAR-B(1,1)"] <- CCT(pvalues_sub[pvalues_sub<1]) + + }else + { + results_m["STAAR-B(1,1)"] <- 1 + + } + } + }else + { + if(sum(pvalues_sub[pvalues_sub<1])>0) + { + results_m["STAAR-B(1,1)"] <- CCT(pvalues_sub[pvalues_sub<1]) + }else + { + results_m["STAAR-B(1,1)"] <- 1 + } + } + + results_missense_genome[kk,"STAAR-B(1,1)"] <- results_m["STAAR-B(1,1)"] + }else + { + ## disruptive missense cMAC < cut_off, set p-value to 1 + results_missense_genome[kk,"Burden(1,25)-Disruptive"] <- 1 + results_missense_genome[kk,"Burden(1,1)-Disruptive"] <- 1 + results_missense_genome[kk,"SKAT(1,25)-Disruptive"] <- 1 + results_missense_genome[kk,"SKAT(1,1)-Disruptive"] <- 1 + results_missense_genome[kk,"ACAT-V(1,25)-Disruptive"] <- 1 + results_missense_genome[kk,"ACAT-V(1,1)-Disruptive"] <- 1 + + apc_num <- (length(results_m)-19)/6 + + p_seq <- c(1:apc_num,1:apc_num+(apc_num+1),1:apc_num+2*(apc_num+1),1:apc_num+3*(apc_num+1),1:apc_num+4*(apc_num+1),1:apc_num+5*(apc_num+1)) + + results_m["STAAR-O"] <- CCT(as.numeric(results_m[6:length(results_m)][p_seq])) + results_m["STAAR-S(1,25)"] <- CCT(as.numeric(results_m[6:length(results_m)][c(1:apc_num)])) + results_m["STAAR-S(1,1)"] <- CCT(as.numeric(results_m[6:length(results_m)][c(1:apc_num+(apc_num+1))])) + results_m["STAAR-B(1,25)"] <- CCT(as.numeric(results_m[6:length(results_m)][c(1:apc_num+2*(apc_num+1))])) + results_m["STAAR-B(1,1)"] <- CCT(as.numeric(results_m[6:length(results_m)][c(1:apc_num+3*(apc_num+1))])) + results_m["STAAR-A(1,25)"] <- CCT(as.numeric(results_m[6:length(results_m)][c(1:apc_num+4*(apc_num+1))])) + results_m["STAAR-A(1,1)"] <- CCT(as.numeric(results_m[6:length(results_m)][c(1:apc_num+5*(apc_num+1))])) + } + } + } + ###### whole-genome results # plof save(results_plof_genome,file=paste0(output_path,"plof.Rdata")) diff --git a/R/Gene_Centric_Coding_Results_Summary_incl_ptv.R b/R/Gene_Centric_Coding_Results_Summary_incl_ptv.R index e7095a0..32c399c 100644 --- a/R/Gene_Centric_Coding_Results_Summary_incl_ptv.R +++ b/R/Gene_Centric_Coding_Results_Summary_incl_ptv.R @@ -189,6 +189,160 @@ Gene_Centric_Coding_Results_Summary_incl_ptv <- function(agds_dir,gene_centric_c # ptv + disruptive missense results_ptv_ds_genome <- results_ptv_ds_genome[results_ptv_ds_genome[,"cMAC"]>cMAC_cutoff,] + + ### recalculate missense pvalue + if(cMAC_cutoff > 0) + { + genes_name_disruptive_missense <- as.vector(unlist(results_disruptive_missense_genome[,1])) + genes_name_missense <- as.vector(unlist(results_missense_genome[,1])) + + recal_id <- (1:dim(results_missense_genome)[1])[(!genes_name_missense%in%genes_name_disruptive_missense)] + + for(kk in recal_id) + { + print(kk) + results_m <- results_missense_genome[kk,] + + if(use_SPA) + { + ## disruptive missense cMAC < cut_off, set p-value to 1 + results_missense_genome[kk,"Burden(1,25)-Disruptive"] <- 1 + results_missense_genome[kk,"Burden(1,1)-Disruptive"] <- 1 + + apc_num <- (length(results_m)-10)/2 + p_seq <- c(1:apc_num,1:apc_num+(apc_num+1)) + + ## calculate STAAR-B + pvalues_sub <- as.numeric(results_m[6:length(results_m)][p_seq]) + + if(sum(is.na(pvalues_sub))>0) + { + if(sum(is.na(pvalues_sub))==length(pvalues_sub)) + { + results_m["STAAR-B"] <- 1 + }else + { + ## not all NAs + pvalues_sub <- pvalues_sub[!is.na(pvalues_sub)] + if(sum(pvalues_sub[pvalues_sub<1])>0) + { + ## not all ones + results_m["STAAR-B"] <- CCT(pvalues_sub[pvalues_sub<1]) + + }else + { + results_m["STAAR-B"] <- 1 + + } + } + }else + { + if(sum(pvalues_sub[pvalues_sub<1])>0) + { + results_m["STAAR-B"] <- CCT(pvalues_sub[pvalues_sub<1]) + }else + { + results_m["STAAR-B"] <- 1 + } + } + + results_missense_genome[kk,"STAAR-B"] <- results_m["STAAR-B"] + + ## calculate STAAR-B(1,25) + pvalues_sub <- as.numeric(results_m[6:length(results_m)][c(1:apc_num)]) + if(sum(is.na(pvalues_sub))>0) + { + if(sum(is.na(pvalues_sub))==length(pvalues_sub)) + { + results_m["STAAR-B(1,25)"] <- 1 + }else + { + ## not all NAs + pvalues_sub <- pvalues_sub[!is.na(pvalues_sub)] + if(sum(pvalues_sub[pvalues_sub<1])>0) + { + ## not all ones + results_m["STAAR-B(1,25)"] <- CCT(pvalues_sub[pvalues_sub<1]) + + }else + { + results_m["STAAR-B(1,25)"] <- 1 + + } + } + }else + { + if(sum(pvalues_sub[pvalues_sub<1])>0) + { + results_m["STAAR-B(1,25)"] <- CCT(pvalues_sub[pvalues_sub<1]) + }else + { + results_m["STAAR-B(1,25)"] <- 1 + } + } + + results_missense_genome[kk,"STAAR-B(1,25)"] <- results_m["STAAR-B(1,25)"] + + ## calculate STAAR-B(1,1) + pvalues_sub <- as.numeric(results_m[6:length(results_m)][c(1:apc_num+(apc_num+1))]) + if(sum(is.na(pvalues_sub))>0) + { + if(sum(is.na(pvalues_sub))==length(pvalues_sub)) + { + results_m["STAAR-B(1,1)"] <- 1 + }else + { + ## not all NAs + pvalues_sub <- pvalues_sub[!is.na(pvalues_sub)] + if(sum(pvalues_sub[pvalues_sub<1])>0) + { + ## not all ones + results_m["STAAR-B(1,1)"] <- CCT(pvalues_sub[pvalues_sub<1]) + + }else + { + results_m["STAAR-B(1,1)"] <- 1 + + } + } + }else + { + if(sum(pvalues_sub[pvalues_sub<1])>0) + { + results_m["STAAR-B(1,1)"] <- CCT(pvalues_sub[pvalues_sub<1]) + }else + { + results_m["STAAR-B(1,1)"] <- 1 + } + } + + results_missense_genome[kk,"STAAR-B(1,1)"] <- results_m["STAAR-B(1,1)"] + }else + { + ## disruptive missense cMAC < cut_off, set p-value to 1 + results_missense_genome[kk,"Burden(1,25)-Disruptive"] <- 1 + results_missense_genome[kk,"Burden(1,1)-Disruptive"] <- 1 + results_missense_genome[kk,"SKAT(1,25)-Disruptive"] <- 1 + results_missense_genome[kk,"SKAT(1,1)-Disruptive"] <- 1 + results_missense_genome[kk,"ACAT-V(1,25)-Disruptive"] <- 1 + results_missense_genome[kk,"ACAT-V(1,1)-Disruptive"] <- 1 + + apc_num <- (length(results_m)-19)/6 + + p_seq <- c(1:apc_num,1:apc_num+(apc_num+1),1:apc_num+2*(apc_num+1),1:apc_num+3*(apc_num+1),1:apc_num+4*(apc_num+1),1:apc_num+5*(apc_num+1)) + + results_m["STAAR-O"] <- CCT(as.numeric(results_m[6:length(results_m)][p_seq])) + results_m["STAAR-S(1,25)"] <- CCT(as.numeric(results_m[6:length(results_m)][c(1:apc_num)])) + results_m["STAAR-S(1,1)"] <- CCT(as.numeric(results_m[6:length(results_m)][c(1:apc_num+(apc_num+1))])) + results_m["STAAR-B(1,25)"] <- CCT(as.numeric(results_m[6:length(results_m)][c(1:apc_num+2*(apc_num+1))])) + results_m["STAAR-B(1,1)"] <- CCT(as.numeric(results_m[6:length(results_m)][c(1:apc_num+3*(apc_num+1))])) + results_m["STAAR-A(1,25)"] <- CCT(as.numeric(results_m[6:length(results_m)][c(1:apc_num+4*(apc_num+1))])) + results_m["STAAR-A(1,1)"] <- CCT(as.numeric(results_m[6:length(results_m)][c(1:apc_num+5*(apc_num+1))])) + } + } + } + + ###### whole-genome results # plof save(results_plof_genome,file=paste0(output_path,"plof.Rdata")) diff --git a/README.md b/README.md index 2829c6c..699363d 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,4 @@ [![R build status](https://github.com/xihaoli/STAARpipelineSummary/workflows/R-CMD-check/badge.svg)](https://github.com/xihaoli/STAARpipelineSummary/actions) -[![Build status](https://ci.appveyor.com/api/projects/status/glc569adbr7ar11j/branch/main?svg=true)](https://ci.appveyor.com/project/xihaoli/staarpipelinesummary/branch/main) [![License: GPL v3](https://img.shields.io/badge/License-GPLv3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) # STAARpipelineSummary @@ -30,7 +29,7 @@ Please see the **STAARpipelineSum ## Data Availability The whole-genome functional annotation data assembled from a variety of sources and the precomputed annotation principal components are available at the [Functional Annotation of Variant - Online Resource (FAVOR)](https://favor.genohub.org) site and [FAVOR Essential Database](https://doi.org/10.7910/DVN/1VGTJI). ## Version -The current version is 0.9.7 (March 23, 2024). +The current version is 0.9.7 (July 25, 2024). ## Citation If you use **STAARpipeline** and **STAARpipelineSummary** for your work, please cite: