added ont plotting

mulqueenr · Sep 25, 2023 · 64a7818 · 64a7818
1 parent 43300d4
commit 64a7818
Show file tree

Hide file tree

Showing 2 changed files with 69 additions and 7 deletions.
diff --git a/docs/MD anderson analysis/gccACT_init.md b/docs/MD anderson analysis/gccACT_init.md
@@ -168,21 +168,24 @@ multiqc .
 Analysis from 
 https://navinlabcode.github.io/CopyKit-UserGuide/quick-start.html
 
+```bash
+conda activate r4.2
+```
 ```R
 library(copykit)
 library(BiocParallel)
 library(EnsDb.Hsapiens.v86)
 register(MulticoreParam(progressbar = T, workers = 50), default = T)
 BiocParallel::bpparam()
-setwd("/volumes/seq/projects/gccACT/230306_mdamb231_test/cells")
+setwd("/volumes/seq/projects/gccACT/230306_mdamb231_test")
 
-tumor2 <- runVarbin("/volumes/seq/projects/gccACT/230306_mdamb231_test/cells",
+tumor <- runVarbin("/volumes/seq/projects/gccACT/230306_mdamb231_test/cells",
                  remove_Y = TRUE,
                  genome="hg38",
                  is_paired_end=TRUE)
 
 # Mark euploid cells if they exist
-tumor2 <- findAneuploidCells(tumor2)
+tumor <- findAneuploidCells(tumor)
 
 # Mark low-quality cells for filtering
 tumor <- findOutliers(tumor)
@@ -200,10 +203,9 @@ tumor <- tumor[,SummarizedExperiment::colData(tumor)$is_aneuploid == TRUE]
 # kNN smooth profiles
 tumor <- knnSmooth(tumor)
 
-
+tumor <- runUmap(tumor)
 k_clones<-findSuggestedK(tumor)
 # Create a umap embedding 
-tumor <- runUmap(tumor)
 
 # Find clusters of similar copy number profiles and plot the results
 # If no k_subclones value is provided, automatically detect it from findSuggestedK()
@@ -232,6 +234,61 @@ for (i in unique(clone_out$clone)){
 }
 ```
 
+
+## Correlating our HiC Data to ONT Data
+See /gccACT_ONT/ for more details on ONT processing.
+
+```bash
+#Data stored here:
+/volumes/seq/projects/gccACT/230808_mdamb231_ONT/20230726_1239_2D_PAO38369_output
+
+#CNV calls using QDNA algorithm:
+/volumes/seq/projects/gccACT/230808_mdamb231_ONT/20230726_1239_2D_PAO38369_output/qdna_seq/20230726_1239_2D_PAO38369_output_combined.bed
+
+
+#SV calls using Sniffles2:
+/volumes/seq/projects/gccACT/230808_mdamb231_ONT/20230726_1239_2D_PAO38369_output/20230726_1239_2D_PAO38369_output.wf_sv.vcf.gz
+```
+
+```R
+library(copykit)
+library(BiocParallel)
+library(EnsDb.Hsapiens.v86)
+library(GenomicRanges)
+setwd("/volumes/seq/projects/gccACT/230306_mdamb231_test")
+tumor<-readRDS(file="/volumes/seq/projects/gccACT/230306_mdamb231_test/scCNA.rds")
+ont<-read.table("/volumes/seq/projects/gccACT/230808_mdamb231_ONT/20230726_1239_2D_PAO38369_output/qdna_seq/20230726_1239_2D_PAO38369_output_combined.bed",sep="\t",skip=1)
+
+colnames(ont)<-c("chrom","start","end","range","log2ratio","pass_filt","cnv")
+ont$chrom<-paste0("chr",ont$chrom)
+ont<-makeGRangesFromDataFrame(ont,keep.extra.columns=TRUE)
+
+#https://rdrr.io/github/navinlabcode/copykit/src/R/plotHeatmap.R
+copykit_ranges<-makeGRangesFromDataFrame(tumor@rowRanges,keep.extra.columns=TRUE)
+overlap<-findOverlaps(subject=ont,query=copykit_ranges,select="first")
+
+ont_overlaps<-cbind(as.data.frame(copykit_ranges),as.data.frame(ont)[overlap,c("log2ratio","cnv")])
+
+plt<-plotHeatmap(tumor, label = 'subclones',order='hclust')
+
+# Plot a copy number heatmap with clustering annotation
+pdf("subclone.heatmap.pdf")
+print(plt)
+dev.off()
+
+seg_data_ont<-t(as.data.frame(ont_overlaps$log2ratio))
+seg_data <- t(SummarizedExperiment::assay(tumor, "logr"))
+dim(seg_data_ont)[2]==dim(seg_data)[2]
+
+plt@matrix<-seg_data_ont
+plt@row_order<-c(1)
+# Plot a copy number heatmap with clustering annotation
+pdf("subclone.heatmap.ont.pdf")
+print(plt)
+dev.off()
+#manipulated pdf in illustrator to make the ONT data in line with other data, both pdfs are printed to match width so they can be stacked for easy viewing
+```
+
 ### Count of WGS and GCC Reads
 ```bash
 dir="/volumes/seq/projects/gccACT/230306_mdamb231_test"
@@ -1265,7 +1322,6 @@ dip-c color -n color/mm10.chr.txt 20k.1.clean.3dg | dip-c vis -c /dev/stdin 20k.
 ```
 
 
-
 <!--
 ### eccDNA Analysis
 https://www.science.org/doi/10.1126/sciadv.aba2489

diff --git a/docs/MD anderson analysis/gccACT_ont.md b/docs/MD anderson analysis/gccACT_ont.md
@@ -79,6 +79,10 @@ singularity pull docker://ontresearch/wf-human-variation-snp:sha0d7e7e8e8207d9d2
 #methyl
 singularity pull docker://ontresearch/wf-human-variation-methyl:sha44a13bcf48db332b2277bb9f95b56d64e393a1d5 > /dev/null
 
+singularity pull  --name ontresearch-wf-human-variation-methyl-sha44a13bcf48db332b2277bb9f95b56d64e393a1d5.img.pulling.1695478159368 docker://ontresearch/wf-human-variation-methyl:sha44a13bcf48db332b2277bb9f95b56d64e393a1d5 > /dev/null
+
+#all together at once
+nextflow download /home/rmulqueen/wf-human-variation-master/main.nf --container singularity
 ```
 
 ## Running ONT nextflow pipeline.
@@ -131,6 +135,7 @@ export SINGULARITY_CACHEDIR="/rsrch4/home/genetics/rmulqueen/singularity/"
 export SINGULARITY_TMPDIR=$SINGULARITY_CACHEDIR/tmp
 export SINGULARITY_PULLDIR=$SINGULARITY_CACHEDIR/pull
 export CWL_SINGULARITY_CACHE=$SINGULARITY_PULLDIR
+export NXF_OFFLINE='TRUE' #https://nf-co.re/docs/usage/offline
 
 #dorado run (succeeded previously so commenting out here)
 #output bam file from dorado caller has to be sorted before it can be used in the pipeline.
@@ -154,7 +159,8 @@ nextflow run /home/rmulqueen/wf-human-variation-master/main.nf \
     --sample_name ${output_name} \
     --out_dir ${wd_out}/${output_name}/ \
     -with-singularity \
-    -without-docker
+    -without-docker \
+    -offline
 
 #note sif files may need to be manually pulled (as above) for updates to wf-human-variation-master in the future
 ```