Skip to content

Commit

Permalink
added ont plotting
Browse files Browse the repository at this point in the history
  • Loading branch information
mulqueenr committed Sep 25, 2023
1 parent 43300d4 commit 64a7818
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 7 deletions.
68 changes: 62 additions & 6 deletions docs/MD anderson analysis/gccACT_init.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,21 +168,24 @@ multiqc .
Analysis from
https://navinlabcode.github.io/CopyKit-UserGuide/quick-start.html

```bash
conda activate r4.2
```
```R
library(copykit)
library(BiocParallel)
library(EnsDb.Hsapiens.v86)
register(MulticoreParam(progressbar = T, workers = 50), default = T)
BiocParallel::bpparam()
setwd("/volumes/seq/projects/gccACT/230306_mdamb231_test/cells")
setwd("/volumes/seq/projects/gccACT/230306_mdamb231_test")

tumor2 <- runVarbin("/volumes/seq/projects/gccACT/230306_mdamb231_test/cells",
tumor <- runVarbin("/volumes/seq/projects/gccACT/230306_mdamb231_test/cells",
remove_Y = TRUE,
genome="hg38",
is_paired_end=TRUE)

# Mark euploid cells if they exist
tumor2 <- findAneuploidCells(tumor2)
tumor <- findAneuploidCells(tumor)

# Mark low-quality cells for filtering
tumor <- findOutliers(tumor)
Expand All @@ -200,10 +203,9 @@ tumor <- tumor[,SummarizedExperiment::colData(tumor)$is_aneuploid == TRUE]
# kNN smooth profiles
tumor <- knnSmooth(tumor)


tumor <- runUmap(tumor)
k_clones<-findSuggestedK(tumor)
# Create a umap embedding
tumor <- runUmap(tumor)

# Find clusters of similar copy number profiles and plot the results
# If no k_subclones value is provided, automatically detect it from findSuggestedK()
Expand Down Expand Up @@ -232,6 +234,61 @@ for (i in unique(clone_out$clone)){
}
```


## Correlating our HiC Data to ONT Data
See /gccACT_ONT/ for more details on ONT processing.

```bash
#Data stored here:
/volumes/seq/projects/gccACT/230808_mdamb231_ONT/20230726_1239_2D_PAO38369_output

#CNV calls using QDNA algorithm:
/volumes/seq/projects/gccACT/230808_mdamb231_ONT/20230726_1239_2D_PAO38369_output/qdna_seq/20230726_1239_2D_PAO38369_output_combined.bed


#SV calls using Sniffles2:
/volumes/seq/projects/gccACT/230808_mdamb231_ONT/20230726_1239_2D_PAO38369_output/20230726_1239_2D_PAO38369_output.wf_sv.vcf.gz
```

```R
library(copykit)
library(BiocParallel)
library(EnsDb.Hsapiens.v86)
library(GenomicRanges)
setwd("/volumes/seq/projects/gccACT/230306_mdamb231_test")
tumor<-readRDS(file="/volumes/seq/projects/gccACT/230306_mdamb231_test/scCNA.rds")
ont<-read.table("/volumes/seq/projects/gccACT/230808_mdamb231_ONT/20230726_1239_2D_PAO38369_output/qdna_seq/20230726_1239_2D_PAO38369_output_combined.bed",sep="\t",skip=1)

colnames(ont)<-c("chrom","start","end","range","log2ratio","pass_filt","cnv")
ont$chrom<-paste0("chr",ont$chrom)
ont<-makeGRangesFromDataFrame(ont,keep.extra.columns=TRUE)

#https://rdrr.io/github/navinlabcode/copykit/src/R/plotHeatmap.R
copykit_ranges<-makeGRangesFromDataFrame(tumor@rowRanges,keep.extra.columns=TRUE)
overlap<-findOverlaps(subject=ont,query=copykit_ranges,select="first")

ont_overlaps<-cbind(as.data.frame(copykit_ranges),as.data.frame(ont)[overlap,c("log2ratio","cnv")])

plt<-plotHeatmap(tumor, label = 'subclones',order='hclust')

# Plot a copy number heatmap with clustering annotation
pdf("subclone.heatmap.pdf")
print(plt)
dev.off()

seg_data_ont<-t(as.data.frame(ont_overlaps$log2ratio))
seg_data <- t(SummarizedExperiment::assay(tumor, "logr"))
dim(seg_data_ont)[2]==dim(seg_data)[2]

plt@matrix<-seg_data_ont
plt@row_order<-c(1)
# Plot a copy number heatmap with clustering annotation
pdf("subclone.heatmap.ont.pdf")
print(plt)
dev.off()
#manipulated pdf in illustrator to make the ONT data in line with other data, both pdfs are printed to match width so they can be stacked for easy viewing
```

### Count of WGS and GCC Reads
```bash
dir="/volumes/seq/projects/gccACT/230306_mdamb231_test"
Expand Down Expand Up @@ -1265,7 +1322,6 @@ dip-c color -n color/mm10.chr.txt 20k.1.clean.3dg | dip-c vis -c /dev/stdin 20k.
```



<!--
### eccDNA Analysis
https://www.science.org/doi/10.1126/sciadv.aba2489
Expand Down
8 changes: 7 additions & 1 deletion docs/MD anderson analysis/gccACT_ont.md
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ singularity pull docker://ontresearch/wf-human-variation-snp:sha0d7e7e8e8207d9d2
#methyl
singularity pull docker://ontresearch/wf-human-variation-methyl:sha44a13bcf48db332b2277bb9f95b56d64e393a1d5 > /dev/null

singularity pull --name ontresearch-wf-human-variation-methyl-sha44a13bcf48db332b2277bb9f95b56d64e393a1d5.img.pulling.1695478159368 docker://ontresearch/wf-human-variation-methyl:sha44a13bcf48db332b2277bb9f95b56d64e393a1d5 > /dev/null

#all together at once
nextflow download /home/rmulqueen/wf-human-variation-master/main.nf --container singularity
```

## Running ONT nextflow pipeline.
Expand Down Expand Up @@ -131,6 +135,7 @@ export SINGULARITY_CACHEDIR="/rsrch4/home/genetics/rmulqueen/singularity/"
export SINGULARITY_TMPDIR=$SINGULARITY_CACHEDIR/tmp
export SINGULARITY_PULLDIR=$SINGULARITY_CACHEDIR/pull
export CWL_SINGULARITY_CACHE=$SINGULARITY_PULLDIR
export NXF_OFFLINE='TRUE' #https://nf-co.re/docs/usage/offline

#dorado run (succeeded previously so commenting out here)
#output bam file from dorado caller has to be sorted before it can be used in the pipeline.
Expand All @@ -154,7 +159,8 @@ nextflow run /home/rmulqueen/wf-human-variation-master/main.nf \
--sample_name ${output_name} \
--out_dir ${wd_out}/${output_name}/ \
-with-singularity \
-without-docker
-without-docker \
-offline

#note sif files may need to be manually pulled (as above) for updates to wf-human-variation-master in the future
```
Expand Down

0 comments on commit 64a7818

Please sign in to comment.