-
Notifications
You must be signed in to change notification settings - Fork 0
/
CountBam
81 lines (69 loc) · 2.88 KB
/
CountBam
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
## Purpose: Count Reads Over exons
## Created: Slinker, 07/22/2014
## Associated rdas:
###############
#load libraries
library("GenomicRanges")
library("Rsamtools")
library("GenomicFeatures")
library("rtracklayer")
##Step 1:
#Load hg19 reference of known genes
library("TxDb.Hsapiens.UCSC.hg19.knownGene")
#create a gene range list using the hg19 annotation
exbygene <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "gene")
#summarize
files <- c("~/Documents/SalkProjects/Marchetto/NHPTimeCourse/RNASeq/CM1-16-5_00_accepted_hits.bam",
"~/Documents/SalkProjects/Marchetto/NHPTimeCourse/RNASeq/CM1-16-5_00_accepted_hits.bam",
"~/Documents/SalkProjects/Marchetto/NHPTimeCourse/RNASeq/CM1-16-5_01_accepted_hits.bam",
"~/Documents/SalkProjects/Marchetto/NHPTimeCourse/RNASeq/CM1-16-5_01_accepted_hits.bam")
getCounts <- function(fileName) {
aligns <- readBamGappedAlignments(fileName)
counts <- countOverlaps(exbygene, aligns)
se <- summarizeOverlaps(exbygene, aligns)
names(counts) <- names(exbygene)
return(counts)
}
countTable <- sapply(files, getCounts)
countTable <- countTable[rowSums(countTable) != 0, ]
colnames(countTable) <- c("human1a","human1a2", "human1b","human1b2")
design <- data.frame(
condition=c("human1a", "human1a","human1b","human1b"),
replicate=c(1,2,1,2),
libType=rep("paired-end", 4)
#,countfiles=colData(se_exons)[,1], stringsAsFactors=TRUE)
)
pairedSamples = design$libType == "paired-end"
condition = design$condition[ pairedSamples ]
# library( "DESeq" )
# cds = newCountDataSet( countTable, condition )
# sizeFactors( cds )
# cds = estimateDispersions( cds ,method='blind') #change this once conditions are increased
# plotDispEsts( cds )
# res = nbinomTest( cds, "human1a", "human1b" )
# plotMA(res)
# hist(res$pval, breaks=100, col="skyblue", border="slateblue", main="")
# #filter out nominally sig hits
# resSig = res[ res$padj < 0.1, ]
# #filter down-regulated from sig
# head( resSig[ order( resSig$foldChange, -resSig$baseMean ), ] )
# #filter up-reg from sig
# head( resSig[ order( -resSig$foldChange, -resSig$baseMean ), ] )
#DESeq2
cds = newCountDataSet( countTable, condition )
cds = newCountDataSet( countTable, condition )
unlist(sizeFactors( cds ))
colData2 <- data.frame(c(rep("treated",2),rep("untreated",2)),c(rep("paired-end",4)),c(sizeFactors(cds)))
colnames(colData2) <- c("condition","type","sizeFactor")
dds <- DESeqDataSetFromMatrix(countData = countTable,
colData = colData2,
design = ~ condition)
dds$condition <- factor(dds$condition,
levels=c("untreated","treated"))
dds$condition <- relevel(dds$condition, "untreated")
#dds$condition <- droplevels(dds$condition) #use this if removing samples
dds <- DESeq(dds)
res <- results(dds)
resOrdered <- res[order(res$padj),]
#MA Plot
plotMA(as.data.frame(res), main="DESeq2", ylim=c(-2,2))