CountBam

## Purpose: Count Reads Over exons
## Created: Slinker, 07/22/2014
## Associated rdas:
###############

#load libraries
library("GenomicRanges")
library("Rsamtools")
library("GenomicFeatures")
library("rtracklayer")

##Step 1:
#Load hg19 reference of known genes
library("TxDb.Hsapiens.UCSC.hg19.knownGene")

#create a gene range list using the hg19 annotation
exbygene <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "gene")

#summarize
files <- c("~/Documents/SalkProjects/Marchetto/NHPTimeCourse/RNASeq/CM1-16-5_00_accepted_hits.bam",
           "~/Documents/SalkProjects/Marchetto/NHPTimeCourse/RNASeq/CM1-16-5_00_accepted_hits.bam",
           "~/Documents/SalkProjects/Marchetto/NHPTimeCourse/RNASeq/CM1-16-5_01_accepted_hits.bam",
           "~/Documents/SalkProjects/Marchetto/NHPTimeCourse/RNASeq/CM1-16-5_01_accepted_hits.bam")

getCounts <- function(fileName) {
     aligns <- readBamGappedAlignments(fileName)
     counts <- countOverlaps(exbygene, aligns)
     se <- summarizeOverlaps(exbygene, aligns)
     names(counts) <- names(exbygene)
     return(counts)
}

countTable <- sapply(files, getCounts)
countTable <- countTable[rowSums(countTable) != 0, ]
colnames(countTable) <- c("human1a","human1a2", "human1b","human1b2")

design <- data.frame(
  condition=c("human1a", "human1a","human1b","human1b"),
  replicate=c(1,2,1,2),
  libType=rep("paired-end", 4)
  #,countfiles=colData(se_exons)[,1], stringsAsFactors=TRUE)
)

pairedSamples = design$libType == "paired-end"
condition = design$condition[ pairedSamples ]

# library( "DESeq" )
# cds = newCountDataSet( countTable, condition )
# sizeFactors( cds )
# cds = estimateDispersions( cds ,method='blind') #change this once conditions are increased
# plotDispEsts( cds )
# res = nbinomTest( cds, "human1a", "human1b" )
# plotMA(res)
# hist(res$pval, breaks=100, col="skyblue", border="slateblue", main="")
# #filter out nominally sig hits
# resSig = res[ res$padj < 0.1, ]
# #filter down-regulated from sig
# head( resSig[ order( resSig$foldChange, -resSig$baseMean ), ] )
# #filter up-reg from sig
# head( resSig[ order( -resSig$foldChange, -resSig$baseMean ), ] )


#DESeq2
cds = newCountDataSet( countTable, condition )
cds = newCountDataSet( countTable, condition )
unlist(sizeFactors( cds ))
colData2 <- data.frame(c(rep("treated",2),rep("untreated",2)),c(rep("paired-end",4)),c(sizeFactors(cds)))
colnames(colData2) <- c("condition","type","sizeFactor")
dds <- DESeqDataSetFromMatrix(countData = countTable,
                              colData = colData2,
                              design = ~ condition)

dds$condition <- factor(dds$condition,
                        levels=c("untreated","treated"))
dds$condition <- relevel(dds$condition, "untreated")
#dds$condition <- droplevels(dds$condition) #use this if removing samples
dds <- DESeq(dds)
res <- results(dds)
resOrdered <- res[order(res$padj),]
#MA Plot
plotMA(as.data.frame(res), main="DESeq2", ylim=c(-2,2))