forked from baliga-lab/sygnal
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathenrichment.R
executable file
·59 lines (50 loc) · 2.31 KB
/
enrichment.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#!/usr/bin/env Rscript
suppressMessages(library(topGO))
suppressMessages(library('org.Mm.eg.db'))
suppressMessages(library('getopt'))
# read command line arguments
spec = matrix(c(
'outdir', 'o', 1, 'character',
'gene_conv', 'c', 1, 'character',
'help', 'h', 0, 'logical'
), byrow=TRUE, ncol=4)
opt <- getopt(spec)
if (is.null(opt$outdir) || is.null(opt$gene_conv) || !is.null(opt$help)) {
cat(getopt(spec, usage=TRUE))
q(status=1)
}
# Make a map of Entrez IDs to UCSC transcript IDs
d1 = read.csv(opt$gene_conv,header=F)
ucsc2entrez = list()
for(j in 1:length(d1[,1])) {
tmp = strsplit(as.character(d1[j,2]),split=';')[[1]]
for(k in tmp) {
ucsc2entrez[[as.character(k)]] = as.character(d1[j,1])
}
}
# Automated for all clusters
d1 = read.csv(paste(opt$outdir,'/cluster.members.genes.txt',sep=''),header=F)
biclustMembership = list()
for(j in 1:length(d1[,1])) {
biclustMembership[[j]] = as.character(unlist(ucsc2entrez[strsplit(as.character(d1[j,]),split=' ')[[1]][-1]]))
}
xx <- annFUN.org("BP", mapping = "org.Mm.eg.db", ID = "entrez")
geneNames <- intersect(unique(unlist(biclustMembership)), unique(unlist(xx)))
tmp1 <- biclustMembership[[1]]
geneList <- factor(as.integer(geneNames %in% tmp1))
names(geneList) <- geneNames
# Make Biological Process GOData object
GOdata.BP <- new("topGOdata", ontology='BP', allGenes = geneList, annot = annFUN.org, mapping = 'org.Mm.eg.db', ID = 'entrez')
m1.BP <- matrix(nrow = length(GOdata.BP@graph@nodes), ncol = length(biclustMembership), dimnames = list(GOdata.BP@graph@nodes, 1:length(biclustMembership)))
m2.BP <- matrix(nrow = length(biclustMembership), ncol=2, dimnames = list(1:length(biclustMembership),c('Top10.Terms.BP','BH.sig.GO.Ids.BP')))
for( biclust in (1:length(biclustMembership)) ) {
# Expand gene list and change factor in GOdata
biclustGenes <- biclustMembership[[biclust]]
GOdata.BP@allScores <- factor(as.integer(geneNames %in% biclustGenes))
# Biological process
r1.BP = runTest(GOdata.BP, algorithm = 'classic', statistic = 'fisher')
m1.BP[,biclust] = r1.BP@score
m2.BP[biclust,1] = gsub(',','_',paste(GenTable(GOdata.BP, r1.BP)[,2],collapse=';'))
m2.BP[biclust,2] = paste(names(which(p.adjust(r1.BP@score,method='BH')<=0.05)),collapse=';')
}
write.csv(m2.BP,paste(opt$outdir,'/biclusterEnrichment_GOBP.csv',sep=''))