-
Notifications
You must be signed in to change notification settings - Fork 9
/
06predictTargets.R
33 lines (28 loc) · 1.22 KB
/
06predictTargets.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
### libraries ###
library(mlr)
library(parallel)
library(parallelMap)
library(biomaRt)
### options ###
# Seed the RNG with the L'Ecuyer-CMRG generator before the parallel backend is
# started (presumably chosen for reproducibility under multicore execution).
set.seed(986, kind = "L'Ecuyer-CMRG")

# Start a multicore parallelMap backend using every core that is detected.
parallelStartMulticore(cpus = detectCores())

# Connection to the March 2016 Ensembl archive, human gene dataset.
ensembl <- useMart(
  biomart = "ENSEMBL_MART_ENSEMBL",
  dataset = "hsapiens_gene_ensembl",
  host = "mar2016.archive.ensembl.org"
)

# Filter values used by the annotation query further down: chromosome names
# (as character strings) and the gene biotype to keep.
chr <- c(as.character(1:22), "X", "Y", "MT")
type <- "protein_coding"
### data ###
# Load the two serialized inputs: the table of observations to score and the
# stored model object that is passed to predict() below.
predictionset <- readRDS("../data/predictionset.rds")
nn.mod <- readRDS("../data/nn.mod.rds")
### predict ###
# Score the prediction set with the loaded model and re-derive the predicted
# labels using a 0.9 probability threshold.
# NOTE(review): the threshold presumably applies to the task's positive
# ("target") class -- confirm against the task definition.
pred <- setThreshold(
  predict(nn.mod, newdata = predictionset),
  threshold = 0.9
)

# Keep the thresholded prediction object for later steps.
saveRDS(pred, file = "../data/pred.rds")
### annotate ###
# Query Ensembl for the Entrez ID and gene symbol of every predicted gene,
# keeping only genes on the chromosomes in `chr` with biotype `type`.
# The row names of pred$data are used as Ensembl gene IDs.
ann <- getBM(
  attributes = c("ensembl_gene_id", "entrezgene", "external_gene_name"),
  filters = c("ensembl_gene_id", "chromosome_name", "biotype"),
  values = list(rownames(pred$data), chr, type),
  mart = ensembl
)

# Full outer join on the Ensembl ID so that predictions without an annotation
# hit are kept (their annotation fields become NA). Joining on row names makes
# merge() add the key as a leading "Row.names" column.
predres <- merge(
  pred$data, ann,
  by.x = "row.names", by.y = "ensembl_gene_id",
  all = TRUE
)

# Rename columns by their source name instead of by position. A positional
# `names(predres) <- ...` silently mislabels the output as soon as pred$data
# carries any extra column (e.g. a truth column), so fail loudly instead.
prob.cols <- grep("^prob\\.", names(predres), value = TRUE)
if (length(prob.cols) != 2L) {
  stop(
    "Expected exactly two class-probability columns in pred$data, found ",
    length(prob.cols), ".",
    call. = FALSE
  )
}
# NOTE(review): as in the original script, the first probability column is
# taken to be the "unknown" class and the second the "target" class --
# confirm against the class level order of the task.
old.names <- c(
  "Row.names", prob.cols, "response", "entrezgene", "external_gene_name"
)
new.names <- c(
  "Ensembl", "UnknownProb", "TargetProb", "Prediction", "Entrez", "Symbol"
)
missing.cols <- setdiff(old.names, names(predres))
if (length(missing.cols) > 0L) {
  stop(
    "Merged prediction table lacks expected column(s): ",
    paste(missing.cols, collapse = ", "),
    call. = FALSE
  )
}
names(predres)[match(old.names, names(predres))] <- new.names

# Final column order for the report; any other columns are dropped here.
predres <- predres[
  c("Ensembl", "Entrez", "Symbol", "Prediction", "TargetProb", "UnknownProb")
]
write.csv(
  predres,
  "../data/PredictionResults.csv",
  quote = FALSE,
  row.names = FALSE
)

# Shut down the parallelMap backend started in the options section.
parallelStop()