forked from deweylab/RSEM
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrsem-gen-transcript-plots
executable file
·118 lines (95 loc) · 4.11 KB
/
rsem-gen-transcript-plots
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#!/usr/bin/env Rscript
nrow_per_page <- 3 # if input_list is composed of transcript ids
ncol_per_page <- 2 # if input_list is composed of transcript ids
num_plots_per_page <- nrow_per_page * ncol_per_page # if input_list is composed of transcript/allele ids
exit_with_error <- function(errmsg) {
cat(errmsg, "\n", sep = "", file = stderr())
quit(save = "no", status = 1)
}
args <- commandArgs(TRUE)
if (length(args) != 6)
exit_with_error("Usage: rsem-gen-transcript-plots sample_name input_list is_allele_specific id_type<0,allele;1,isoform;2,gene> show_uniq output_plot_file")
sample_name <- args[1]
input_list <- args[2]
alleleS <- as.numeric(args[3])
id_type <- as.numeric(args[4])
show_uniq <- as.numeric(args[5])
output_plot_file <- args[6]
is_composite <- (!alleleS && (id_type == 2)) || (alleleS && (id_type > 0))
load_readdepth_file <- function(filename) {
data <- read.table(file = filename, sep = "\t", stringsAsFactors = FALSE)
readdepth <- split(data[, 2:3], data[, 1])
}
build_map <- function(filename) {
value_pos <- 1
if (!alleleS || (alleleS && id_type == 1)) {
key_pos <- 2
} else {
key_pos <- 3
}
data <- read.delim(file = filename, sep = "\t", stringsAsFactors = FALSE)
tmp <- aggregate(data[value_pos], data[key_pos], function(x) x)
map <- tmp[,2]
names(map) <- tmp[,1]
map
}
make_a_plot <- function(id) {
vec <- readdepth[[id]]
if (is.null(vec)) exit_with_error(sprintf("Unknown %s detected, %s is not included in RSEM's indices.", ifelse(alleleS, "allele-specific transcript", "transcript"), id))
if (is.na(vec[[2]])) wiggle <- rep(0, vec[[1]]) else wiggle <- as.numeric(unlist(strsplit(vec[[2]], split = " ")))
len <- length(wiggle)
if (!show_uniq) {
plot(wiggle, type = "h")
} else {
vec <- readdepth_uniq[[id]]
stopifnot(!is.null(vec))
if (is.na(vec[[2]])) wiggle_uniq <- rep(0, vec[[1]]) else wiggle_uniq <- as.numeric(unlist(strsplit(vec[[2]], split = " ")))
stopifnot(len == length(wiggle_uniq))
if (len != sum(wiggle >= wiggle_uniq)) {
cat("Warning: ", ifelse(alleleS, "allele-specific transcript", "transcript"), " ", id, " has position(s) that read covarege with multireads is smaller than read covarge without multireads.\n", " The 1-based position(s) is(are) : ", which(wiggle < wiggle_uniq), ".\n", " This may be due to floating point arithmetics.\n", sep = "")
}
heights <- rbind(wiggle_uniq, wiggle - wiggle_uniq)
barplot(heights, space = 0, border = NA, names.arg = 1:len, col = c("black", "red"))
}
title(main = id)
}
generate_a_page <- function(ids, title = NULL) {
n <- length(ids)
ncol <- ifelse(is_composite, floor(sqrt(n)), ncol_per_page)
nrow <- ifelse(is_composite, ceiling(n / ncol), nrow_per_page)
par(mfrow = c(nrow, ncol), mar = c(2, 2, 2, 2))
if (is_composite) par(oma = c(0, 0, 3, 0))
sapply(ids, make_a_plot)
if (is_composite) mtext(title, outer = TRUE, line = 1)
}
plot_individual <- function(i) {
fr <- (i - 1) * num_plots_per_page + 1
to <- min(i * num_plots_per_page, n)
generate_a_page(ids[fr:to])
}
# cid, composite id, can be either a gene id or transcript id (for allele-specific expression only)
plot_composite <- function(cid) {
if (is.null(map[[cid]])) exit_with_error(sprintf("Unknown %s detected, %s is not included in RSEM's indices.", ifelse(alleleS && id_type == 1, "transcript", "gene"), cid))
generate_a_page(map[[cid]], cid)
}
readdepth <- load_readdepth_file(paste(sample_name, ".transcript.readdepth", sep = ""))
if (show_uniq) {
readdepth_uniq <- load_readdepth_file(paste(sample_name, ".uniq.transcript.readdepth", sep = ""))
}
ids <- scan(file = input_list, what = "", sep = "\n")
cat("Loading files is done!\n")
if (is_composite) {
file_name <- sprintf("%s.%s.results", sample_name, ifelse(alleleS, "alleles", "isoforms"))
map <- build_map(file_name)
cat("Building transcript to gene map is done!\n")
}
pdf(output_plot_file)
if (!is_composite) {
n <- length(ids)
ub <- (n - 1) %/% num_plots_per_page + 1
dumbvar <- sapply(1:ub, plot_individual)
} else {
dumbvar <- sapply(ids, plot_composite)
}
cat("Plots are generated!\n")
dev.off.output <- dev.off()