-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrankaggregation.r
60 lines (51 loc) · 2.75 KB
/
rankaggregation.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
###############################################################
### File: rankaggregation.r
### Description: Pair-wise pearson correlations with MYC and
### rank aggregation of correlation coefficients.
### Written by Delaney Sullivan
###############################################################
# Load R packages
pkgs <- c("RobustRankAggreg")
invisible(lapply(pkgs, function(x) suppressWarnings(suppressMessages(library(x, character.only=TRUE)))))
# Set up some variables
args <- commandArgs(TRUE) # Extract command-line arguments
tcga_file <- args[1] # TCGA data file (.tsv.gz)
info_file <- args[2] # TCGA annotation file (.csv)
output_prefix <- args[3]
# Iterate through folders containing the data:
data <- read.table(tcga_file, stringsAsFactors=FALSE, check.names=FALSE, sep="\t", header=TRUE)
annotation <- read.csv(info_file, stringsAsFactors=FALSE, check.names=FALSE, header=TRUE, na.strings="",row.names=1)
# Run correlation analysis
data_cor <- NULL
for (study in unique(annotation[,"Study Abbreviation"])) {
print(paste("Computing pair-wise correlations for:", study))
study_cols <- annotation[annotation[,"Study Abbreviation"] == study & annotation$Tumor == 1,"Sample"]
study_data <- data[,study_cols]
study_data.myc <- unlist(study_data["ENSG00000136997",]) # The MYC row
cors <- apply(study_data, 1, y=study_data.myc, function(x, y) cor.test(y,x)$estimate ) # This might spit out a bunch of warnings for cases where std dev of x is zero
cors <- data.frame(cors)
colnames(cors) <- study
if (is.null(data_cor)) {
data_cor <- cors
} else {
data_cor <- cbind(data_cor, cors)
}
}
ranked_lists <- list()
data_rank <- data_cor[rowSums(is.na(data_cor)) <= 0.90*ncol(data_cor),] # Remove rows where 90% are NA
data_rank <- data_rank[!(rownames(data_rank) %in% rownames(data_rank["ENSG00000136997",])),] # Remove MYC
median_cor <- apply(data_rank, 1, function(x) { median(x, na.rm=TRUE) })
for (study in colnames(data_rank)) {
curr_list <- data_rank[!is.na(data_rank[,study]),study,drop=FALSE]
ranked_lists[[study]] <- rownames(curr_list[order(-curr_list[,study]),study,drop=FALSE])
}
ranks_aggregated <- aggregateRanks(ranked_lists)
ranks_aggregated$padj <- p.adjust(ranks_aggregated$Score, method="BH")
ranks_aggregated <- merge(ranks_aggregated, data.frame(median_cor), by="row.names", all.x=TRUE, all.y=FALSE)
rownames(ranks_aggregated) <- ranks_aggregated[,1]
ranks_aggregated[,1] <- NULL
ranks_aggregated <- ranks_aggregated[,c("Score", "padj", "median_cor")]
colnames(ranks_aggregated) <- c("pval", "padj", "median_pearson")
# Output rankings and correlations
write.csv(data_cor, file=paste(output_prefix, "myc_correlations.csv", sep=""), quote=FALSE)
write.csv(ranks_aggregated, file=paste(output_prefix, "myc_rra.csv", sep=""), quote=FALSE)