-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathwgcna_relate.py
84 lines (77 loc) · 3.56 KB
/
wgcna_relate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# coding=utf-8
import subprocess
import argparse
import pandas as pd
"""
1. Correlate the module eigengenes with the trait
2. Correlate each gene with the trait
"""
# get arguments
parser = argparse.ArgumentParser(description="wgcna step3: relate module/gene to traits")
parser.add_argument('-datExpr', type=str, metavar="exp_matrix_file", required=True,
help="expression matrix file, for gene/traits relationship calculation")
parser.add_argument('-MEs', type=str, metavar="module eigengenes", required=True,
help="module eigengenes, for module/traits relationship calculation")
parser.add_argument('-traits', type=str, required=True, metavar="phenotype_data",
help="sample name in row, traits data in column; "
"或者为样本分组信息文件,第一行是header,第一列为样本id, 第二列为样本分组, "
"样本信息必须和输入的表达矩阵完全匹配,不多不少")
parser.add_argument('-corType', type=str, metavar="correlation_type", default="pearson",
help="correlation type, one of ['pearson', 'spearman', 'kendall', 'bicor']")
parser.add_argument('-nThreads', type=int, default=16, )
args = parser.parse_args()
# read expr
r_cmds = \
"""
# module and traits relation
library('WGCNA')
enableWGCNAThreads()
datME = read.table('{eigengenes}', header=T, row.names=1)
datME = as.data.frame(t(datME))
MEs = orderMEs(datME)
traits = read.table("{traits}", header=T, row.names=1)
if (dim(traits)[2] == 1 & class(traits[1,1])=="factor"){bracket1}
tmp = model.matrix(~0+ traits[,1])
colnames(tmp) = levels(traits[,1])
rownames(tmp) = rownames(traits)
traits = tmp
{bracket2}
traits = as.data.frame(traits)
traits = traits[rownames(MEs), ]
if ("{cor_type}" == 'bicor'){bracket1}
correlation = signif(bicor(MEs, traits, robustY=F, maxPOutliers = 0.05, nThreads={threads}), 3)
{bracket2} else {bracket1}
correlation = signif(cor(MEs, traits, use="p", method="{cor_type}", nThreads={threads}), 3)
{bracket2}
pvalues = signif(corPvalueStudent(correlation, nSamples = dim(traits)[1]), 3)
pdf(file='Module-Trait.pdf', width = 12, height = 9)
textMatrix = paste(signif(correlation, 2), "\n(", signif(pvalues, 1), ")", sep = "")
dim(textMatrix) = dim(correlation)
par(mar = c(6, 8.5, 3, 3))
labeledHeatmap(Matrix = correlation, xLabels = names(traits), yLabels = names(MEs),
ySymbols = names(MEs), colorLabels = FALSE, colors = greenWhiteRed(50),
textMatrix = textMatrix, setStdMargins = FALSE, cex.text = 0.95,
zlim = c(-1,1), main = paste("Module-trait relationships"))
dev.off()
write.table(correlation, 'module_trait.correlation.xls', col.names=NA, quote=F, sep='\\t', row.names=T)
write.table(pvalues, 'module_trait.correlation_pvalues.xls', col.names=NA, quote=F, sep='\\t', row.names=T)
# gene and traits relation
exp = read.table('{exp_matrix}', header=T, row.names=1)
if ("{cor_type}" == 'bicor'){bracket1}
correlation2 = signif(bicor(t(exp), traits, robustY=F, maxPOutliers = 0.05, nThreads={threads}), 3)
{bracket2} else {bracket1}
correlation2 = signif(cor(t(exp), traits, use="p", method="{cor_type}"), 3)
{bracket2}
write.table(correlation2, 'gene_trait.correlation.xls', col.names=NA, quote=F, sep='\\t', row.names=T)
""".format(
eigengenes=args.MEs,
traits=args.traits,
cor_type=args.corType,
threads=args.nThreads,
exp_matrix=args.datExpr,
bracket1="{",
bracket2="}"
)
with open('wgcna_relate_analysis.r', 'w') as f:
f.write(r_cmds)
subprocess.check_call("Rscript wgcna_relate_analysis.r", shell=True)