-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
34 changed files
with
2,346 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
library(ggplot2) | ||
library(gplots) | ||
library(RColorBrewer) | ||
|
||
factorStateTable <- read.table("stats/factor-state-table.txt", header = TRUE, as.is=TRUE, row.names=1); | ||
# print(factorStateTable) | ||
class(factorStateTable) | ||
|
||
factorStateTallyOverMeanTable <- read.table("stats/factor-state-sample-mean-table.txt", header = TRUE, as.is=TRUE, row.names=1); | ||
|
||
# factorStateTallyOverMeanTable <- log(factorStateTallyOverMeanTable) | ||
|
||
stepColours <- read.table("stats/step-colours.txt", header = FALSE, as.is=TRUE) | ||
stepColoursRow <- as.matrix(as.data.frame(t(stepColours), stringsAsFactors=FALSE)); | ||
print(stepColours) | ||
print(stepColoursRow) | ||
class(stepColours) | ||
class(stepColoursRow) | ||
|
||
for (i in 1:ncol(stepColoursRow)) { | ||
stepColoursRow[1,i] <- paste("#",stepColoursRow[1,i],sep="") | ||
} | ||
|
||
print(stepColoursRow) | ||
|
||
|
||
# print(max(factorStateTable[,2:ncol(factorStateTable)])) | ||
# print(sum(factorStateTable[,1:15])) | ||
|
||
# print(factorStatePercentTable) | ||
|
||
tallyovermeanMatrix <- data.matrix(factorStateTallyOverMeanTable) | ||
|
||
# tallyovermeanMatrix <- log2(tallyovermeanMatrix) | ||
|
||
print(log(factorStateTallyOverMeanTable)) | ||
|
||
# test <- data.matrix(factorStateTallyOverMeanTable) | ||
# log2(test) | ||
# log2(tallyovermeanMatrix) | ||
|
||
|
||
|
||
rowClust <- hclust(dist(tallyovermeanMatrix)) | ||
|
||
# Plot the data table as heatmap and the cluster results as dendrograms. | ||
png("stats/cluster-heatmap-tallyovermean.png", width=2000, height=1500, res=100) | ||
mycl <- cutree(rowClust, k=6); | ||
# mycolhc <- c("#b8860b", "#ffd700", "#cd5c5c", "#32cd32", "#00bfff", "#ff4500") | ||
mycolhc <- c("#ff9933", "#990000", "#990099", "#009900", "#33CCFF", "#FF0000", "#826561") | ||
mycolhc <- mycolhc[as.vector(mycl)]; | ||
hm <- heatmap.2(tallyovermeanMatrix, trace="none", RowSideColors=mycolhc, cexRow=0.75, cexCol=1.0, key=TRUE, keysize=1.5, margins=c(10,22)) | ||
dev.off() | ||
# print(hm) | ||
# print(mycolhc) | ||
# print(hm$Rowv) | ||
|
||
sink("stats/heatmap-cluster-colours-tallyovermean.txt") | ||
cat(mycolhc) | ||
sink() | ||
|
||
|
||
|
||
# Cut the tree at specific height and color the corresponding clusters in the heatmap color bar. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
library(ggplot2) | ||
library(gplots) | ||
library(RColorBrewer) | ||
|
||
factorStateTable <- read.table("stats/factor-state-table.txt", header = TRUE, as.is=TRUE, row.names=1); | ||
# print(factorStateTable) | ||
class(factorStateTable) | ||
|
||
factorStatePercentTable <- read.table("stats/factor-state-percent-table.txt", header = TRUE, as.is=TRUE, row.names=1); | ||
|
||
stepColours <- read.table("stats/step-colours.txt", header = FALSE, as.is=TRUE) | ||
stepColoursRow <- as.matrix(as.data.frame(t(stepColours), stringsAsFactors=FALSE)); | ||
print(stepColours) | ||
print(stepColoursRow) | ||
class(stepColours) | ||
class(stepColoursRow) | ||
|
||
for (i in 1:ncol(stepColoursRow)) { | ||
stepColoursRow[1,i] <- paste("#",stepColoursRow[1,i],sep="") | ||
} | ||
|
||
print(stepColoursRow) | ||
|
||
|
||
# print(max(factorStateTable[,2:ncol(factorStateTable)])) | ||
# print(sum(factorStateTable[,1:15])) | ||
|
||
# print(factorStatePercentTable) | ||
|
||
percentMatrix <- data.matrix(factorStatePercentTable) | ||
|
||
################ | ||
|
||
# ## The colors you specified. | ||
# stepColours <- stepColoursRow | ||
# ## Defining breaks for the color scale | ||
# # stepBreaks <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1) | ||
|
||
# library(sparcl) | ||
|
||
# png("stats/cluster-heatmap-morecolours.png", width=2000, height=1500, res=100) | ||
# rc <- rainbow(nrow(percentMatrix), start = 0, end = .3) | ||
# cc <- rainbow(ncol(percentMatrix), start = 0, end = .3) | ||
|
||
# hc <- hclust(dist(percentMatrix)) | ||
# dd <- as.dendrogram(hc) | ||
|
||
# hm <- heatmap.2(percentMatrix, #Rowv=NA, Colv=NA, | ||
# # col = stepColours, ## using your colors | ||
# # breaks = stepBreaks, ## using your breaks | ||
# # dendrogram = "none", ## to suppress warnings | ||
# cexRow=0.75, cexCol=1.0, key=TRUE, keysize=1.5, | ||
# margins=c(10,22), | ||
# trace="none") | ||
# # ColorDendrogram(hm,main="My Simulated Data",branchlength=3) | ||
# # legend("topright", fill = stepColours, | ||
# # legend = c("0 to 10%", "10%+ to 20%", "20%+ to 30%", "30%+ to 40%", "40%+ to 50%", "50%+ to 60%", "60%+ to 70%", "70%+ to 80%", "80%+ to 90%", "90%+ to 100%")) | ||
# # mar.orig <- par()$mar # save the original values | ||
# # par(mar = c(4,4,50,4)) # set your new values EACH 1 IS 15 PX | ||
# # par(mar = mar.orig) # put the original values back | ||
|
||
# dev.off() | ||
|
||
################## | ||
|
||
# mydatascale <- t(scale(t(percentMatrix))) # Centers and scales data. | ||
rowClust <- hclust(dist(percentMatrix)) | ||
|
||
# print(rowClust[]) | ||
# # colClust <- hclust(as.dist(percentMatrix)) | ||
# hr <- hclust(as.dist(1-cor(t(mydatascale), method="pearson")), method="complete") # Cluster rows by Pearson correlation. | ||
# hc <- hclust(as.dist(1-cor(mydatascale, method="spearman")), method="complete") | ||
# # Clusters columns by Spearman correlation. | ||
# heatmap(percentMatrix)# Rowv=as.dendrogram(hr), Colv=as.dendrogram(hc)) | ||
|
||
|
||
|
||
|
||
# Plot the data table as heatmap and the cluster results as dendrograms. | ||
png("stats/cluster-heatmap-morecolours.png", width=2000, height=1500, res=100) | ||
mycl <- cutree(rowClust, k=6); | ||
# mycolhc <- c("#b8860b", "#ffd700", "#cd5c5c", "#32cd32", "#00bfff", "#ff4500") | ||
mycolhc <- c("#ff9933", "#990000", "#990099", "#009900", "#33CCFF", "#FF0000") | ||
mycolhc <- mycolhc[as.vector(mycl)]; | ||
hm <- heatmap.2(percentMatrix, RowSideColors=mycolhc, trace="none", cexRow=0.75, cexCol=1.0, key=TRUE, keysize=1.5, margins=c(10,22)) | ||
dev.off() | ||
print(hm) | ||
print(mycolhc) | ||
print(hm$Rowv) | ||
|
||
sink("stats/heatmap-cluster-colours.txt") | ||
cat(mycolhc) | ||
sink() | ||
|
||
|
||
|
||
# Cut the tree at specific height and color the corresponding clusters in the heatmap color bar. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
cat "stats/run.txt" | while read tf; do | ||
mkdir broadhmm/$tf | ||
echo $tf | ||
for state in {1..15} | ||
do | ||
bedtools intersect -a bedfiles/$tf -b broadhmm/$state.bed > broadhmm/$tf/$state-intersect.bed | ||
done | ||
done |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#!/usr/bin/perl | ||
use warnings; | ||
# use integer; | ||
use List::Util qw(sum); | ||
use Math::BigFloat; | ||
|
||
open runfile, "stats/run.txt"; | ||
$writeTablePath = "stats/factor-state-table.txt"; | ||
$writeTablePercentPath = "stats/factor-state-percent-table.txt"; | ||
open (writeFactorStateTableFile, ">".$writeTablePath); | ||
open (writeFactorStateTablePercentFile, ">".$writeTablePercentPath); | ||
|
||
print writeFactorStateTableFile "factor\t1_Active_Promoter\t2_Weak_Promoter\t3_Poised_Promoter\t4_Strong_Enhancer\t5_Strong_Enhancer\t6_Weak_Enhancer\t7_Weak_Enhancer\t8_Insulator\t9_Txn_Transition\t10_Txn_Elongation\t11_Weak_Txn\t12_Repressed\t13_Heterochrom/lo\t14_Repetitive/CNV\t15_Repetitive/CNV\n"; | ||
# print writeFactorStateTableFile "state1\tstate2\tstate3\tstate4\tstate5\tstate6\tstate7\tstate8\tstate9\tstate10\tstate11\tstate12\tstate13\tstate14\tstate15\n"; | ||
|
||
# print writeFactorStateTablePercentFile "factor\tstate1\tstate2\tstate3\tstate4\tstate5\tstate6\tstate7\tstate8\tstate9\tstate10\tstate11\tstate12\tstate13\tstate14\tstate15\n"; | ||
print writeFactorStateTablePercentFile "factor\t1_Active_Promoter\t2_Weak_Promoter\t3_Poised_Promoter\t4_Strong_Enhancer\t5_Strong_Enhancer\t6_Weak_Enhancer\t7_Weak_Enhancer\t8_Insulator\t9_Txn_Transition\t10_Txn_Elongation\t11_Weak_Txn\t12_Repressed\t13_Heterochrom/lo\t14_Repetitive/CNV\t15_Repetitive/CNV\n"; | ||
|
||
|
||
my @factorTotals = (); | ||
|
||
while (<runfile>) { | ||
chomp; | ||
my $factor = $_; | ||
print "$factor\n"; | ||
|
||
my $factorSum = 0; | ||
|
||
print writeFactorStateTableFile "$factor\t"; | ||
open tallyfile, "broadhmm/$factor/factor-tally.txt"; | ||
|
||
while (<tallyfile>) { | ||
chomp; | ||
my $stateTally = $_; | ||
print writeFactorStateTableFile "$stateTally\t"; | ||
# push(@factorValues, $stateTally); | ||
$factorSum += $stateTally; | ||
} | ||
|
||
close tallyfile; | ||
print writeFactorStateTableFile "\n"; | ||
# $factorSum = sum(@factorValues); | ||
|
||
print writeFactorStateTablePercentFile "$factor\t"; | ||
open tallyfile, "broadhmm/$factor/factor-tally.txt"; | ||
|
||
while (<tallyfile>) { | ||
chomp; | ||
my $stateTally = $_; | ||
my $statePercent = $stateTally / $factorSum; | ||
print ref $stateTally; | ||
|
||
print "factorsum: $factorSum\n"; | ||
print "$stateTally\n"; | ||
print "$statePercent\n"; | ||
|
||
print writeFactorStateTablePercentFile "$statePercent\t"; | ||
} | ||
|
||
close tallyfile; | ||
print writeFactorStateTablePercentFile "\n"; | ||
|
||
# push(@factorTotals, sum(@factorValues)); | ||
|
||
} | ||
|
||
# foreach (@factorTotals) | ||
# { | ||
# print "$_\n"; | ||
# } | ||
|
||
# print $factorTotals[1]; | ||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
|
||
# for ($state = 1; $state < 16; $state++) { | ||
# print "$state\n"; | ||
# open broadhmmFile, $broadhmmPath; | ||
# while (<broadhmmFile>) { | ||
# chomp; | ||
# my @line = split(/\t/,"$_"); | ||
|
||
# # print index($line[3], $state); | ||
|
||
# if (index($line[3],"$state"."_") == 0) { | ||
# # print "$line[3]\n"; | ||
# print writeSplitState "$_\n"; | ||
# } | ||
# } | ||
# } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#!/usr/bin/perl | ||
use warnings; | ||
# use integer; | ||
use List::Util qw(sum); | ||
use Math::BigFloat; | ||
|
||
open runfile, "stats/run.txt"; | ||
$writeTableSampleMeanPath = "stats/factor-state-sample-mean-table.txt"; | ||
open (writeFactorStateTableSampleMeanFile, ">".$writeTableSampleMeanPath); | ||
|
||
print writeFactorStateTableSampleMeanFile "factor\t1_Active_Promoter\t2_Weak_Promoter\t3_Poised_Promoter\t4_Strong_Enhancer\t5_Strong_Enhancer\t6_Weak_Enhancer\t7_Weak_Enhancer\t8_Insulator\t9_Txn_Transition\t10_Txn_Elongation\t11_Weak_Txn\t12_Repressed\t13_Heterochrom/lo\n"; | ||
|
||
while (<runfile>) { | ||
chomp; | ||
my $factor = $_; | ||
print writeFactorStateTableSampleMeanFile "$factor\t"; | ||
|
||
my @factorTally = (); | ||
my @factorMean = (); | ||
|
||
open factorTallyFile, "broadhmm/$factor/factor-tally.txt"; | ||
open factorMeanFile, "broadhmm/$factor/normalized/factor-sample-mean.txt"; | ||
|
||
while (<factorTallyFile>) { | ||
chomp; | ||
# print "$_\n"; | ||
push (@factorTally, $_); | ||
} | ||
|
||
while (<factorMeanFile>) { | ||
chomp; | ||
# print "$_\n"; | ||
push (@factorMean, $_); | ||
} | ||
|
||
# print $factorMean[0]; | ||
|
||
for ($line = 0; $line < 13; $line++) { | ||
my $tallyOverMean = $factorTally[$line] / $factorMean[$line]; | ||
print writeFactorStateTableSampleMeanFile "$tallyOverMean\t"; | ||
} | ||
print writeFactorStateTableSampleMeanFile "\n"; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
#!/bin/bash | ||
|
||
# tally for each state | ||
|
||
for state in {1..15} | ||
do | ||
echo $state | ||
cat "stats/run.txt" | while read tf; do | ||
tally=`wc broadhmm/$tf/$state-intersect.bed | awk {'print $1'}` | ||
echo -e "$tf\t$tally" | ||
done > broadhmm/$state-tally.txt | ||
done | ||
|
||
|
||
# for easier processing later, tally for each factor according to state | ||
|
||
cat "stats/run.txt" | while read tf; do | ||
echo "$tf" | ||
for state in {1..15} | ||
do | ||
tally=`wc broadhmm/$tf/$state-intersect.bed | awk {'print $1'}` | ||
echo -e "$tally" | ||
done > broadhmm/$tf/factor-tally.txt | ||
done |
Oops, something went wrong.