Skip to content

Commit

Permalink
Added all files
Browse files Browse the repository at this point in the history
  • Loading branch information
enmalik committed Sep 28, 2013
1 parent b6cf847 commit c28dfb7
Show file tree
Hide file tree
Showing 34 changed files with 2,346 additions and 2 deletions.
64 changes: 64 additions & 0 deletions FactorStateClusterNormalizedPlot.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
library(ggplot2)
library(gplots)
library(RColorBrewer)

factorStateTable <- read.table("stats/factor-state-table.txt", header = TRUE, as.is=TRUE, row.names=1);
# print(factorStateTable)
class(factorStateTable)

factorStateTallyOverMeanTable <- read.table("stats/factor-state-sample-mean-table.txt", header = TRUE, as.is=TRUE, row.names=1);

# factorStateTallyOverMeanTable <- log(factorStateTallyOverMeanTable)

stepColours <- read.table("stats/step-colours.txt", header = FALSE, as.is=TRUE)
stepColoursRow <- as.matrix(as.data.frame(t(stepColours), stringsAsFactors=FALSE));
print(stepColours)
print(stepColoursRow)
class(stepColours)
class(stepColoursRow)

for (i in 1:ncol(stepColoursRow)) {
stepColoursRow[1,i] <- paste("#",stepColoursRow[1,i],sep="")
}

print(stepColoursRow)


# print(max(factorStateTable[,2:ncol(factorStateTable)]))
# print(sum(factorStateTable[,1:15]))

# print(factorStatePercentTable)

tallyovermeanMatrix <- data.matrix(factorStateTallyOverMeanTable)

# tallyovermeanMatrix <- log2(tallyovermeanMatrix)

print(log(factorStateTallyOverMeanTable))

# test <- data.matrix(factorStateTallyOverMeanTable)
# log2(test)
# log2(tallyovermeanMatrix)



rowClust <- hclust(dist(tallyovermeanMatrix))

# Plot the data table as heatmap and the cluster results as dendrograms.
png("stats/cluster-heatmap-tallyovermean.png", width=2000, height=1500, res=100)
mycl <- cutree(rowClust, k=6);
# mycolhc <- c("#b8860b", "#ffd700", "#cd5c5c", "#32cd32", "#00bfff", "#ff4500")
mycolhc <- c("#ff9933", "#990000", "#990099", "#009900", "#33CCFF", "#FF0000", "#826561")
mycolhc <- mycolhc[as.vector(mycl)];
hm <- heatmap.2(tallyovermeanMatrix, trace="none", RowSideColors=mycolhc, cexRow=0.75, cexCol=1.0, key=TRUE, keysize=1.5, margins=c(10,22))
dev.off()
# print(hm)
# print(mycolhc)
# print(hm$Rowv)

sink("stats/heatmap-cluster-colours-tallyovermean.txt")
cat(mycolhc)
sink()



# Cut the tree at specific height and color the corresponding clusters in the heatmap color bar.
97 changes: 97 additions & 0 deletions FactorStateClusterPlot.r
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
library(ggplot2)
library(gplots)
library(RColorBrewer)

factorStateTable <- read.table("stats/factor-state-table.txt", header = TRUE, as.is=TRUE, row.names=1);
# print(factorStateTable)
class(factorStateTable)

factorStatePercentTable <- read.table("stats/factor-state-percent-table.txt", header = TRUE, as.is=TRUE, row.names=1);

stepColours <- read.table("stats/step-colours.txt", header = FALSE, as.is=TRUE)
stepColoursRow <- as.matrix(as.data.frame(t(stepColours), stringsAsFactors=FALSE));
print(stepColours)
print(stepColoursRow)
class(stepColours)
class(stepColoursRow)

for (i in 1:ncol(stepColoursRow)) {
stepColoursRow[1,i] <- paste("#",stepColoursRow[1,i],sep="")
}

print(stepColoursRow)


# print(max(factorStateTable[,2:ncol(factorStateTable)]))
# print(sum(factorStateTable[,1:15]))

# print(factorStatePercentTable)

percentMatrix <- data.matrix(factorStatePercentTable)

################

# ## The colors you specified.
# stepColours <- stepColoursRow
# ## Defining breaks for the color scale
# # stepBreaks <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)

# library(sparcl)

# png("stats/cluster-heatmap-morecolours.png", width=2000, height=1500, res=100)
# rc <- rainbow(nrow(percentMatrix), start = 0, end = .3)
# cc <- rainbow(ncol(percentMatrix), start = 0, end = .3)

# hc <- hclust(dist(percentMatrix))
# dd <- as.dendrogram(hc)

# hm <- heatmap.2(percentMatrix, #Rowv=NA, Colv=NA,
# # col = stepColours, ## using your colors
# # breaks = stepBreaks, ## using your breaks
# # dendrogram = "none", ## to suppress warnings
# cexRow=0.75, cexCol=1.0, key=TRUE, keysize=1.5,
# margins=c(10,22),
# trace="none")
# # ColorDendrogram(hm,main="My Simulated Data",branchlength=3)
# # legend("topright", fill = stepColours,
# # legend = c("0 to 10%", "10%+ to 20%", "20%+ to 30%", "30%+ to 40%", "40%+ to 50%", "50%+ to 60%", "60%+ to 70%", "70%+ to 80%", "80%+ to 90%", "90%+ to 100%"))
# # mar.orig <- par()$mar # save the original values
# # par(mar = c(4,4,50,4)) # set your new values EACH 1 IS 15 PX
# # par(mar = mar.orig) # put the original values back

# dev.off()

##################

# mydatascale <- t(scale(t(percentMatrix))) # Centers and scales data.
rowClust <- hclust(dist(percentMatrix))

# print(rowClust[])
# # colClust <- hclust(as.dist(percentMatrix))
# hr <- hclust(as.dist(1-cor(t(mydatascale), method="pearson")), method="complete") # Cluster rows by Pearson correlation.
# hc <- hclust(as.dist(1-cor(mydatascale, method="spearman")), method="complete")
# # Clusters columns by Spearman correlation.
# heatmap(percentMatrix)# Rowv=as.dendrogram(hr), Colv=as.dendrogram(hc))




# Plot the data table as heatmap and the cluster results as dendrograms.
png("stats/cluster-heatmap-morecolours.png", width=2000, height=1500, res=100)
mycl <- cutree(rowClust, k=6);
# mycolhc <- c("#b8860b", "#ffd700", "#cd5c5c", "#32cd32", "#00bfff", "#ff4500")
mycolhc <- c("#ff9933", "#990000", "#990099", "#009900", "#33CCFF", "#FF0000")
mycolhc <- mycolhc[as.vector(mycl)];
hm <- heatmap.2(percentMatrix, RowSideColors=mycolhc, trace="none", cexRow=0.75, cexCol=1.0, key=TRUE, keysize=1.5, margins=c(10,22))
dev.off()
print(hm)
print(mycolhc)
print(hm$Rowv)

sink("stats/heatmap-cluster-colours.txt")
cat(mycolhc)
sink()



# Cut the tree at specific height and color the corresponding clusters in the heatmap color bar.
8 changes: 8 additions & 0 deletions FactorStateIntersect.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
cat "stats/run.txt" | while read tf; do
mkdir broadhmm/$tf
echo $tf
for state in {1..15}
do
bedtools intersect -a bedfiles/$tf -b broadhmm/$state.bed > broadhmm/$tf/$state-intersect.bed
done
done
97 changes: 97 additions & 0 deletions FactorStateTable.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/perl
use warnings;
# use integer;
use List::Util qw(sum);
use Math::BigFloat;

open runfile, "stats/run.txt";
$writeTablePath = "stats/factor-state-table.txt";
$writeTablePercentPath = "stats/factor-state-percent-table.txt";
open (writeFactorStateTableFile, ">".$writeTablePath);
open (writeFactorStateTablePercentFile, ">".$writeTablePercentPath);

print writeFactorStateTableFile "factor\t1_Active_Promoter\t2_Weak_Promoter\t3_Poised_Promoter\t4_Strong_Enhancer\t5_Strong_Enhancer\t6_Weak_Enhancer\t7_Weak_Enhancer\t8_Insulator\t9_Txn_Transition\t10_Txn_Elongation\t11_Weak_Txn\t12_Repressed\t13_Heterochrom/lo\t14_Repetitive/CNV\t15_Repetitive/CNV\n";
# print writeFactorStateTableFile "state1\tstate2\tstate3\tstate4\tstate5\tstate6\tstate7\tstate8\tstate9\tstate10\tstate11\tstate12\tstate13\tstate14\tstate15\n";

# print writeFactorStateTablePercentFile "factor\tstate1\tstate2\tstate3\tstate4\tstate5\tstate6\tstate7\tstate8\tstate9\tstate10\tstate11\tstate12\tstate13\tstate14\tstate15\n";
print writeFactorStateTablePercentFile "factor\t1_Active_Promoter\t2_Weak_Promoter\t3_Poised_Promoter\t4_Strong_Enhancer\t5_Strong_Enhancer\t6_Weak_Enhancer\t7_Weak_Enhancer\t8_Insulator\t9_Txn_Transition\t10_Txn_Elongation\t11_Weak_Txn\t12_Repressed\t13_Heterochrom/lo\t14_Repetitive/CNV\t15_Repetitive/CNV\n";


my @factorTotals = ();

while (<runfile>) {
chomp;
my $factor = $_;
print "$factor\n";

my $factorSum = 0;

print writeFactorStateTableFile "$factor\t";
open tallyfile, "broadhmm/$factor/factor-tally.txt";

while (<tallyfile>) {
chomp;
my $stateTally = $_;
print writeFactorStateTableFile "$stateTally\t";
# push(@factorValues, $stateTally);
$factorSum += $stateTally;
}

close tallyfile;
print writeFactorStateTableFile "\n";
# $factorSum = sum(@factorValues);

print writeFactorStateTablePercentFile "$factor\t";
open tallyfile, "broadhmm/$factor/factor-tally.txt";

while (<tallyfile>) {
chomp;
my $stateTally = $_;
my $statePercent = $stateTally / $factorSum;
print ref $stateTally;

print "factorsum: $factorSum\n";
print "$stateTally\n";
print "$statePercent\n";

print writeFactorStateTablePercentFile "$statePercent\t";
}

close tallyfile;
print writeFactorStateTablePercentFile "\n";

# push(@factorTotals, sum(@factorValues));

}

# foreach (@factorTotals)
# {
# print "$_\n";
# }

# print $factorTotals[1];










# for ($state = 1; $state < 16; $state++) {
# print "$state\n";
# open broadhmmFile, $broadhmmPath;
# while (<broadhmmFile>) {
# chomp;
# my @line = split(/\t/,"$_");

# # print index($line[3], $state);

# if (index($line[3],"$state"."_") == 0) {
# # print "$line[3]\n";
# print writeSplitState "$_\n";
# }
# }
# }
43 changes: 43 additions & 0 deletions FactorStateTableNormalized.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#!/usr/bin/perl
use warnings;
# use integer;
use List::Util qw(sum);
use Math::BigFloat;

open runfile, "stats/run.txt";
$writeTableSampleMeanPath = "stats/factor-state-sample-mean-table.txt";
open (writeFactorStateTableSampleMeanFile, ">".$writeTableSampleMeanPath);

print writeFactorStateTableSampleMeanFile "factor\t1_Active_Promoter\t2_Weak_Promoter\t3_Poised_Promoter\t4_Strong_Enhancer\t5_Strong_Enhancer\t6_Weak_Enhancer\t7_Weak_Enhancer\t8_Insulator\t9_Txn_Transition\t10_Txn_Elongation\t11_Weak_Txn\t12_Repressed\t13_Heterochrom/lo\n";

while (<runfile>) {
chomp;
my $factor = $_;
print writeFactorStateTableSampleMeanFile "$factor\t";

my @factorTally = ();
my @factorMean = ();

open factorTallyFile, "broadhmm/$factor/factor-tally.txt";
open factorMeanFile, "broadhmm/$factor/normalized/factor-sample-mean.txt";

while (<factorTallyFile>) {
chomp;
# print "$_\n";
push (@factorTally, $_);
}

while (<factorMeanFile>) {
chomp;
# print "$_\n";
push (@factorMean, $_);
}

# print $factorMean[0];

for ($line = 0; $line < 13; $line++) {
my $tallyOverMean = $factorTally[$line] / $factorMean[$line];
print writeFactorStateTableSampleMeanFile "$tallyOverMean\t";
}
print writeFactorStateTableSampleMeanFile "\n";
}
24 changes: 24 additions & 0 deletions FactorStateTally.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#!/bin/bash

# tally for each state

for state in {1..15}
do
echo $state
cat "stats/run.txt" | while read tf; do
tally=`wc broadhmm/$tf/$state-intersect.bed | awk {'print $1'}`
echo -e "$tf\t$tally"
done > broadhmm/$state-tally.txt
done


# for easier processing later, tally for each factor according to state

cat "stats/run.txt" | while read tf; do
echo "$tf"
for state in {1..15}
do
tally=`wc broadhmm/$tf/$state-intersect.bed | awk {'print $1'}`
echo -e "$tally"
done > broadhmm/$tf/factor-tally.txt
done
Loading

0 comments on commit c28dfb7

Please sign in to comment.