-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathstates_through_time.pl
executable file
·129 lines (121 loc) · 2.89 KB
/
states_through_time.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Long;
use List::Util 'sum';
use Bio::Phylo::IO 'parse_tree';
use Bio::Phylo::Util::Logger ':levels';
# process command line arguments
my $verbosity = WARN;
my $burnin = 500000;
my ( $treefile, $logfile );
GetOptions(
'verbose+' => \$verbosity,
'burnin=i' => \$burnin,
'treefile=s' => \$treefile,
'logfile=s' => \$logfile,
);
# instantiate helper objects
my $log = Bio::Phylo::Util::Logger->new(
'-class' => 'main',
'-level' => $verbosity,
);
$log->info("going to read tree $treefile");
my $tree = parse_tree(
'-format' => 'nexus',
'-file' => $treefile,
);
# start parsing the log
$log->info("going to read log $logfile");
my ( %nodes, %header, %states );
open my $fh, '<', $logfile or die $!;
LINE: while(<$fh>) {
chomp;
# fetch corresponding node
if ( /^\t(Node\d+)Tag\t\d+\t(.+)$/ ) {
my ( $tag, $taxa ) = ( $1, $2 );
my @nodes;
for my $t ( split /\s/, $taxa ) {
if ( my $n = $tree->get_by_name($t) ) {
push @nodes, $n;
}
else {
$log->warn("no $t in $tree");
}
}
my $mrca = $tree->get_mrca(\@nodes);
$nodes{$tag} = $mrca;
$log->debug("found node $tag");
next LINE;
}
# parse header
if ( not %header and /^Iteration\t.+$/ ) {
my @fields = split /\t/, $_;
$log->info("going to read log file header");
for my $i ( 0 .. $#fields ) {
if ( $fields[$i] =~ /^(Node\d+) P\((.)\)$/ ) {
my ( $tag, $state ) = ( $1, $2 );
$states{$state} = undef;
$header{$tag} = [] if not $header{$tag};
push @{ $header{$tag} }, {
'index' => $i,
'state' => $state,
};
}
}
next LINE;
}
# parse data
if ( %header ) {
my @fields = split /\t/, $_;
my $gen = $fields[0];
if ( $gen >= $burnin ) {
$log->debug("processing post-burn-in generation $gen");
$log->info("reached end of burn-in ($burnin)") if $gen == $burnin;
for my $node ( keys %nodes ) {
for my $column ( @{ $header{$node} } ) {
my $i = $column->{'index'};
my $s = $column->{'state'};
my $values = $nodes{$node}->get_generic($s) || [];
push @{ $values }, $fields[$i];
$nodes{$node}->set_generic( $s => $values );
}
}
}
else {
$log->debug("skipping burn-in generation $gen");
}
}
}
# calculate node ages (MYA)
$tree->visit_depth_first(
'-post' => sub {
my $node = shift;
if ( $node->is_terminal ) {
$node->set_generic( 'age' => 0 );
}
else {
my $fd = $node->get_first_daughter;
my $bl = $fd->get_branch_length;
$node->set_generic( 'age' => ( $fd->get_generic('age') + $bl ) );
}
}
);
# print results
my @states = keys %states;
print join( "\t", 'age', @states ), "\n";
$tree->visit_depth_first(
'-pre' => sub {
my $node = shift;
my @values = $node->get_generic('age');
for my $s ( @states ) {
if ( my $v = $node->get_generic($s) ) {
push @values, ( sum(@$v)/scalar(@$v) );
}
else {
push @values, 0;
}
}
print join( "\t", @values ), "\n";
}
);