Skip to content

Commit

Permalink
Merge branch 'master' of https://github.com/bioperl/p5-bpwrapper
Browse files Browse the repository at this point in the history
  • Loading branch information
yzhernand committed Jun 6, 2023
2 parents b2b3e16 + a91a00d commit 632f070
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 60 deletions.
13 changes: 2 additions & 11 deletions bin/biotree
Original file line number Diff line number Diff line change
Expand Up @@ -335,12 +335,6 @@ Rocky Bernstein (testing & release)
=item *
Yözen Hernández yzhernand at gmail dot com (initial design of implementation)

=item *
Pedro Pegan (developer)

=item *
Lia Di (developer, --mid-point)

=item *
Weigang Qiu <[email protected]> (maintainer)

Expand All @@ -366,10 +360,7 @@ Add documentation in POD in biotree
=over 4

=item *
Bug: Bio::Tree reroot produces trees unreadable by R package "ape"

=item *
consistency index for DNA/protein alignments
Place holder

=back

Expand All @@ -378,7 +369,7 @@ consistency index for DNA/protein alignments
=over 4

=item *
Hernandez, Bernstein, Qiu, et al (2017). "BpWrappers: Command-line utilities for manipulation of sequences, alignments, and phylogenetic trees based on BioPerl". (In prep).
Hernandez, Bernstein, Pagan, Vargas, McCaig, Laracuente, Di, Vieira, and Qiu (2018). "BpWrapper: BioPerl-based sequence and tree utilities for rapid prototyping of bioinformatics pipelines". BMC Bioinformatics. 19:76. https://doi.org/10.1186/s12859-018-2074-9

=item *
Stajich et al (2002). "The BioPerl Toolkit: Perl Modules for the Life Sciences". Genome Research 12(10):1611-1618.
Expand Down
2 changes: 1 addition & 1 deletion lib/Bio/BPWrapper/SeqManipulations.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1002,7 +1002,7 @@ sub shred_seq {
while ($seq = $in->next_seq()) {
my $newid = $seq->id();
$newid =~ s/[\s\|]/_/g;
warn $newid, "\n";
print $newid, "\n";
my $suffix = $out_format || "fas";
my $newout = Bio::SeqIO->new(-format => $out_format, -file => ">" . $newid . ".$suffix");
$newout->write_seq($seq)
Expand Down
118 changes: 70 additions & 48 deletions lib/Bio/BPWrapper/TreeManipulations.pm
Original file line number Diff line number Diff line change
Expand Up @@ -192,58 +192,79 @@ sub _flip_if_not_in_top_clade { # by resetting creation_id & sortby option of ea
}


# trim tips to a single OTU if all branch lengths of its sister nodes < $cut
# trim a node to a single OTU representative if all branch lengths of its descendant OTUs <= $cut
sub trim_tips {
die "Usage: $0 --trim-tips <num>\n" unless $opts{'trim-tips'};
my $cut = $opts{'trim-tips'};
#print $cut, "\t";
my @sisters; # groups of sister nodes
foreach my $nd (@nodes) {
next if $nd->is_Leaf();
my @sis;
foreach ($nd->each_Descendent()) {
push @sis, $_;

my @trim_nodes;
my $group_ct = 0;
&identify_nodes_to_trim_by_walk_from_root($rootnode, \$cut, \@trim_nodes, \$group_ct);
my %otu_sets;
my $set_ct = 1;
foreach my $ref_trim (@trim_nodes) {
foreach (@$ref_trim) {
$otu_sets{$_} = $set_ct;
}
push @sisters, \@sis;
$set_ct++;
# print STDERR join "\t", sort @$ref_trim;
# print STDERR "\n";
}

# print Dumper(\@sisters); exit;

foreach my $ref_sis (@sisters) {
my $trim = 1;
foreach (@$ref_sis) {
# print $_->branch_length(), "\t";
$trim = 0 if $_->branch_length() >= $cut; # any sister branch length > $cut, don't trim
}

# print $nd->internal_id(), ":";
# foreach (@$ref_sis) {
# print "\t", $_->is_Leaf() ? $_->id() : $_->internal_id();
# }
# print "\t", $trim, "\n";

if ($trim) { # retain the first OTU
my @nds = @$ref_sis;
my $retain = shift @nds;
my $pa = $retain->ancestor();
foreach (@nds) {
$pa->remove_Descendent($_)
}

my @leaf_ids;
foreach my $sis (@$ref_sis) {
foreach (&_each_leaf($sis)) {
push @leaf_ids, $_->id();
}
}
# print Dumper(\@leaf_ids);
print STDERR join "\t", @leaf_ids;
print STDERR "\n";
}
foreach (@otus) {
next if $otu_sets{$_->id};
$otu_sets{$_->id} = $set_ct++;
}
# print Dumper(\%otu_sets);
print STDERR "#Trim tree from tip with a cutoff of d=$cut\n";
print STDERR "otu\tnr_set_id\n";
foreach (sort {$otu_sets{$a} <=> $otu_sets{$b}} keys %otu_sets) {
print STDERR $_, "\t", $otu_sets{$_}, "\n";
}

$print_tree = 1;
}

sub identify_nodes_to_trim_by_walk_from_root {
my $node = shift; # internal node only
my $ref_cut = shift;
my $ref_group = shift;
my $ref_ct = shift;

return if $node->is_Leaf;
my %des_otus; # save distances
my $trim = 1; # default to trim
# trim a node if all OTU distance to it is <= cut
foreach my $des ($node -> get_all_Descendents()) {
next unless $des->is_Leaf;
#push @des_otus, $des;
# distance to a desc OTU
my $dist = $tree->distance($node, $des);
$des_otus{$des->id} = { 'otu' => $des, 'dist' => $dist };
$trim = 0 if $dist > $$ref_cut; # don't trim is any distance to OTU > $cut
}

if ($trim) { # trim & retain the first OTU; stop descending
my @leafs = sort keys %des_otus; # make a copy
my $pa = $node->ancestor();
$pa -> remove_Descendent($node); # clear this node as a des of parent
my $retain = shift @leafs;
my $retain_node = $des_otus{$retain}->{'otu'};
my $d = $node->branch_length() + $des_otus{$retain}->{'dist'};
$retain_node->branch_length($d); # keep distance to tip
$pa->add_Descendent($retain_node); # add retained OTU to parent

# collect all OTUs for a trimmed inode
push @$ref_group, [keys %des_otus];
$$ref_ct++;
return;
} else { # don't trim
foreach my $des ($node->each_Descendent()) {
&identify_nodes_to_trim_by_walk_from_root($des, $ref_cut, $ref_group, $ref_ct);
}
}
}

sub cut_tree {
my @otu_hts;
Expand Down Expand Up @@ -769,7 +790,7 @@ sub reroot {
}
}
else {
die("option does not exist: $tag")
die("Need a tag: otu:<otu_id>, or intid:<internal_id>\n");
}
# my $newroot = $outgroup->create_node_on_branch(-FRACTION => 0.5, -ANNOT => {id => 'newroot'});
# $tree->reroot($outgroup);
Expand Down Expand Up @@ -901,7 +922,7 @@ sub subset {

# Print OTU names and lengths
sub print_leaves_lengths {
foreach (@nodes) { say $_->id(), "\t", $_->branch_length() if $_->is_Leaf() }
foreach (@nodes) { say $_->id(), "\t", $_->branch_length() || 0 if $_->is_Leaf() }
}

# Get LCA
Expand Down Expand Up @@ -1257,10 +1278,11 @@ sub _name2node {

# _each_leaf ($node): returns a list of all OTU's descended from this node, if any
sub _each_leaf {
my @leaves;
return $_[0] if $_[0]->is_Leaf;
for ($_[0]->get_all_Descendents) { push (@leaves, $_) if $_->is_Leaf }
return @leaves
my $nd = shift;
my @leaves;
return ($nd) if $nd->is_Leaf;
for ($nd->get_all_Descendents) { push (@leaves, $_) if $_->is_Leaf }
return @leaves
}

# main routine to walk up from root
Expand Down Expand Up @@ -1384,7 +1406,7 @@ L<bioatree>: command-line tool for tree manipulations
=item *
L<Qiu Lab wiki page|http://diverge.hunter.cuny.edu/labwiki/Bioutils>
L<Qiu Lab wiki page|http://wiki.genometracker.org/>
=item *
Expand Down

0 comments on commit 632f070

Please sign in to comment.