File Coverage

blib/lib/Bio/Phylo/Forest/TreeRole.pm
Criterion Covered Total %
statement 639 1419 45.0
branch 152 398 38.1
condition 46 110 41.8
subroutine 78 145 53.7
pod 77 92 83.7
total 992 2164 45.8


line stmt bran cond sub pod time code
1             package Bio::Phylo::Forest::TreeRole;
2 31     31   177 use strict;
  31         59  
  31         799  
3 31     31   1151 use Bio::Phylo::Util::MOP;
  31         66  
  31         216  
4 31     31   140 use base 'Bio::Phylo::Listable';
  31         56  
  31         4643  
5 31     31   197 use Bio::Phylo::Util::Exceptions 'throw';
  31         55  
  31         1578  
6 31     31   183 use Bio::Phylo::Util::CONSTANT qw'/looks_like/ :objecttypes';
  31         58  
  31         8057  
7 31     31   6754 use Bio::Phylo::Util::OptionalInterface 'Bio::Tree::TreeI';
  31         73  
  31         171  
8 31     31   7865 use Bio::Phylo::Forest::Node;
  31         87  
  31         300  
9 31     31   192 use Bio::Phylo::IO 'unparse';
  31         65  
  31         1426  
10 31     31   251 use Bio::Phylo::Factory;
  31         68  
  31         147  
11 31     31   189 use Scalar::Util 'blessed';
  31         55  
  31         1216  
12 31     31   202 use List::Util qw'sum shuffle';
  31         68  
  31         8261  
13             my $LOADED_WRAPPERS = 0;
14             {
15             my $logger = __PACKAGE__->get_logger;
16             my ( $TYPE_CONSTANT, $CONTAINER_CONSTANT ) = ( _TREE_, _FOREST_ );
17             my $fac = Bio::Phylo::Factory->new;
18             my %default_constructor_args = (
19             '-listener' => sub {
20             my ( $self, $method, @args ) = @_;
21             for my $node (@args) {
22             if ( $method eq 'insert' ) {
23             $node->set_tree($self);
24             }
25             elsif ( $method eq 'delete' ) {
26             $node->set_tree();
27             }
28             elsif ( $method eq '_set_things' ) {
29             $_->set_tree($self) for @{ $node };
30             }
31             }
32             },
33             );
34              
35             =head1 NAME
36              
37             Bio::Phylo::Forest::TreeRole - Extra behaviours for a phylogenetic tree
38              
39             =head1 SYNOPSIS
40              
41             # some way to get a tree
42             use Bio::Phylo::IO;
43             my $string = '((A,B),C);';
44             my $forest = Bio::Phylo::IO->parse(
45             -format => 'newick',
46             -string => $string
47             );
48             my $tree = $forest->first;
49              
50             # do something:
51             print $tree->calc_imbalance;
52              
53             # prints "1"
54              
55             =head1 DESCRIPTION
56              
57             The object models a phylogenetic tree, a container of
58             L objects. The tree object
59             inherits from L, so look there
60             for more methods.
61              
62             =head1 METHODS
63              
64             =head2 CONSTRUCTORS
65              
66             =over
67              
68             =item new()
69              
70             Tree constructor.
71              
72             Type : Constructor
73             Title : new
74             Usage : my $tree = Bio::Phylo::Forest::Tree->new;
75             Function: Instantiates a Bio::Phylo::Forest::Tree object.
76             Returns : A Bio::Phylo::Forest::Tree object.
77             Args : No required arguments.
78              
79             =cut
80              
81             sub new : Constructor {
82              
83             # could be child class
84 180     180 1 421 my $class = shift;
85              
86             # notify user
87 180         1180 $logger->info("constructor called for '$class'");
88 180 100       582 if ( not $LOADED_WRAPPERS ) {
89 29 0 0 0 0 69 eval do { local $/; };
  29 0 0 0 0 143  
  29 0   0 0 35600  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0   0  
  0 0   0   0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
90 29         89 $LOADED_WRAPPERS++;
91             }
92              
93             # go up inheritance tree, eventually get an ID
94 180         1261 my $self = $class->SUPER::new( %default_constructor_args, @_ );
95 180         931 return $self;
96 31     31   220 }
  31         61  
  31         178  
97              
98             =item new_from_bioperl()
99              
100             Tree constructor from Bio::Tree::TreeI argument.
101              
102             Type : Constructor
103             Title : new_from_bioperl
104             Usage : my $tree =
105             Bio::Phylo::Forest::Tree->new_from_bioperl(
106             $bptree
107             );
108             Function: Instantiates a
109             Bio::Phylo::Forest::Tree object.
110             Returns : A Bio::Phylo::Forest::Tree object.
111             Args : A tree that implements Bio::Tree::TreeI
112              
113             =cut
114              
115             sub new_from_bioperl {
116 0     0 1 0 my ( $class, $bptree ) = @_;
117 0         0 my $self;
118 0 0 0     0 if ( blessed $bptree && $bptree->isa('Bio::Tree::TreeI') ) {
119 0         0 $self = $fac->create_tree;
120             # bless $self, $class;
121 0         0 $self = $self->_recurse( $bptree->get_root_node );
122              
123             # copy name
124 0         0 my $name = $bptree->id;
125 0 0       0 $self->set_name($name) if defined $name;
126              
127             # copy score
128 0         0 my $score = $bptree->score;
129 0 0       0 $self->set_score($score) if defined $score;
130             }
131             else {
132 0         0 throw 'ObjectMismatch' => 'Not a bioperl tree!';
133             }
134 0         0 return $self;
135             }
136              
137             =begin comment
138              
139             Type : Internal method
140             Title : _recurse
141             Usage : $tree->_recurse( $bpnode );
142             Function: Traverses a bioperl tree, instantiates a Bio::Phylo::Forest::Node
143             object for every Bio::Tree::NodeI object it encounters, copying
144             the parent, sibling and child relationships.
145             Returns : None (modifies invocant).
146             Args : A Bio::Tree::NodeI object.
147              
148             =end comment
149              
150             =cut
151              
152             sub _recurse {
153 0     0   0 my ( $self, $bpnode, $parent ) = @_;
154 0         0 my $node = Bio::Phylo::Forest::Node->new_from_bioperl($bpnode);
155 0 0       0 if ($parent) {
156 0         0 $parent->set_child($node);
157             }
158 0         0 $self->insert($node);
159 0         0 foreach my $bpchild ( $bpnode->each_Descendent ) {
160 0         0 $self->_recurse( $bpchild, $node );
161             }
162 0         0 return $self;
163             }
164              
165             =begin comment
166              
167             Type : Internal method
168             Title : _analyze
169             Usage : $tree->_analyze;
170             Function: Traverses the tree, creates references to first_daughter,
171             last_daughter, next_sister and previous_sister.
172             Returns : A Bio::Phylo::Forest::Tree object.
173             Args : none.
174             Comments: This method only looks at the parent, so theoretically
175             one could mess around with the
176             Bio::Phylo::Forest::Node::set_parent(Bio::Phylo::Forest::Node) method and
177             subsequently call Bio::Phylo::Forest::Tree::_analyze to overwrite old
178             (and wrong) child and sister references with new (and correct) ones.
179              
180             =end comment
181              
182             =cut
183              
184             sub _analyze {
185 9     9   35 my $tree = $_[0];
186 9         47 my $nodes = $tree->get_entities;
187 9         27 foreach ( @{$nodes} ) {
  9         37  
188 229         561 $_->set_next_sister();
189 229         491 $_->set_previous_sister();
190 229         485 $_->set_first_daughter();
191 229         406 $_->set_last_daughter();
192             }
193 9         40 my ( $first, $next );
194              
195             # mmmm... O(N^2)
196 9         24 NODE: for my $i ( 0 .. $#{$nodes} ) {
  9         45  
197 229         328 $first = $nodes->[$i];
198 229         449 for my $j ( ( $i + 1 ) .. $#{$nodes} ) {
  229         499  
199 2081         2538 $next = $nodes->[$j];
200 2081         3302 my ( $firstp, $nextp ) =
201             ( $first->get_parent, $next->get_parent );
202 2081 100 66     7023 if ( $firstp && $nextp && $firstp == $nextp ) {
      100        
203 115 100       258 if ( !$first->get_next_sister ) {
204 6         33 $first->set_next_sister($next);
205             }
206 115 100       276 if ( !$next->get_previous_sister ) {
207 6         38 $next->set_previous_sister($first);
208             }
209 115         284 next NODE;
210             }
211             }
212             }
213              
214             # O(N)
215 9         28 foreach ( @{$nodes} ) {
  9         34  
216 229         449 my $p = $_->get_parent;
217 229 100       425 if ($p) {
218 220 100       459 if ( !$_->get_next_sister ) {
219 105         291 $p->set_last_daughter($_);
220 105         202 next;
221             }
222 115 100       242 if ( !$_->get_previous_sister ) {
223 105         213 $p->set_first_daughter($_);
224             }
225             }
226             }
227 9         16311 return $tree;
228             }
229              
230             =back
231              
232             =head2 QUERIES
233              
234             =over
235              
236             =item get_midpoint()
237              
238             Gets node that divides tree into two distance-balanced partitions.
239              
240             Type : Query
241             Title : get_midpoint
242             Usage : my $midpoint = $tree->get_midpoint;
243             Function: Gets node nearest to the middle of the longest path
244             Returns : A Bio::Phylo::Forest::Node object.
245             Args : NONE
246             Comments: This algorithm was ported from ETE.
247             It assumes the tree has branch lengths.
248              
249             =cut
250              
251             sub get_midpoint {
252 1     1 1 534 my $self = shift;
253 1         6 my $root = $self->get_root;
254 1         10 my $nA = $self->get_tallest_tip;
255 1         5 my $nB = $nA->get_farthest_node(1);
256 1 50       6 $logger->error("no farthest node!") unless $nB;
257 1         7 my $A2B_dist = $nA->calc_path_to_root + $nB->calc_path_to_root;
258 1         3 my $outgroup = $nA;
259 1         4 my $middist = $A2B_dist / 2;
260 1         2 my $cdist = 0;
261 1         2 my $current = $nA;
262 1         5 while ($current) {
263              
264 2 100       8 if ( $cdist > $middist ) {
265 1         4 last;
266             }
267             else {
268 1 50       5 if ( my $parent = $current->get_parent ) {
269 1         4 $cdist += $current->get_branch_length;
270 1         5 $current = $parent;
271             }
272             else {
273 0         0 last;
274             }
275             }
276             }
277 1         9 return $current;
278             }
279              
280             =item get_terminals()
281              
282             Get terminal nodes.
283              
284             Type : Query
285             Title : get_terminals
286             Usage : my @terminals = @{ $tree->get_terminals };
287             Function: Retrieves all terminal nodes in
288             the Bio::Phylo::Forest::Tree object.
289             Returns : An array reference of
290             Bio::Phylo::Forest::Node objects.
291             Args : NONE
292             Comments: If the tree is valid, this method
293             retrieves the same set of nodes as
294             $node->get_terminals($root). However,
295             because there is no recursion it may
296             be faster. Also, the node method by
297             the same name does not see orphans.
298              
299             =cut
300              
301             sub get_terminals {
302 151     151 1 15872 my $self = shift;
303 151         270 my @terminals;
304 151 100       533 if ( my $root = $self->get_root ) {
305             $root->visit_level_order(
306             sub {
307 5109     5109   6186 my $node = shift;
308 5109 100       9191 if ( $node->is_terminal ) {
309 2721         5685 push @terminals, $node;
310             }
311             }
312 121         1070 );
313             }
314             else {
315             $self->visit(
316             sub {
317 0     0   0 my $n = shift;
318 0 0       0 if ( $n->is_terminal ) {
319 0         0 push @terminals, $n;
320             }
321             }
322 30         185 );
323             }
324 151         1153 return \@terminals;
325             }
326              
327             =item get_internals()
328              
329             Get internal nodes.
330              
331             Type : Query
332             Title : get_internals
333             Usage : my @internals = @{ $tree->get_internals };
334             Function: Retrieves all internal nodes
335             in the Bio::Phylo::Forest::Tree object.
336             Returns : An array reference of
337             Bio::Phylo::Forest::Node objects.
338             Args : NONE
339             Comments: If the tree is valid, this method
340             retrieves the same set of nodes as
341             $node->get_internals($root). However,
342             because there is no recursion it may
343             be faster. Also, the node method by
344             the same name does not see orphans.
345              
346             =cut
347              
348             sub get_internals {
349 13     13 1 36 my $self = shift;
350 13         28 my @internals = grep { scalar @{ $_->get_children } } @{ $self->get_entities };
  184         275  
  184         397  
  13         68  
351 13         46 return \@internals;
352             }
353              
354             =item get_cherries()
355              
356             Get all cherries, i.e. nodes that have two terminal children
357              
358             Type : Query
359             Title : get_cherries
360             Usage : my @cherries = @{ $tree->get_cherries };
361             Function: Returns an array ref of cherries
362             Returns : ARRAY
363             Args : NONE
364              
365             =cut
366              
367             sub get_cherries {
368 1     1 1 30 my $self = shift;
369 1         4 my @cherries;
370 1         5 for my $node ( @{ $self->get_entities } ) {
  1         7  
371 10         22 my @children = @{ $node->get_children };
  10         37  
372            
373             # node has to be bifurcating
374 10 100       40 if ( scalar(@children) == 2 ) {
375            
376             # both children need to be tips
377 3 50 66     7 if ( not @{ $children[0]->get_children } and not @{ $children[1]->get_children } ) {
  3         13  
  2         10  
378 2         10 push @cherries, $node;
379             }
380             }
381             }
382 1         9 return \@cherries;
383             }
384              
385             =item get_all_rootings()
386              
387             Gets a forest of all rooted versions of the invocant tree.
388              
389             Type : Query
390             Title : get_all_rootings
391             Usage : my $forest = $tree->get_all_rootings;
392             Function: Returns an array ref of cherries
393             Returns : Bio::Phylo::Forest object
394             Args : NONE
395             Comments: This method assumes the invocant tree has a basal trichotomy.
396             "Rooted" trees with a basal bifurcation will give strange
397             results.
398              
399             =cut
400              
401             sub get_all_rootings {
402 0     0 1 0 my $self = shift;
403 0         0 my $forest = $fac->create_forest;
404            
405             # iterate over all nodes
406 0         0 my $i = 0;
407             $self->visit(sub{
408            
409             # clone the tree
410 0     0   0 my $clone = $self->clone;
411 0         0 my $node = $clone->get_by_index($i++);
412 0         0 my $anc = $node->get_ancestors;
413            
414             # create the new root if node isn't already root
415 0 0       0 if ( $anc->[0] ) {
416 0         0 my $nroot = $fac->create_node;
417 0         0 $nroot->set_child($node);
418 0         0 $clone->insert($nroot);
419 0 0       0 $anc->[0]->delete($node) if $anc->[0];
420            
421             # flip the nodes on the path to the root
422 0         0 for my $j ( 0 .. $#{ $anc } ) {
  0         0  
423 0         0 $nroot->set_child($anc->[$j]);
424 0         0 $nroot = $anc->[$j];
425             }
426 0         0 $forest->insert($clone);
427             }
428 0         0 });
429 0         0 return $forest;
430             }
431              
432            
433             =item get_root()
434              
435             Get root node.
436              
437             Type : Query
438             Title : get_root
439             Usage : my $root = $tree->get_root;
440             Function: Returns the root node.
441             Returns : Bio::Phylo::Forest::Node
442             Args : NONE
443              
444             =cut
445              
446             sub get_root {
447 2107     2107 1 85196 my $self = shift;
448            
449             # the simplest approach: look for nodes without parents
450 2107         3354 my ($root) = grep { ! $_->get_parent } @{ $self->get_entities };
  575127         945344  
  2107         4676  
451 2107 100       4287 if ( $root ) {
452 2048         5754 return $root;
453             }
454            
455             else {
456 59         81 my ( %children_of, %node_by_id );
457 59         63 for my $node ( @{ $self->get_entities } ) {
  59         103  
458 0         0 $node_by_id{ $node->get_id } = $node;
459 0 0       0 if ( my $parent = $node->get_parent ) {
460 0         0 my $parent_id = $parent->get_id;
461 0 0       0 $children_of{$parent_id} = [] if not $children_of{$parent_id};
462 0         0 push @{ $children_of{$parent_id} }, $node;
  0         0  
463             }
464             else {
465 0         0 return $node;
466             }
467             }
468 59         111 for my $parent ( keys %children_of ) {
469 0 0       0 if ( not exists $node_by_id{$parent} ) {
470 0         0 my @children = @{ $children_of{$parent} };
  0         0  
471 0 0       0 if ( scalar @children > 1 ) {
472 0         0 $logger->warn("Tree has multiple roots");
473             }
474 0         0 return shift @children;
475             }
476             }
477 59         156 return;
478             }
479             }
480              
481             =item get_ntax()
482              
483             Gets number of tips
484              
485             Type : Query
486             Title : get_ntax
487             Usage : my $ntax = $tree->get_ntax;
488             Function: Calculates the number of terminal nodes
489             Returns : Int
490             Args : NONE
491              
492             =cut
493            
494 0     0 1 0 sub get_ntax { scalar(@{ shift->get_terminals } ) }
  0         0  
495              
496             =item get_tallest_tip()
497              
498             Retrieves the node furthest from the root.
499              
500             Type : Query
501             Title : get_tallest_tip
502             Usage : my $tip = $tree->get_tallest_tip;
503             Function: Retrieves the node furthest from the
504             root in the current Bio::Phylo::Forest::Tree
505             object.
506             Returns : Bio::Phylo::Forest::Node
507             Args : NONE
508             Comments: If the tree has branch lengths, the tallest tip is
509             based on root-to-tip path length, else it is based
510             on number of nodes to root
511              
512             =cut
513              
514             sub get_tallest_tip {
515 3     3 1 6 my $self = shift;
516 3         4 my $criterion;
517              
518             # has (at least some) branch lengths
519 3 50       15 if ( $self->calc_tree_length ) {
520 3         6 $criterion = 'calc_path_to_root';
521             }
522             else {
523 0         0 $criterion = 'calc_nodes_to_root';
524             }
525 3         7 my $tallest;
526 3         13 my $height = 0;
527 3         6 for my $tip ( @{ $self->get_terminals } ) {
  3         13  
528 16 50       42 if ( my $path = $tip->$criterion ) {
529 16 100       41 if ( $path > $height ) {
530 4         10 $tallest = $tip;
531 4         8 $height = $path;
532             }
533             }
534             }
535 3         17 return $tallest;
536             }
537              
538             =item get_nodes_for_taxa()
539              
540             Gets node objects for the supplied taxon objects
541              
542             Type : Query
543             Title : get_nodes_for_taxa
544             Usage : my @nodes = @{ $tree->get_nodes_for_taxa(\@taxa) };
545             Function: Gets node objects for the supplied taxon objects
546             Returns : array ref of Bio::Phylo::Forest::Node objects
547             Args : A reference to an array of Bio::Phylo::Taxa::Taxon objects
548             or a Bio::Phylo::Taxa object
549              
550             =cut
551              
552             sub get_nodes_for_taxa {
553 5     5 1 9 my ( $self, $taxa ) = @_;
554 5         7 my ( $is_taxa, $taxa_objs );
555 5         8 eval { $is_taxa = looks_like_object $taxa, _TAXA_ };
  5         11  
556 5 100 66     16 if ( $is_taxa and not $@ ) {
557 1         4 $taxa_objs = $taxa->get_entities;
558             }
559             else {
560 4         6 $taxa_objs = $taxa;
561             }
562 5         6 my %ids = map { $_->get_id => 1 } @{$taxa_objs};
  11         21  
  5         11  
563 5         9 my @nodes;
564 5         8 for my $node ( @{ $self->get_entities } ) {
  5         11  
565 25 100       44 if ( my $taxon = $node->get_taxon ) {
566 15 100       31 push @nodes, $node if $ids{ $taxon->get_id };
567             }
568             }
569 5         14 return \@nodes;
570             }
571              
572             =item get_mrca()
573              
574             Get most recent common ancestor of argument nodes.
575              
576             Type : Query
577             Title : get_mrca
578             Usage : my $mrca = $tree->get_mrca(\@nodes);
579             Function: Retrieves the most recent
580             common ancestor of \@nodes
581             Returns : Bio::Phylo::Forest::Node
582             Args : A reference to an array of
583             Bio::Phylo::Forest::Node objects
584             in $tree.
585              
586             =cut
587              
588             sub get_mrca {
589 0     0 1 0 my ( $tree, $nodes ) = @_;
590 0 0 0     0 if ( not $nodes or not @{$nodes} ) {
    0          
591 0         0 return;
592             }
593 0         0 elsif ( scalar @{$nodes} == 1 ) {
594 0         0 return $nodes->[0];
595             }
596             else {
597 0         0 my $node1 = shift @{$nodes};
  0         0  
598 0         0 my $node2 = shift @{$nodes};
  0         0  
599 0         0 my $anc1 = $node1->get_ancestors;
600 0         0 my $anc2 = $node2->get_ancestors;
601 0         0 unshift @{$anc1}, $node1;
  0         0  
602 0         0 unshift @{$anc2}, $node2;
  0         0  
603 0         0 TRAVERSAL: for my $i ( 0 .. $#{$anc1} ) {
  0         0  
604 0         0 for my $j ( 0 .. $#{$anc2} ) {
  0         0  
605 0 0       0 if ( $anc1->[$i]->get_id == $anc2->[$j]->get_id ) {
606 0         0 unshift @{$nodes}, $anc1->[$i];
  0         0  
607 0         0 last TRAVERSAL;
608             }
609             }
610             }
611 0         0 return $tree->get_mrca($nodes);
612             }
613             }
614              
615             =back
616              
617             =head2 TESTS
618              
619             =over
620              
621             =item is_binary()
622              
623             Test if tree is bifurcating.
624              
625             Type : Test
626             Title : is_binary
627             Usage : if ( $tree->is_binary ) {
628             # do something
629             }
630             Function: Tests whether the invocant
631             object is bifurcating.
632             Returns : BOOLEAN
633             Args : NONE
634              
635             =cut
636              
637             sub is_binary {
638 2     2 1 4 my $self = shift;
639 2         5 my $return = 1;
640             $self->visit(sub{
641 33     33   36 my $count = scalar(@{ shift->get_children });
  33         59  
642 33 100 100     92 $return = 0 if $count != 0 && $count != 2;
643 2         14 });
644 2         13 $return;
645             }
646              
647             =item is_ultrametric()
648              
649             Test if tree is ultrametric.
650              
651             Type : Test
652             Title : is_ultrametric
653             Usage : if ( $tree->is_ultrametric(0.01) ) {
654             # do something
655             }
656             Function: Tests whether the invocant is
657             ultrametric.
658             Returns : BOOLEAN
659             Args : Optional margin between pairwise
660             comparisons (default = 0).
661             Comments: The test is done by performing
662             all pairwise comparisons for
663             root-to-tip path lengths. Since many
664             programs introduce rounding errors
665             in branch lengths the optional argument is
666             available to test TRUE for nearly
667             ultrametric trees. For example, a value
668             of 0.01 indicates that no pairwise
669             comparison may differ by more than 1%.
670             Note: behaviour is undefined for
671             negative branch lengths.
672              
673             =cut
674              
675             sub is_ultrametric {
676 48     48 1 6000 my $tree = shift;
677 48   100     163 my $margin = shift || 0;
678 48         86 my ( @tips, %path );
679             $tree->visit_depth_first(
680             '-pre' => sub {
681 916     916   1243 my $node = shift;
682 916 100       1815 if ( my $parent = $node->get_parent ) {
683             $path{ $node->get_id } =
684 868   100     1566 $path{ $parent->get_id } +
685             ( $node->get_branch_length || 0 );
686             }
687             else {
688 48   100     144 $path{ $node->get_id } = $node->get_branch_length || 0;
689             }
690 916 100       2301 push @tips, $node if $node->is_terminal;
691             }
692 48         415 );
693 48         385 for my $i ( 0 .. ( $#tips - 1 ) ) {
694 410         727 my $id1 = $tips[$i]->get_id;
695 410         721 PATH: for my $j ( $i + 1 .. $#tips ) {
696 2196         3523 my $id2 = $tips[$j]->get_id;
697 2196 100       3644 next PATH unless $path{$id2};
698 2181 100       4463 return 0 if abs( 1 - $path{$id1} / $path{$id2} ) > $margin;
699             }
700             }
701 44         304 return 1;
702             }
703              
704             =item is_monophyletic()
705              
706             Tests if first argument (node array ref) is monophyletic with respect
707             to second argument.
708              
709             Type : Test
710             Title : is_monophyletic
711             Usage : if ( $tree->is_monophyletic(\@tips, $node) ) {
712             # do something
713             }
714             Function: Tests whether the set of \@tips is
715             monophyletic w.r.t. $outgroup.
716             Returns : BOOLEAN
717             Args : A reference to a list of nodes, and a node.
718             Comments: This method is essentially the
719             same as
720             &Bio::Phylo::Forest::Node::is_outgroup_of.
721              
722             =cut
723              
724             sub is_monophyletic {
725 2     2 1 11 my $tree = shift;
726 2         6 my ( $nodes, $outgroup );
727 2 50       11 if ( @_ == 2 ) {
    0          
728 2         7 ( $nodes, $outgroup ) = @_;
729             }
730             elsif ( @_ == 4 ) {
731 0         0 my %args = @_;
732 0         0 $nodes = $args{'-nodes'};
733 0         0 $outgroup = $args{'-outgroup'};
734             }
735 2         6 for my $i ( 0 .. $#{$nodes} ) {
  2         10  
736 4         9 for my $j ( ( $i + 1 ) .. $#{$nodes} ) {
  4         8  
737 4         21 my $mrca = $nodes->[$i]->get_mrca( $nodes->[$j] );
738 4 100       13 return if $mrca->is_ancestor_of($outgroup);
739             }
740             }
741 1         8 return 1;
742             }
743              
744             =item is_paraphyletic()
745              
746             Type : Test
747             Title : is_paraphyletic
748             Usage : if ( $tree->is_paraphyletic(\@nodes,$node) ){ }
749             Function: Tests whether or not a given set of nodes are paraphyletic
750             (representing the full clade) given an outgroup
751             Returns : [-1,0,1] , -1 if the group is not monophyletic
752             0 if the group is not paraphyletic
753             1 if the group is paraphyletic
754             Args : Array ref of node objects which are in the tree,
755             Outgroup to compare the nodes to
756              
757             =cut
758              
759             sub is_paraphyletic {
760 0     0 1 0 my $tree = shift;
761 0         0 my ( $nodes, $outgroup );
762 0 0       0 if ( @_ == 2 ) {
    0          
763 0         0 ( $nodes, $outgroup ) = @_;
764             }
765             elsif ( @_ == 4 ) {
766 0         0 my %args = @_;
767 0         0 $nodes = $args{'-nodes'};
768 0         0 $outgroup = $args{'-outgroup'};
769             }
770 0 0       0 return -1 if !$tree->is_monophyletic( $nodes, $outgroup );
771 0         0 my @all = ( @{$nodes}, $outgroup );
  0         0  
772 0         0 my $mrca = $tree->get_mrca( \@all );
773 0         0 my $tips = $mrca->get_terminals;
774 0 0       0 return scalar @{$tips} == scalar @all ? 0 : 1;
  0         0  
775             }
776              
777             =item is_clade()
778              
779             Tests if argument (node array ref) forms a clade.
780              
781             Type : Test
782             Title : is_clade
783             Usage : if ( $tree->is_clade(\@tips) ) {
784             # do something
785             }
786             Function: Tests whether the set of
787             \@tips forms a clade
788             Returns : BOOLEAN
789             Args : A reference to an array of Bio::Phylo::Forest::Node objects, or a
790             reference to an array of Bio::Phylo::Taxa::Taxon objects, or a
791             Bio::Phylo::Taxa object
792             Comments:
793              
794             =cut
795              
796             sub is_clade {
797 3     3 1 6 my ( $tree, $arg ) = @_;
798 3         6 my ( $is_taxa, $is_node_array, $tips );
799              
800             # check if arg is a Taxa object
801 3         5 eval { $is_taxa = looks_like_object $arg, _TAXA_ };
  3         36  
802 3 50 33     11 if ( $is_taxa and not $@ ) {
803 0         0 $tips = $tree->get_nodes_for_taxa($arg);
804             }
805              
806             # check if arg is an array of Taxon object
807 3         4 eval { $is_node_array = looks_like_object $arg->[0], _TAXON_ };
  3         16  
808 3 50 33     12 if ( $is_node_array and not $@ ) {
809 3         11 $tips = $tree->get_nodes_for_taxa($arg);
810             }
811             else {
812 0         0 $tips = $arg; # arg is an array of Node objects
813             }
814 3         4 my $mrca;
815 3         5 for my $i ( 1 .. $#{$tips} ) {
  3         8  
816 3 50       15 $mrca ? $mrca = $mrca->get_mrca( $tips->[$i] ) : $mrca =
817             $tips->[0]->get_mrca( $tips->[$i] );
818             }
819 3 100       4 scalar @{ $mrca->get_terminals } == scalar @{$tips} ? return 1 : return;
  3         8  
  3         18  
820             }
821              
822             =item is_cladogram()
823              
824             Tests if tree is a cladogram (i.e. no branch lengths)
825              
826             Type : Test
827             Title : is_cladogram
828             Usage : if ( $tree->is_cladogram() ) {
829             # do something
830             }
831             Function: Tests whether the tree is a
832             cladogram (i.e. no branch lengths)
833             Returns : BOOLEAN
834             Args : NONE
835             Comments:
836              
837             =cut
838              
839             sub is_cladogram {
840 1     1 1 2 my $tree = shift;
841 1         2 for my $node ( @{ $tree->get_entities } ) {
  1         3  
842 0 0       0 return 0 if defined $node->get_branch_length;
843             }
844 1         5 return 1;
845             }
846              
847             =back
848              
849             =head2 CALCULATIONS
850              
851             =over
852              
853             =item calc_branch_length_distance()
854              
855             Calculates the Euclidean branch length distance between two trees. See
856             Kuhner & Felsenstein (1994). A simulation comparison of phylogeny algorithms
857             under equal and uneqaul evolutionary rates. MBE 11(3):459-468.
858              
859             Type : Calculation
860             Title : calc_branch_length_distance
861             Usage : my $distance =
862             $tree1->calc_branch_length_distance($tree2);
863             Function: Calculates the Euclidean branch length distance between two trees
864             Returns : SCALAR, number
865             Args : NONE
866              
867             =cut
868              
869             #=item calc_robinson_foulds_distance()
870             #
871             #Calculates the Robinson and Foulds distance between two trees.
872             #
873             # Type : Calculation
874             # Title : calc_robinson_foulds_distance
875             # Usage : my $distance =
876             # $tree1->calc_robinson_foulds_distance($tree2);
877             # Function: Calculates the Robinson and Foulds distance between two trees
878             # Returns : SCALAR, number
879             # Args : NONE
880             #
881             #=cut
882             #
883             # sub calc_robinson_foulds_distance {
884             # my ( $self, $other ) = @_;
885             # my $tuples = $self->_calc_branch_diffs($other);
886             # my $sum = 0;
887             # for my $tuple ( @{ $tuples } ) {
888             # my $diff = $tuple->[0] - $tuple->[1];
889             # $sum += abs $diff;
890             # }
891             # return $sum;
892             # }
893             sub calc_branch_length_distance {
894 1     1 1 3 my ( $self, $other ) = @_;
895 1         5 my $squared = $self->calc_branch_length_score($other);
896 1         7 return sqrt($squared);
897             }
898              
899             =item calc_branch_length_score()
900              
901             Calculates the squared Euclidean branch length distance between two trees.
902              
903             Type : Calculation
904             Title : calc_branch_length_score
905             Usage : my $score =
906             $tree1->calc_branch_length_score($tree2);
907             Function: Calculates the squared Euclidean branch
908             length distance between two trees
909             Returns : SCALAR, number
910             Args : A Bio::Phylo::Forest::Tree object,
911             Optional second argument flags that results should be normalized
912             =cut
913              
914             sub calc_branch_length_score {
915 2     2 1 6 my ( $self, $other, $normalize ) = @_;
916 2         13 my $tuples = $self->_calc_branch_diffs($other);
917 2         6 my $sum = 0;
918 2         4 for my $tuple ( @{$tuples} ) {
  2         6  
919 5   50     21 my $diff = ( $tuple->[0] || 0 ) - ( $tuple->[1] || 0 );
      50        
920 5         14 $sum += $diff**2;
921             }
922 2 50       17 return $normalize ? $sum / scalar(@{$tuples}) : $sum;
  0         0  
923             }
924              
925              
926             =begin comment
927              
928             Returns an array ref containing array references, with the first element of
929             each nested array ref representing the length of the branch subtending a
930             particular split on the invocant (or 0), the second element the length of the
931             same branch on argument (or 0), the third element a boolean to indicate whether
932             the split was present in both trees, and the fourth element a sorted, comma-separated
933             list of the MD5-hashed names of all tips subtended by that split.
934              
935             Type : Calculation
936             Title : calc_branch_diffs
937             Usage : my $triples =
938             $tree1->calc_branch_diffs($tree2);
939             Function: Creates two-dimensional array of equivalent branch lengths
940             Returns : Two-dimensional array (triples)
941             Args : NONE
942              
943             =end comment
944              
945             =cut
946              
947             sub _calc_branch_diffs {
948 23     23   54 my ( $self, $other ) = @_;
949              
950             # we create an anonymous subroutine which
951             # we will apply to $self and $other
952             my $length_for_split_creator = sub {
953              
954             # so this will be $self and $other
955 46     46   84 my $tree = shift;
956              
957             # keys will be hashed, comma-separated tip names,
958             # values will be branch lengths
959 46         91 my %length_for_split;
960              
961             # this will assemble the comma-separated,
962             # hashed tip names
963             my %hash_for_node;
964              
965             # post-order traversal, so tips are processed first
966             $tree->visit_depth_first(
967             '-post' => sub {
968 671         873 my $node = shift;
969 671         1108 my $id = $node->get_id;
970 671         847 my @children = @{ $node->get_children };
  671         1093  
971 671         917 my $hash;
972              
973             # we only enter into this case AFTER tips
974             # have been processed, so %hash_for_node
975             # values will be assigned for all children
976 671 100 100     1432 if (@children and $node->get_parent) {
977              
978             # these will be growing lists from
979             # tips to root
980             my $unsorted = join ',',
981 265         433 map { $hash_for_node{ $_->get_id } } @children;
  531         852  
982              
983             # we need to split, sort and join
984             # so that splits where the subtended,
985             # higher topology is different still
986             # yield the same concatenated hash
987 265         1126 $hash = join ',', sort { $a cmp $b } split /,/,
  3051         3851  
988             $unsorted;
989              
990             # coerce to a numeric type
991 265   100     649 $length_for_split{$hash} = $node->get_branch_length || 0;
992             }
993             else {
994              
995             # this is how we ensure that every tip name is a
996             # single, unique string without unexpected characters
997             # (especially, commas).
998             # Digest::MD5 was in CORE since 5.7
999 406         1471 require Digest::MD5;
1000 406         912 $hash = Digest::MD5::md5( $node->get_name );
1001             }
1002              
1003             # store for the next recursion
1004 671         2102 $hash_for_node{$id} = $hash;
1005             }
1006 46         297 );
1007              
1008             # this is the return value for the anonymous sub
1009 46         550 return %length_for_split;
1010 23         152 };
1011              
1012             # here we execute the anonymous sub. twice.
1013 23         71 my %lengths_self = $length_for_split_creator->($self);
1014 23         116 my %lengths_other = $length_for_split_creator->($other);
1015 23         58 my @tuples;
1016              
1017             # first visit the splits in $self, which will identify
1018             # those it shares with $other and those missing in $other
1019 23         84 for my $split ( keys %lengths_self ) {
1020 133         150 my $tuple;
1021 133 100       194 if ( exists $lengths_other{$split} ) {
1022             $tuple =
1023 128   100     302 [ $lengths_self{$split}, $lengths_other{$split} || 0, 1, $split ];
1024             }
1025             else {
1026 5         13 $tuple = [ $lengths_self{$split}, 0, 0, $split ];
1027             }
1028 133         222 push @tuples, $tuple;
1029             }
1030              
1031             # then check if there are splits in $other but not in $self
1032 23         83 for my $split ( keys %lengths_other ) {
1033 132 100       216 if ( not exists $lengths_self{$split} ) {
1034 4         8 push @tuples, [ 0, $lengths_other{$split}, 0, $split ];
1035             }
1036             }
1037 23         143 return \@tuples;
1038             }
1039              
1040             =item calc_tree_length()
1041              
1042             Calculates the sum of all branch lengths.
1043              
1044             Type : Calculation
1045             Title : calc_tree_length
1046             Usage : my $tree_length =
1047             $tree->calc_tree_length;
1048             Function: Calculates the sum of all branch
1049             lengths (i.e. the tree length).
1050             Returns : FLOAT
1051             Args : NONE
1052              
1053             =cut
1054              
1055             sub calc_tree_length {
1056 40     40 1 62 my $self = shift;
1057 40         56 my $tl = 0;
1058             $self->visit(sub{
1059 628   100 628   1007 $tl += shift->get_branch_length || 0;
1060 40         224 });
1061 40         228 return $tl;
1062             }
1063              
1064             =item calc_tree_height()
1065              
1066             Calculates the height of the tree.
1067              
1068             Type : Calculation
1069             Title : calc_tree_height
1070             Usage : my $tree_height =
1071             $tree->calc_tree_height;
1072             Function: Calculates the height
1073             of the tree.
1074             Returns : FLOAT
1075             Args : NONE
1076             Comments: For ultrametric trees this
1077             method returns the height, but
1078             this is done by averaging over
1079             all root-to-tip path lengths, so
1080             for additive trees the result
1081             should consequently be interpreted
1082             differently.
1083              
1084             =cut
1085              
1086             sub calc_tree_height {
1087 3     3 1 9 my $self = shift;
1088 3         24 my $th = $self->calc_total_paths / $self->calc_number_of_terminals;
1089 3         18 return $th;
1090             }
1091              
1092             =item calc_number_of_nodes()
1093              
1094             Calculates the number of nodes.
1095              
1096             Type : Calculation
1097             Title : calc_number_of_nodes
1098             Usage : my $number_of_nodes =
1099             $tree->calc_number_of_nodes;
1100             Function: Calculates the number of
1101             nodes (internals AND terminals).
1102             Returns : INT
1103             Args : NONE
1104              
1105             =cut
1106              
1107             sub calc_number_of_nodes {
1108 1     1 1 4 my $self = shift;
1109 1         2 my $numnodes = scalar @{ $self->get_entities };
  1         6  
1110 1         10 return $numnodes;
1111             }
1112              
1113             =item calc_number_of_terminals()
1114              
1115             Calculates the number of terminal nodes.
1116              
1117             Type : Calculation
1118             Title : calc_number_of_terminals
1119             Usage : my $number_of_terminals =
1120             $tree->calc_number_of_terminals;
1121             Function: Calculates the number
1122             of terminal nodes.
1123             Returns : INT
1124             Args : NONE
1125              
1126             =cut
1127              
1128             sub calc_number_of_terminals {
1129 43     43 1 77 my $self = shift;
1130 43         71 my $numterm = scalar @{ $self->get_terminals };
  43         125  
1131 43         147 return $numterm;
1132             }
1133              
1134             =item calc_number_of_internals()
1135              
1136             Calculates the number of internal nodes.
1137              
1138             Type : Calculation
1139             Title : calc_number_of_internals
1140             Usage : my $number_of_internals =
1141             $tree->calc_number_of_internals;
1142             Function: Calculates the number
1143             of internal nodes.
1144             Returns : INT
1145             Args : NONE
1146              
1147             =cut
1148              
1149             sub calc_number_of_internals {
1150 5     5 1 15 my $self = shift;
1151 5         13 my $numint = scalar @{ $self->get_internals };
  5         23  
1152 5         28 return $numint;
1153             }
1154              
1155             =item calc_number_of_cherries()
1156              
1157             Calculates the number of cherries, i.e. the number of nodes that subtend
1158             exactly two tips. See for applications of this metric:
1159             L
1160              
1161             Type : Calculation
1162             Title : calc_number_of_cherries
1163             Usage : my $number_of_cherries =
1164             $tree->calc_number_of_cherries;
1165             Function: Calculates the number of cherries
1166             Returns : INT
1167             Args : NONE
1168              
1169             =cut
1170              
1171             sub calc_number_of_cherries {
1172 1     1 1 3 my $self = shift;
1173 1         3 my %cherry;
1174 1         2 for my $tip ( @{ $self->get_terminals } ) {
  1         6  
1175 4 50       20 if ( my $parent = $tip->get_parent ) {
1176 4 50       35 if ( $parent->is_preterminal ) {
1177 4         18 my $children = $parent->get_children;
1178 4 50       12 if ( scalar @{$children} == 2 ) {
  4         16  
1179 4         16 $cherry{ $parent->get_id }++;
1180             }
1181             }
1182             }
1183             }
1184 1         10 my @cherry_ids = keys %cherry;
1185 1         15 return scalar @cherry_ids;
1186             }
1187              
1188             =item calc_total_paths()
1189              
1190             Calculates the sum of all root-to-tip path lengths.
1191              
1192             Type : Calculation
1193             Title : calc_total_paths
1194             Usage : my $total_paths =
1195             $tree->calc_total_paths;
1196             Function: Calculates the sum of all
1197             root-to-tip path lengths.
1198             Returns : FLOAT
1199             Args : NONE
1200              
1201             =cut
1202              
1203             sub calc_total_paths {
1204 4     4 1 9 my $self = shift;
1205 4         9 my $tp = 0;
1206 4         8 foreach ( @{ $self->get_terminals } ) {
  4         30  
1207 36         90 $tp += $_->calc_path_to_root;
1208             }
1209 4         34 return $tp;
1210             }
1211              
1212             =item calc_redundancy()
1213              
1214             Calculates the amount of shared (redundant) history on the total.
1215              
1216             Type : Calculation
1217             Title : calc_redundancy
1218             Usage : my $redundancy =
1219             $tree->calc_redundancy;
1220             Function: Calculates the amount of shared
1221             (redundant) history on the total.
1222             Returns : FLOAT
1223             Args : NONE
1224             Comments: Redundancy is calculated as
1225             1 / ( treelength - height / ( ntax * height - height ) )
1226              
1227             =cut
1228              
1229             sub calc_redundancy {
1230 1     1 1 3 my $self = shift;
1231 1         6 my $tl = $self->calc_tree_length;
1232 1         9 my $th = $self->calc_tree_height;
1233 1         5 my $ntax = $self->calc_number_of_terminals;
1234 1         8 my $red = 1 - ( ( $tl - $th ) / ( ( $th * $ntax ) - $th ) );
1235 1         12 return $red;
1236             }
1237              
1238             =item calc_imbalance()
1239              
1240             Calculates Colless' coefficient of tree imbalance.
1241              
1242             Type : Calculation
1243             Title : calc_imbalance
1244             Usage : my $imbalance = $tree->calc_imbalance;
1245             Function: Calculates Colless' coefficient
1246             of tree imbalance.
1247             Returns : FLOAT
1248             Args : NONE
1249             Comments: As described in Colless, D.H., 1982.
1250             The theory and practice of phylogenetic
1251             systematics. Systematic Zoology 31(1): 100-104
1252              
1253             =cut
1254              
1255             sub calc_imbalance {
1256 6     6 1 19 my $self = shift;
1257 6         14 my %descendants;
1258 6         15 my $n = 0;
1259 6         11 my $sumdiff = 0;
1260             $self->visit_depth_first(
1261             '-post' => sub {
1262 119     119   153 my $node = shift;
1263 119         148 my @children = @{ $node->get_children };
  119         195  
1264            
1265             # node is internal, compute n descendants left and right
1266 119 100       233 if ( @children == 2 ) {
    100          
1267 55         77 my $li = shift @children;
1268 55         67 my $ri = shift @children;
1269 55         94 my $li_ndesc = $descendants{$li->get_id};
1270 55         96 my $ri_ndesc = $descendants{$ri->get_id};
1271 55         101 $sumdiff += abs($li_ndesc - $ri_ndesc);
1272 55         91 $descendants{$node->get_id} = $li_ndesc + $ri_ndesc;
1273             }
1274            
1275             # node is terminal, initialize tally of descendants
1276             elsif ( @children == 0 ) {
1277 63         88 $n++;
1278 63         110 $descendants{$node->get_id} = 1;
1279             }
1280            
1281             # node is either a polytomy or an unbranched internal. Can't proceed in either case.
1282             else {
1283 1         7 throw 'ObjectMismatch' => "Colless's imbalance only possible for binary trees";
1284             }
1285             }
1286 6         68 );
1287 5 50       62 if ( $n < 3 ) {
1288 0         0 $logger->error("too few nodes in tree: $n<=2");
1289 0         0 return undef;
1290             }
1291             else {
1292 5         105 return $sumdiff / ( ($n-1) * ($n-2) / 2 );
1293             }
1294             }
1295              
1296             =item calc_i2()
1297              
1298             Calculates I2 imbalance.
1299              
1300             Type : Calculation
1301             Title : calc_i2
1302             Usage : my $ci2 = $tree->calc_i2;
1303             Function: Calculates I2 imbalance.
1304             Returns : FLOAT
1305             Args : NONE
1306             Comments:
1307              
1308             =cut
1309              
1310             sub calc_i2 {
1311 0     0 1 0 my $self = shift;
1312 0         0 my ( $maxic, $sum, $I2 ) = ( 0, 0 );
1313 0 0       0 if ( !$self->is_binary ) {
1314 0         0 throw 'ObjectMismatch' => 'I2 imbalance only possible for binary trees';
1315             }
1316 0         0 my $numtips = $self->calc_number_of_terminals;
1317 0         0 $numtips -= 2;
1318 0         0 while ($numtips) {
1319 0         0 $maxic += $numtips;
1320 0         0 $numtips--;
1321             }
1322 0         0 foreach my $node ( @{ $self->get_internals } ) {
  0         0  
1323 0         0 my ( $fd, $ld, $ftips, $ltips ) =
1324             ( $node->get_first_daughter, $node->get_last_daughter, 0, 0 );
1325 0 0       0 if ( $fd->is_internal ) {
1326 0         0 foreach ( @{ $fd->get_descendants } ) {
  0         0  
1327 0 0       0 if ( $_->is_terminal ) {
1328 0         0 $ftips++;
1329             }
1330             else {
1331 0         0 next;
1332             }
1333             }
1334             }
1335             else {
1336 0         0 $ftips = 1;
1337             }
1338 0 0       0 if ( $ld->is_internal ) {
1339 0         0 foreach ( @{ $ld->get_descendants } ) {
  0         0  
1340 0 0       0 if ( $_->is_terminal ) {
1341 0         0 $ltips++;
1342             }
1343             else {
1344 0         0 next;
1345             }
1346             }
1347             }
1348             else {
1349 0         0 $ltips = 1;
1350             }
1351 0 0       0 next unless ( $ftips + $ltips - 2 );
1352 0         0 $sum += abs( $ftips - $ltips ) / abs( $ftips + $ltips - 2 );
1353             }
1354 0 0       0 if ( $maxic == 0 ) {
1355 0         0 $logger->error("too few nodes in tree: $maxic==0");
1356 0         0 return undef;
1357             }
1358             else {
1359 0         0 $I2 = $sum / $maxic;
1360 0         0 return $I2;
1361             }
1362             }
1363              
1364             =item calc_gamma()
1365              
1366             Calculates the Pybus & Harvey (2000) gamma statistic.
1367              
1368             Type : Calculation
1369             Title : calc_gamma
1370             Usage : my $gamma = $tree->calc_gamma();
1371             Function: Calculates the Pybus gamma statistic
1372             Returns : FLOAT
1373             Args : NONE
1374             Comments: As described in Pybus, O.G. and
1375             Harvey, P.H., 2000. Testing
1376             macro-evolutionary models using
1377             incomplete molecular phylogenies.
1378             Proc. R. Soc. Lond. B 267, 2267-2272
1379              
1380             =cut
1381              
1382             # code due to Aki Mimoto
1383             sub calc_gamma {
1384 0     0 1 0 my $self = shift;
1385 0         0 my $tl = $self->calc_tree_length;
1386 0         0 my $terminals = $self->get_terminals;
1387 0         0 my $n = scalar @{$terminals};
  0         0  
1388 0         0 my $height = $self->calc_tree_height;
1389              
1390             # Calculate the distance of each node to the root
1391             # my %soft_refs;
1392             # my $root = $self->get_root;
1393             # $soft_refs{$root} = 0;
1394             # my @nodes = $root;
1395             # while (@nodes) {
1396             # my $node = shift @nodes;
1397             # my $path_len = $soft_refs{$node} += $node->get_branch_length;
1398             # my $children = $node->get_children or next;
1399             # for my $child (@$children) {
1400             # $soft_refs{$child} = $path_len;
1401             # }
1402             # push @nodes, @{$children};
1403             # }
1404             # the commented out block is more efficiently implemented like so:
1405             my %soft_refs =
1406 0         0 map { $_ => $_->calc_path_to_root } @{ $self->get_entities };
  0         0  
  0         0  
1407              
1408             # Then, we know how far each node is from the root. At this point, we
1409             # can sort through and create the @g array
1410             my %node_spread =
1411 0         0 map { ( $_ => 1 ) } values %soft_refs; # remove duplicates
  0         0  
1412 0         0 my @sorted_nodes = sort { $a <=> $b } keys %node_spread;
  0         0  
1413 0         0 my $prev = 0;
1414 0         0 my @g;
1415 0         0 for my $length (@sorted_nodes) {
1416 0         0 push @g, $length - $prev;
1417 0         0 $prev = $length;
1418             }
1419 0         0 my $sum = 0;
1420 0         0 eval { require Math::BigFloat };
  0         0  
1421 0 0       0 if ($@) { # BigFloat is not available.
1422 0         0 for ( my $i = 2 ; $i < $n ; $i++ ) {
1423 0         0 for ( my $k = 2 ; $k <= $i ; $k++ ) {
1424 0         0 $sum += $k * $g[ $k - 1 ];
1425             }
1426             }
1427 0         0 my $numerator = ( $sum / ( $n - 2 ) ) - ( $tl / 2 );
1428 0         0 my $denominator = $tl * sqrt( 1 / ( 12 * ( $n - 2 ) ) );
1429 0         0 $self->_store_cache( $numerator / $denominator );
1430 0         0 return $numerator / $denominator;
1431             }
1432              
1433             # Big Float is available. We'll use it then
1434 0         0 $sum = Math::BigFloat->new(0);
1435 0         0 for ( my $i = 2 ; $i < $n ; $i++ ) {
1436 0         0 for ( my $k = 2 ; $k <= $i ; $k++ ) {
1437 0         0 $sum->badd( $k * $g[ $k - 1 ] );
1438             }
1439             }
1440 0         0 $sum->bdiv( $n - 2 );
1441 0         0 $sum->bsub( $tl / 2 );
1442 0         0 my $denominator = Math::BigFloat->new(1);
1443 0         0 $denominator->bdiv( 12 * ( $n - 2 ) );
1444 0         0 $denominator->bsqrt();
1445 0         0 $sum->bdiv( $denominator * $tl );
1446            
1447             # R seems to be unhappy about long numbers, so truncating
1448 0         0 $sum->accuracy(10);
1449 0         0 return $sum;
1450             }
1451              
1452             =item calc_fiala_stemminess()
1453              
1454             Calculates stemminess measure of Fiala and Sokal (1985).
1455              
1456             Type : Calculation
1457             Title : calc_fiala_stemminess
1458             Usage : my $fiala_stemminess =
1459             $tree->calc_fiala_stemminess;
1460             Function: Calculates stemminess measure
1461             Fiala and Sokal (1985).
1462             Returns : FLOAT
1463             Args : NONE
1464             Comments: As described in Fiala, K.L. and
1465             R.R. Sokal, 1985. Factors
1466             determining the accuracy of
1467             cladogram estimation: evaluation
1468             using computer simulation.
1469             Evolution, 39: 609-622
1470              
1471             =cut
1472              
1473             sub calc_fiala_stemminess {
1474 1     1 1 3 my $self = shift;
1475 1         4 my @internals = @{ $self->get_internals };
  1         19  
1476 1         4 my $total = 0;
1477 1         3 my $nnodes = ( scalar @internals - 1 );
1478 1         3 foreach my $node (@internals) {
1479 8 100       16 if ( $node->get_parent ) {
1480 7         13 my $desclengths = $node->get_branch_length;
1481 7         11 my @children = @{ $node->get_descendants };
  7         18  
1482 7         23 for my $child (@children) {
1483 44         82 $desclengths += $child->get_branch_length;
1484             }
1485 7         15 $total += ( $node->get_branch_length / $desclengths );
1486             }
1487             }
1488 1 50       4 if ( $nnodes ) {
1489 1         10 return $total /= $nnodes;
1490             }
1491             else {
1492 0         0 $logger->error("too few nodes in tree: n-1=$nnodes");
1493 0         0 return undef;
1494             }
1495             }
1496              
1497             =item calc_rohlf_stemminess()
1498              
1499             Calculates stemminess measure from Rohlf et al. (1990).
1500              
1501             Type : Calculation
1502             Title : calc_rohlf_stemminess
1503             Usage : my $rohlf_stemminess =
1504             $tree->calc_rohlf_stemminess;
1505             Function: Calculates stemminess measure
1506             from Rohlf et al. (1990).
1507             Returns : FLOAT
1508             Args : NONE
1509             Comments: As described in Rohlf, F.J.,
1510             W.S. Chang, R.R. Sokal, J. Kim,
1511             1990. Accuracy of estimated
1512             phylogenies: effects of tree
1513             topology and evolutionary model.
1514             Evolution, 44(6): 1671-1684
1515              
1516             =cut
1517              
1518             sub calc_rohlf_stemminess {
1519              
1520             # invocant is a tree
1521 3     3 1 8 my $self = shift;
1522 3 100       16 throw ObjectMismatch => "This algorithm isn't generalized to
1523             deal with multifurcations" if $self->calc_resolution < 1;
1524 2 100       13 throw ObjectMismatch => "This algorithm requires branch lengths"
1525             unless $self->calc_tree_length;
1526              
1527             # all internal nodes in the tree
1528 1         4 my @internals = @{ $self->get_internals };
  1         5  
1529              
1530             # all terminal nodes in the tree
1531 1         5 my @terminals = @{ $self->get_terminals };
  1         4  
1532              
1533             # this will become the sum of all STni
1534 1         3 my $total = 0;
1535              
1536             # 1/(t-2), by which we multiply total
1537 1         4 my $one_over_t_minus_two = 1 / ( scalar @terminals - 2 );
1538              
1539             # iterate over all nodes, as per equation (1)
1540 1         4 for my $node (@internals) {
1541              
1542             # only process nodes that aren't the root
1543 8 100       20 if ( my $parent = $node->get_parent ) {
1544              
1545             # Wj->i is defined as "the length of the edge
1546             # (in time units) between HTU i (a hypothetical
1547             # taxonomic unit, i.e. an internal node) and
1548             # its ancestor j"
1549 7         17 my $Wj_i = $node->get_branch_length;
1550              
1551             # hj is defined as "the 'height' of HTU j (the
1552             # time of its origin, a known quantity since we
1553             # know the true tree in these simulations)".
1554 7         22 my $hj = $parent->calc_path_to_root;
1555 7 100       24 if ( !$hj ) {
1556 2         6 next;
1557             }
1558              
1559             # as per equation (2) in Rohlf et al. (1990)
1560 5         13 $total += ( $Wj_i / $hj );
1561             }
1562             }
1563              
1564             # multiply by 1/(t-2) as per equation (1)
1565 1         11 return $one_over_t_minus_two * $total;
1566             }
1567              
1568             =item calc_resolution()
1569              
1570             Calculates tree resolution.
1571              
1572             Type : Calculation
1573             Title : calc_resolution
1574             Usage : my $resolution =
1575             $tree->calc_resolution;
1576             Function: Calculates the number
1577             of internal nodes over the
1578             total number of internal nodes
1579             on a fully bifurcating
1580             tree of the same size.
1581             Returns : FLOAT
1582             Args : NONE
1583              
1584             =cut
1585              
1586             sub calc_resolution {
1587 4     4 1 9 my $self = shift;
1588 4         24 my $res = $self->calc_number_of_internals /
1589             ( $self->calc_number_of_terminals - 1 );
1590 4         28 return $res;
1591             }
1592              
1593             =item calc_branching_times()
1594              
1595             Calculates cumulative branching times.
1596              
1597             Type : Calculation
1598             Title : calc_branching_times
1599             Usage : my $branching_times =
1600             $tree->calc_branching_times;
1601             Function: Returns a two-dimensional array.
1602             The first dimension consists of
1603             the "records", so that in the
1604             second dimension $AoA[$first][0]
1605             contains the internal node references,
1606             and $AoA[$first][1] the branching
1607             time of the internal node. The
1608             records are orderered from root to
1609             tips by time from the origin.
1610             Returns : SCALAR[][] or FALSE
1611             Args : NONE
1612              
1613             =cut
1614              
1615             sub calc_branching_times {
1616 6     6 1 20 my $self = shift;
1617 6         15 my @branching_times;
1618 6 100       37 if ( !$self->is_ultrametric(0.01) ) {
1619 1         6 throw 'ObjectMismatch' =>
1620             'tree isn\'t ultrametric, results would be meaningless';
1621             }
1622             else {
1623 5         15 my @temp;
1624 5         17 my $seen_tip = 0;
1625             $self->visit_depth_first(
1626             '-pre' => sub {
1627 125     125   194 my $node = shift;
1628 125 100 100     383 if ( not $seen_tip or $node->is_internal ) {
1629 65         173 my $bt = $node->get_branch_length;
1630 65 100       165 if ( my $parent = $node->get_parent ) {
1631 60         179 $bt += $parent->get_generic('bt');
1632             }
1633 65         244 $node->set_generic( 'bt' => $bt );
1634 65         152 push @temp, [ $node, $bt ];
1635 65 100       248 if ( $node->is_terminal ) {
1636 5         257 $seen_tip++;
1637             }
1638             }
1639             }
1640 5         67 );
1641 5         88 @branching_times = sort { $a->[1] <=> $b->[1] } @temp;
  184         558  
1642             }
1643 5         31 return \@branching_times;
1644             }
1645              
1646             =item calc_waiting_times()
1647              
1648             Calculates intervals between splits.
1649              
1650             Type : Calculation
1651             Title : calc_waiting_times
1652             Usage : my $waitings =
1653             $tree->calc_waiting_times;
1654             Function: Returns a two-dimensional array.
1655             The first dimension consists of
1656             the "records", so that in the
1657             second dimension $AoA[$first][0]
1658             contains the internal node references,
1659             and $AoA[$first][1] the waiting
1660             time of the internal node. The
1661             records are orderered from root to
1662             tips by time from the origin.
1663             Returns : SCALAR[][] or FALSE
1664             Args : NONE
1665              
1666             =cut
1667              
1668             sub calc_waiting_times {
1669 3     3 1 29 my $self = shift;
1670 3         30 my $times = $self->calc_branching_times;
1671 3         9 for ( my $i = $#{$times} ; $i > 0 ; $i-- ) {
  3         23  
1672 44         88 $times->[$i]->[1] -= $times->[ $i - 1 ]->[1];
1673             }
1674 3         19 return $times;
1675             }
1676              
1677             =item calc_node_ages()
1678              
1679             Calculates node ages.
1680              
1681             Type : Calculation
1682             Title : calc_node_ages
1683             Usage : $tree->calc_node_ages;
1684             Function: Calculates the age of all the nodes in the tree (i.e. the distance
1685             from the tips) and assigns these to the 'age' slot, such that,
1686             after calling this method, the age of any one node can be retrieved
1687             by calling $node->get_generic('age');
1688             Returns : The invocant
1689             Args : NONE
1690             Comments: This method computes, in a sense, the opposite of
1691             calc_branching_times: here, we compute the distance from the tips
1692             (i.e. how long ago the split occurred), whereas calc_branching_times
1693             calculates the distance from the root.
1694              
1695             =cut
1696              
1697             sub calc_node_ages {
1698 1     1 1 3 my $self = shift;
1699             $self->visit_depth_first(
1700             '-post' => sub {
1701 5     5   8 my $node = shift;
1702 5         10 my $age = 0;
1703 5 100       10 if ( my $child = $node->get_child(0) ) {
1704 2         6 $age =
1705             $child->get_generic('age') + $child->get_branch_length;
1706             }
1707 5         14 $node->set_generic( 'age' => $age );
1708             }
1709 1         12 );
1710 1         8 return $self;
1711             }
1712              
1713             =item calc_ltt()
1714              
1715             Calculates lineage-through-time data points.
1716              
1717             Type : Calculation
1718             Title : calc_ltt
1719             Usage : my $ltt = $tree->calc_ltt;
1720             Function: Returns a two-dimensional array.
1721             The first dimension consists of the
1722             "records", so that in the second
1723             dimension $AoA[$first][0] contains
1724             the internal node references, and
1725             $AoA[$first][1] the branching time
1726             of the internal node, and $AoA[$first][2]
1727             the cumulative number of lineages over
1728             time. The records are orderered from
1729             root to tips by time from the origin.
1730             Returns : SCALAR[][] or FALSE
1731             Args : NONE
1732              
1733             =cut
1734              
1735             sub calc_ltt {
1736 2     2 1 6 my $self = shift;
1737 2 100       9 if ( !$self->is_ultrametric(0.01) ) {
1738 1         9 throw 'ObjectMismatch' =>
1739             'tree isn\'t ultrametric, results are meaningless';
1740             }
1741 1         8 my $ltt = ( $self->calc_branching_times );
1742 1         4 my $lineages = 1;
1743 1         4 for my $i ( 0 .. $#{$ltt} ) {
  1         4  
1744 9         12 $lineages += ( scalar @{ $ltt->[$i][0]->get_children } - 1 );
  9         17  
1745 9         17 $ltt->[$i][2] = $lineages;
1746             }
1747 1         10 return $ltt;
1748             }
1749              
1750             =item calc_symdiff()
1751              
1752             Calculates the symmetric difference metric between invocant and argument. This
1753             metric is identical to the Robinson-Foulds tree comparison distance. See
1754             L
1755              
1756             Type : Calculation
1757             Title : calc_symdiff
1758             Usage : my $symdiff =
1759             $tree->calc_symdiff($other_tree);
1760             Function: Returns the symmetric difference
1761             metric between $tree and $other_tree,
1762             sensu Penny and Hendy, 1985.
1763             Returns : SCALAR
1764             Args : A Bio::Phylo::Forest::Tree object,
1765             Optional second argument flags that results should be normalized
1766             Comments: Trees in comparison must span
1767             the same set of terminal taxa
1768             or results are meaningless.
1769              
1770             =cut
1771              
1772             sub calc_symdiff {
1773 21     21 1 101 my ( $tree, $other_tree, $normalize ) = @_;
1774 21         95 my $tuples = $tree->_calc_branch_diffs($other_tree);
1775 21         38 my $symdiff = 0;
1776             #use Data::Dumper;
1777             #warn Dumper($tuples);
1778 21         36 for my $tuple ( @{$tuples} ) {
  21         50  
1779 132 100       214 $symdiff++ unless $tuple->[2];
1780             }
1781 21 100       163 return $normalize ? $symdiff / scalar(@{$tuples}) : $symdiff;
  3         14  
1782             }
1783              
1784             =item calc_avtd()
1785              
1786             Calculates the average taxonomic distinctiveness. See
1787             Clarke KR, Warwick RM (1998) A taxonomic distinctness index and its statistical
1788             properties. J Appl Ecol 35:523-525
1789             L
1790              
1791             Type : Calculation
1792             Title : calc_avtd
1793             Usage : my $avtd = $tree->calc_avtd;
1794             Function: Returns the average taxonomic distinctiveness
1795             Returns : SCALAR
1796             Args : A Bio::Phylo::Forest::Tree object
1797             Comments:
1798              
1799             =cut
1800              
1801             sub calc_avtd {
1802 0     0 1 0 my $tree = shift;
1803 0         0 my @tips = @{ $tree->get_terminals };
  0         0  
1804 0         0 my $dist = 0;
1805 0         0 for my $i ( 0 .. $#tips - 1 ) {
1806 0         0 for my $j ( $i + 1 .. $#tips ) {
1807 0         0 $dist += $tips[$i]->calc_patristic_distance($tips[$j]);
1808             }
1809             }
1810 0         0 return $dist / scalar(@tips);
1811             }
1812              
1813             =item calc_fp()
1814              
1815             Calculates the Fair Proportion value for each terminal.
1816              
1817             Type : Calculation
1818             Title : calc_fp
1819             Usage : my $fp = $tree->calc_fp();
1820             Function: Returns the Fair Proportion
1821             value for each terminal
1822             Returns : HASHREF
1823             Args : NONE
1824              
1825             =cut
1826              
1827             # code due to Aki Mimoto
1828             sub calc_fp {
1829 0     0 1 0 my $self = shift;
1830              
1831             # First establish how many children sit on each of the nodes
1832 0         0 my %weak_ref;
1833 0         0 my $terminals = $self->get_terminals;
1834 0         0 for my $terminal (@$terminals) {
1835 0         0 my $index = $terminal;
1836 0         0 do { $weak_ref{$index}++ } while ( $index = $index->get_parent );
  0         0  
1837             }
1838              
1839             # Then, assign each terminal a value
1840 0         0 my $fp = {};
1841 0         0 for my $terminal (@$terminals) {
1842 0         0 my $name = $terminal->get_name;
1843 0         0 my $fpi = 0;
1844 0         0 do {
1845             $fpi +=
1846 0   0     0 ( $terminal->get_branch_length || 0 ) / $weak_ref{$terminal};
1847             } while ( $terminal = $terminal->get_parent );
1848 0         0 $fp->{$name} = $fpi;
1849             }
1850 0         0 return $fp;
1851             }
1852              
1853             =item calc_fp_mean()
1854              
1855             Calculates the mean Fair Proportion value over all terminals.
1856              
1857             Type : Calculation
1858             Title : calc_fp_mean
1859             Usage : my $fp = $tree->calc_fp_mean();
1860             Function: Returns the mean Fair Proportion
1861             value over all terminals
1862             Returns : FLOAT
1863             Args : NONE
1864              
1865             =cut
1866            
1867             sub calc_fp_mean {
1868 0     0 1 0 my $self = shift;
1869 0         0 my $fp = $self->calc_fp;
1870 0         0 my @fp = values %{ $fp };
  0         0  
1871 0         0 return sum(@fp)/scalar(@fp);
1872             }
1873              
1874             =item calc_es()
1875              
1876             Calculates the Equal Splits value for each terminal
1877              
1878             Type : Calculation
1879             Title : calc_es
1880             Usage : my $es = $tree->calc_es();
1881             Function: Returns the Equal Splits value for each terminal
1882             Returns : HASHREF
1883             Args : NONE
1884              
1885             =cut
1886              
1887             # code due to Aki Mimoto
1888             sub calc_es {
1889 0     0 1 0 my $self = shift;
1890              
1891             # First establish how many children sit on each of the nodes
1892 0         0 my $terminals = $self->get_terminals;
1893 0         0 my $es = {};
1894 0         0 for my $terminal ( @{$terminals} ) {
  0         0  
1895 0         0 my $name = $terminal->get_name;
1896 0         0 my $esi = 0;
1897 0         0 my $divisor = 1;
1898 0         0 do {
1899 0   0     0 my $length = $terminal->get_branch_length || 0;
1900 0   0     0 my $children = $terminal->get_children || [];
1901 0   0     0 $divisor *= @$children || 1;
1902 0         0 $esi += $length / $divisor;
1903             } while ( $terminal = $terminal->get_parent );
1904 0         0 $es->{$name} = $esi;
1905             }
1906 0         0 return $es;
1907             }
1908              
1909             =item calc_es_mean()
1910              
1911             Calculates the mean Equal Splits value over all terminals
1912              
1913             Type : Calculation
1914             Title : calc_es_mean
1915             Usage : my $es = $tree->calc_es_mean();
1916             Function: Returns the Equal Splits value over all terminals
1917             Returns : FLOAT
1918             Args : NONE
1919              
1920             =cut
1921              
1922             sub calc_es_mean {
1923 0     0 1 0 my $self = shift;
1924 0         0 my $es = $self->calc_es;
1925 0         0 my @es = values %{ $es };
  0         0  
1926 0         0 return sum(@es)/scalar(@es);
1927             }
1928              
1929             =item calc_pe()
1930              
1931             Calculates the Pendant Edge value for each terminal.
1932              
1933             Type : Calculation
1934             Title : calc_pe
1935             Usage : my $es = $tree->calc_pe();
1936             Function: Returns the Pendant Edge value for each terminal
1937             Returns : HASHREF
1938             Args : NONE
1939              
1940             =cut
1941              
1942             # code due to Aki Mimoto
1943             sub calc_pe {
1944 0     0 1 0 my $self = shift;
1945 0 0       0 my $terminals = $self->get_terminals or return {};
1946             my $pe =
1947 0         0 { map { $_->get_name => $_->get_branch_length } @{$terminals} };
  0         0  
  0         0  
1948 0         0 return $pe;
1949             }
1950              
1951             =item calc_pe_mean()
1952              
1953             Calculates the mean Pendant Edge value over all terminals
1954              
1955             Type : Calculation
1956             Title : calc_pe_mean
1957             Usage : my $es = $tree->calc_pe_mean();
1958             Function: Returns the mean Pendant Edge value over all terminals
1959             Returns : FLOAT
1960             Args : NONE
1961              
1962             =cut
1963              
1964             sub calc_pe_mean {
1965 0     0 1 0 my $self = shift;
1966 0         0 my $pe = $self->calc_pe;
1967 0         0 my @pe = values %{ $pe };
  0         0  
1968 0         0 return sum(@pe)/scalar(@pe);
1969             }
1970              
1971             =item calc_shapley()
1972              
1973             Calculates the Shapley value for each terminal.
1974              
1975             Type : Calculation
1976             Title : calc_shapley
1977             Usage : my $es = $tree->calc_shapley();
1978             Function: Returns the Shapley value for each terminal
1979             Returns : HASHREF
1980             Args : NONE
1981              
1982             =cut
1983              
1984             # code due to Aki Mimoto
1985             sub calc_shapley {
1986 0     0 1 0 my $self = shift;
1987              
1988             # First find out how many tips are at the ends of each edge.
1989 0 0       0 my $terminals = $self->get_terminals or return; # nothing to see!
1990 0         0 my $edge_lookup = {};
1991 0         0 my $index = $terminals->[0];
1992              
1993             # Iterate through the edges and find out which side each terminal reside
1994 0         0 _calc_shapley_traverse( $index, undef, $edge_lookup, 'root' );
1995              
1996             # At this point, it's possible to create the calculation matrix
1997 0         0 my $n = @$terminals;
1998 0         0 my @m;
1999 0         0 my $edges = [ keys %$edge_lookup ];
2000 0         0 for my $e ( 0 .. $#$edges ) {
2001 0         0 my $edge = $edges->[$e];
2002             my $el =
2003 0         0 $edge_lookup->{$edge}; # Lookup for terminals on one edge side
2004             my $v =
2005 0         0 keys %{ $el
2006 0         0 ->{terminals} }; # Number of elements on one side of the edge
2007 0         0 for my $l ( 0 .. $#$terminals ) {
2008 0         0 my $terminal = $terminals->[$l];
2009 0         0 my $name = $terminal->get_name;
2010 0 0       0 if ( $el->{terminals}{$name} ) {
2011 0         0 $m[$l][$e] = ( $n - $v ) / ( $n * $v );
2012             }
2013             else {
2014 0         0 $m[$l][$e] = $v / ( $n * ( $n - $v ) );
2015             }
2016             }
2017             }
2018              
2019             # Now we can calculate through the matrix
2020 0         0 my $shapley = {};
2021 0         0 for my $l ( 0 .. $#$terminals ) {
2022 0         0 my $terminal = $terminals->[$l];
2023 0         0 my $name = $terminal->get_name;
2024 0         0 for my $e ( 0 .. $#$edges ) {
2025 0         0 my $edge = $edge_lookup->{ $edges->[$e] };
2026 0         0 $shapley->{$name} += $edge->{branch_length} * $m[$l][$e];
2027             }
2028             }
2029 0         0 return $shapley;
2030             }
2031              
2032             sub _calc_shapley_traverse {
2033              
2034             # This does a depth first traversal to assign the terminals
2035             # to the outgoing side of each branch.
2036 0     0   0 my ( $index, $previous, $edge_lookup, $direction ) = @_;
2037 0 0       0 return unless $index;
2038 0   0     0 $previous ||= '';
2039              
2040             # Is this element a root?
2041 0         0 my $is_root = !$index->get_parent;
2042              
2043             # Now assemble all the terminal datapoints and use the soft reference
2044             # to keep track of which end the terminals are attached
2045 0         0 my @core_terminals;
2046 0 0 0     0 if ( $previous and $index->is_terminal ) {
2047 0         0 push @core_terminals, $index->get_name;
2048             }
2049 0   0     0 my $parent = $index->get_parent || '';
2050 0         0 my @child_terminals;
2051 0   0     0 my $child_nodes = $index->get_children || [];
2052 0         0 for my $child (@$child_nodes) {
2053 0 0       0 next unless $child ne $previous;
2054 0         0 push @child_terminals,
2055             _calc_shapley_traverse( $child, $index, $edge_lookup, 'tip' );
2056             }
2057 0         0 my @parent_terminals;
2058 0 0       0 if ( $parent ne $previous ) {
2059 0         0 push @parent_terminals,
2060             _calc_shapley_traverse( $parent, $index, $edge_lookup, 'root' );
2061             }
2062              
2063             # We're going to toss the root node and we need to merge the root's child branches
2064 0 0       0 unless ($is_root) {
2065             $edge_lookup->{$index} = {
2066             branch_length => $index->get_branch_length,
2067             terminals => {
2068 0 0       0 map { $_ => 1 } @core_terminals,
  0         0  
2069             $direction eq 'root' ? @parent_terminals : @child_terminals
2070             }
2071             };
2072             }
2073 0         0 return ( @core_terminals, @child_terminals, @parent_terminals );
2074             }
2075            
2076             =item calc_shapley_mean()
2077              
2078             Calculates the mean Shapley value over all terminals
2079              
2080             Type : Calculation
2081             Title : calc_shapley_mean
2082             Usage : my $es = $tree->calc_shapley_mean();
2083             Function: Returns the mean Shapley value over all terminals
2084             Returns : HASHREF
2085             Args : NONE
2086              
2087             =cut
2088              
2089             sub calc_shapley_mean {
2090 0     0 1 0 my $self = shift;
2091 0         0 my $sv = $self->calc_shapley;
2092 0         0 my @sv = values %{ $sv };
  0         0  
2093 0         0 return sum(@sv)/scalar(@sv);
2094             }
2095              
2096             =back
2097              
2098             =head2 VISITOR METHODS
2099              
2100             The following methods are a - not entirely true-to-form - implementation of the Visitor
2101             design pattern: the nodes in a tree are visited, and rather than having an object
2102             operate on them, a set of code references is used. This can be used, for example, to
2103             serialize a tree to a string format. To create a newick string without branch lengths
2104             you would use something like this (there is a more powerful 'to_newick' method, so this
2105             is just an example):
2106              
2107             $tree->visit_depth_first(
2108             '-pre_daughter' => sub { print '(' },
2109             '-post_daughter' => sub { print ')' },
2110             '-in' => sub { print shift->get_name },
2111             '-pre_sister' => sub { print ',' },
2112             );
2113             print ';';
2114              
2115             =over
2116              
2117             =item visit_depth_first()
2118              
2119             Visits nodes depth first
2120              
2121             Type : Visitor method
2122             Title : visit_depth_first
2123             Usage : $tree->visit_depth_first( -pre => sub{ ... }, -post => sub { ... } );
2124             Function: Visits nodes in a depth first traversal, executes subs
2125             Returns : $tree
2126             Args : Optional handlers in the order in which they would be executed on an internal node:
2127            
2128             # first event handler, is executed when node is reached in recursion
2129             -pre => sub { print "pre: ", shift->get_name, "\n" },
2130              
2131             # is executed if node has a daughter, but before that daughter is processed
2132             -pre_daughter => sub { print "pre_daughter: ", shift->get_name, "\n" },
2133            
2134             # is executed if node has a daughter, after daughter has been processed
2135             -post_daughter => sub { print "post_daughter: ", shift->get_name, "\n" },
2136              
2137             # is executed whether or not node has sisters, if it does have sisters
2138             # they're processed first
2139             -in => sub { print "in: ", shift->get_name, "\n" },
2140            
2141             # is executed if node has a sister, before sister is processed
2142             -pre_sister => sub { print "pre_sister: ", shift->get_name, "\n" },
2143            
2144             # is executed if node has a sister, after sister is processed
2145             -post_sister => sub { print "post_sister: ", shift->get_name, "\n" },
2146            
2147             # is executed last
2148             -post => sub { print "post: ", shift->get_name, "\n" },
2149            
2150             # specifies traversal order, default 'ltr' means first_daugher -> next_sister
2151             # traversal, alternate value 'rtl' means last_daughter -> previous_sister traversal
2152             -order => 'ltr', # ltr = left-to-right, 'rtl' = right-to-left
2153             Comments:
2154              
2155             =cut
2156              
2157             sub visit_depth_first {
2158 145     145 1 283 my $self = shift;
2159 145 50       599 if ( my $root = $self->get_root ) {
2160 145         653 $root->visit_depth_first(looks_like_hash @_);
2161             }
2162 144         468 return $self;
2163             }
2164              
2165             =item visit_breadth_first()
2166              
2167             Visits nodes breadth first
2168              
2169             Type : Visitor method
2170             Title : visit_breadth_first
2171             Usage : $tree->visit_breadth_first( -pre => sub{ ... }, -post => sub { ... } );
2172             Function: Visits nodes in a breadth first traversal, executes handlers
2173             Returns : $tree
2174             Args : Optional handlers in the order in which they would be executed on an internal node:
2175            
2176             # first event handler, is executed when node is reached in recursion
2177             -pre => sub { print "pre: ", shift->get_name, "\n" },
2178            
2179             # is executed if node has a sister, before sister is processed
2180             -pre_sister => sub { print "pre_sister: ", shift->get_name, "\n" },
2181            
2182             # is executed if node has a sister, after sister is processed
2183             -post_sister => sub { print "post_sister: ", shift->get_name, "\n" },
2184            
2185             # is executed whether or not node has sisters, if it does have sisters
2186             # they're processed first
2187             -in => sub { print "in: ", shift->get_name, "\n" },
2188            
2189             # is executed if node has a daughter, but before that daughter is processed
2190             -pre_daughter => sub { print "pre_daughter: ", shift->get_name, "\n" },
2191            
2192             # is executed if node has a daughter, after daughter has been processed
2193             -post_daughter => sub { print "post_daughter: ", shift->get_name, "\n" },
2194            
2195             # is executed last
2196             -post => sub { print "post: ", shift->get_name, "\n" },
2197            
2198             # specifies traversal order, default 'ltr' means first_daugher -> next_sister
2199             # traversal, alternate value 'rtl' means last_daughter -> previous_sister traversal
2200             -order => 'ltr', # ltr = left-to-right, 'rtl' = right-to-left
2201             Comments:
2202              
2203             =cut
2204              
2205             sub visit_breadth_first {
2206 0     0 1 0 my $self = shift;
2207 0         0 my %args = looks_like_hash @_;
2208 0         0 $self->get_root->visit_breadth_first(%args);
2209 0         0 return $self;
2210             }
2211              
2212             =item visit_level_order()
2213              
2214             Visits nodes in a level order traversal.
2215              
2216             Type : Visitor method
2217             Title : visit_level_order
2218             Usage : $tree->visit_level_order( sub{...} );
2219             Function: Visits nodes in a level order traversal, executes sub
2220             Returns : $tree
2221             Args : A subroutine reference that operates on visited nodes.
2222             Comments:
2223              
2224             =cut
2225              
2226             sub visit_level_order {
2227 3     3 1 9 my ( $tree, $sub ) = @_;
2228 3 50       14 if ( my $root = $tree->get_root ) {
2229 3         23 $root->visit_level_order($sub);
2230             }
2231             else {
2232 0         0 throw 'BadArgs' => 'Tree has no root';
2233             }
2234 3         11 return $tree;
2235             }
2236              
2237             =back
2238              
2239             =head2 TREE MANIPULATION
2240              
2241             =over
2242              
2243             =item chronompl()
2244              
2245             Modifies branch lengths using the mean path lengths method of
2246             Britton et al. (2002). For more about this method, see:
2247             L
2248              
2249             Type : Tree manipulator
2250             Title : chronompl
2251             Usage : $tree->chronompl;
2252             Function: Makes tree ultrametric using MPL method
2253             Returns : The modified, now ultrametric invocant.
2254             Args : NONE
2255             Comments:
2256              
2257             =cut
2258              
2259             sub chronompl {
2260 1     1 1 4 my $self = shift;
2261             $self->visit_depth_first(
2262             '-post' => sub {
2263 7     7   20 my $node = shift;
2264 7         16 my %paths;
2265 7         29 my $children = $node->get_children;
2266 7         18 for my $child ( @{$children} ) {
  7         23  
2267 6         28 my $cp = $child->get_generic('paths');
2268 6         30 my $bl = $child->get_branch_length;
2269 6         15 for my $id ( keys %{$cp} ) {
  6         24  
2270 8         37 $paths{$id} = $cp->{$id} + $bl;
2271             }
2272             }
2273 7 100       16 if ( not scalar @{$children} ) {
  7         23  
2274 4         18 $paths{ $node->get_id } = 0;
2275             }
2276 7         47 $node->set_generic( 'paths' => \%paths );
2277 7         21 my $total = 0;
2278 7         51 $total += $_ for values %paths;
2279 7         29 my $mean = $total / scalar keys %paths;
2280 7         27 $node->set_generic( 'age' => $mean );
2281             }
2282 1         20 );
2283 1         33 return $self->agetobl;
2284             }
2285              
2286             =item grafenbl()
2287              
2288             Computes and assigns branch lengths using Grafen's method, which makes
2289             node ages proportional to clade size. For more about this method, see:
2290             L
2291              
2292             Type : Tree manipulator
2293             Title : grafenbl
2294             Usage : $tree->grafenbl;
2295             Function: Assigns branch lengths using Grafen's method
2296             Returns : The modified, now ultrametric invocant.
2297             Args : Optional, a power ('rho') to which all node ages are raised
2298             Comments:
2299              
2300             =cut
2301              
2302             sub grafenbl {
2303 0     0 1 0 my ( $self, $rho ) = @_;
2304 0         0 my $total = 0;
2305             $self->visit_depth_first(
2306             '-post' => sub {
2307 0     0   0 my $node = shift;
2308 0 0       0 if ( $node->is_terminal ) {
2309 0         0 $node->set_generic( 'adjntips' => 0 );
2310 0         0 $node->set_generic( 'ntips' => 1 );
2311             }
2312             else {
2313 0         0 my $children = $node->get_children;
2314 0         0 my $ntips = 0;
2315 0         0 for my $child ( @{$children} ) {
  0         0  
2316 0         0 $ntips += $child->get_generic('ntips');
2317             }
2318 0         0 $node->set_generic( 'ntips' => $ntips );
2319 0         0 $node->set_generic( 'adjntips' => $ntips - 1 );
2320 0 0       0 $total = $ntips if $node->is_root;
2321             }
2322             }
2323 0         0 );
2324             $self->visit(
2325             sub {
2326 0     0   0 my $node = shift;
2327 0 0       0 if ($total) {
2328 0         0 my $age = $node->get_generic('adjntips') / $total;
2329 0 0       0 if ($rho) {
2330 0         0 $age = $age**$rho;
2331             }
2332 0         0 $node->set_generic( 'age' => $age );
2333             }
2334             }
2335 0         0 );
2336 0         0 return $self->agetobl;
2337             }
2338              
2339             =item agetobl()
2340              
2341             Converts node ages to branch lengths
2342              
2343             Type : Tree manipulator
2344             Title : agetobl
2345             Usage : $tree->agetobl;
2346             Function: Converts node ages to branch lengths
2347             Returns : The modified invocant.
2348             Args : NONE
2349             Comments: This method uses ages as assigned to the generic 'age' slot
2350             on the nodes in the trees. I.e. for each node in the tree,
2351             $node->get_generic('age') must return a number
2352              
2353             =cut
2354              
2355             sub agetobl {
2356 2     2 1 9 my $self = shift;
2357 2         6 for my $node ( @{ $self->get_entities } ) {
  2         14  
2358 46 100       108 if ( my $parent = $node->get_parent ) {
2359 44   100     91 my $mp = $node->get_generic('age') || 0;
2360 44         95 my $pmp = $parent->get_generic('age');
2361 44         115 $node->set_branch_length( $pmp - $mp );
2362             }
2363             else {
2364 2         11 $node->set_branch_length(0);
2365             }
2366             }
2367 2         14 return $self;
2368             }
2369              
2370             =item rankprobbl()
2371              
2372             Generates branch lengths by calculating the rank probabilities for each node and applying
2373             the expected waiting times under a pure birth process to these ranks. Uses Stadler's
2374             RANKPROB algorithm as described in:
2375              
2376             B et al., 2006. Estimating the relative order of speciation
2377             or coalescence events on a given phylogeny. I.
2378             B<2>:285. L.
2379              
2380             Type : Tree manipulator
2381             Title : rankprobbl
2382             Usage : $tree->rankprobbl;
2383             Function: Generates pure birth branch lengths
2384             Returns : The modified invocant.
2385             Args : NONE
2386             Comments: Tree must be fully bifurcating
2387              
2388             =cut
2389              
2390             sub rankprobbl {
2391 0     0 1 0 my $self = shift;
2392 0         0 my $root = $self->get_root;
2393 0         0 my $intervals = $root->calc_terminals;
2394 0         0 my @times;
2395 0         0 for my $i ( 1 .. $intervals ) {
2396 0   0     0 my $previous = $times[-1] || 0;
2397 0         0 push @times, $previous + ( 1 / $i );
2398             }
2399 0         0 my $total = $times[-1];
2400 0         0 for my $node ( @{ $self->get_internals } ) {
  0         0  
2401 0         0 my $rankprobs = $root->calc_rankprob($node);
2402 0         0 my @weighted_waiting_times;
2403 0         0 for my $i ( 1 .. $#{ $rankprobs } ) {
  0         0  
2404 0         0 push @weighted_waiting_times, $rankprobs->[$i] * $times[$i - 1];
2405             }
2406 0         0 my $age = $total - sum(@weighted_waiting_times);
2407 0         0 $node->set_generic( 'age' => $age );
2408             }
2409 0         0 $self->agetobl;
2410 0         0 $root->set_branch_length(1);
2411 0         0 return $self;
2412             }
2413              
2414             =item ultrametricize()
2415              
2416             Sets all root-to-tip path lengths equal.
2417              
2418             Type : Tree manipulator
2419             Title : ultrametricize
2420             Usage : $tree->ultrametricize;
2421             Function: Sets all root-to-tip path
2422             lengths equal by stretching
2423             all terminal branches to the
2424             height of the tallest node.
2425             Returns : The modified invocant.
2426             Args : NONE
2427             Comments: This method is analogous to
2428             the 'ultrametricize' command
2429             in Mesquite, i.e. no rate smoothing
2430             or anything like that happens, just
2431             a lengthening of terminal branches.
2432              
2433             =cut
2434              
2435             sub ultrametricize {
2436 1     1 1 4 my $tree = shift;
2437 1         2 my $tallest = 0;
2438 1         2 foreach ( @{ $tree->get_terminals } ) {
  1         5  
2439 9         21 my $path_to_root = $_->calc_path_to_root;
2440 9 100       21 if ( $path_to_root > $tallest ) {
2441 5         9 $tallest = $path_to_root;
2442             }
2443             }
2444 1         3 foreach ( @{ $tree->get_terminals } ) {
  1         5  
2445 9         43 my $newbl =
2446             $_->get_branch_length + ( $tallest - $_->calc_path_to_root );
2447 9         24 $_->set_branch_length($newbl);
2448             }
2449 1         13 return $tree;
2450             }
2451              
2452             =item scale()
2453              
2454             Scales the tree to the specified height.
2455              
2456             Type : Tree manipulator
2457             Title : scale
2458             Usage : $tree->scale($height);
2459             Function: Scales the tree to the
2460             specified height.
2461             Returns : The modified invocant.
2462             Args : $height = a numerical value
2463             indicating root-to-tip path length.
2464             Comments: This method uses the
2465             $tree->calc_tree_height method, and
2466             so for additive trees the *average*
2467             root-to-tip path length is scaled to
2468             $height (i.e. some nodes might be
2469             taller than $height, others shorter).
2470              
2471             =cut
2472              
2473             sub scale {
2474 1     1 1 4 my ( $tree, $target_height ) = @_;
2475 1         6 my $current_height = $tree->calc_tree_height;
2476 1         5 my $scaling_factor = $target_height / $current_height;
2477 1         5 foreach ( @{ $tree->get_entities } ) {
  1         8  
2478 17         34 my $bl = $_->get_branch_length;
2479 17 100       33 if ($bl) {
2480 16         29 my $new_branch_length = $bl * $scaling_factor;
2481 16         29 $_->set_branch_length($new_branch_length);
2482             }
2483             }
2484 1         11 return $tree;
2485             }
2486              
2487             =item resolve()
2488              
2489             Randomly breaks polytomies.
2490              
2491             Type : Tree manipulator
2492             Title : resolve
2493             Usage : $tree->resolve;
2494             Function: Randomly breaks polytomies by inserting
2495             additional internal nodes.
2496             Returns : The modified invocant.
2497             Args : Optionally, when passed a true value (e.g. '1'), the newly created nodes
2498             will be unnamed, otherwise they will be named 'r1', 'r2', 'r3' and so on.
2499             Comments:
2500              
2501             =cut
2502              
2503             sub resolve {
2504 2     2 1 9 my ( $tree, $anonymous ) = @_;
2505 2         5 for my $node ( @{ $tree->get_internals } ) {
  2         77  
2506 14         30 my @children = @{ $node->get_children };
  14         50  
2507 14 100       66 if ( scalar @children > 2 ) {
2508 1         4 my $i = 1;
2509 1         6 while ( scalar @children > 2 ) {
2510 2         9 my %args = ( '-branch_length' => 0.00 );
2511 2 50       15 $args{'-name'} = 'r' . $i++ unless $anonymous;
2512 2         27 my $newnode = $fac->create_node(%args);
2513 2         17 $tree->insert($newnode);
2514 2         13 $newnode->set_parent($node);
2515 2         9 for ( 1 .. 2 ) {
2516 4         72 my $i = int( rand( scalar @children ) );
2517 4         25 $children[$i]->set_parent($newnode);
2518 4         16 splice @children, $i, 1;
2519             }
2520 2         16 push @children, $newnode;
2521             }
2522             }
2523             }
2524 2         20 return $tree;
2525             }
2526            
2527             =item replicate()
2528              
2529             Simulates tree(s) whose properties resemble that of the input tree in terms of birth/death
2530             rate, depth, and size/depth distribution of genera. This uses the R environment for
2531             statistics to get a maximum likelihood estimate of birth/death rates on the source tree
2532             and therefore requires the package L to be installed, and the R package
2533             'ape'. The idea is that this is used on a species tree that is ultrametric. To get
2534             simulated genera whose sizes and root depths approximate those of the source tree,
2535             annotate genus nodes in the source tree, e.g. using $tree->generize, and provide the
2536             optional -genera flag of replicate() with a true value.
2537              
2538             This method uses the function C from the R package C. If you use this
2539             method in a publication, you should therefore B (in addition to
2540             Bio::Phylo). More information about C can be found at L.
2541              
2542             Type : Tree manipulator
2543             Title : replicate
2544             Usage : my $forest = $tree->replicate;
2545             Function: Simulates tree(s) whose properties resemble that of the invocant tree
2546             Returns : Bio::Phylo::Forest
2547             Args : Optional: -trees => number of replicates, default is 1
2548             Optional: -rootedge => keep the birth/death root branch, then scale the tree(s)
2549             Optional: -genera => approximate distribution of source genus sizes and depths
2550             (do this by tagging internal nodes: $node->set_rank('genus'))
2551             Optional: -seed => a random integer seed for generating the birth/death tree
2552             Comments: Requires Statistics::R, and an R environment with 'ape' installed
2553             Expects to operate on an ultrametric tree
2554              
2555             =cut
2556            
2557             sub replicate {
2558 0     0 1 0 my ( $self, %args ) = @_;
2559 0 0 0     0 if ( looks_like_class('Statistics::R') and looks_like_class('Bio::Phylo::Generator') ) {
2560              
2561             # get birthdeath parameters
2562 0         0 $logger->info("going to estimate b/d");
2563 0         0 my $newick = $self->to_newick;
2564 0         0 my $R = Statistics::R->new;
2565 0 0       0 if ( my $seed = $args{'-seed'} ) {
2566 0         0 $R->run(qq[set.seed($seed)]);
2567             }
2568 0         0 $R->run(q[library("ape")]);
2569 0         0 $R->run(qq[phylo <- read.tree(text="$newick")]);
2570 0         0 $R->run(q[bd <- birthdeath(phylo)]);
2571 0         0 $R->run(q[ratio <- as.double(bd$para[1])]);
2572 0         0 my $b_over_d = $R->get(q[ratio]);
2573 0         0 $logger->info("b/d=$b_over_d");
2574            
2575             # generate the tree
2576 0         0 $logger->info("going to simulate tree(s)");
2577 0         0 my $gen = Bio::Phylo::Generator->new;
2578             my $forest = $gen->gen_rand_birth_death(
2579             '-trees' => $args{'-trees'} || 1,
2580             '-killrate' => $b_over_d,
2581 0   0     0 '-tips' => scalar(@{ $self->get_terminals }),
  0         0  
2582             );
2583            
2584             # invent tip labels
2585             $forest->visit(sub{
2586 0     0   0 my $t = shift;
2587 0         0 my $n = 0;
2588 0         0 for my $tip ( @{ $t->get_terminals } ) {
  0         0  
2589 0         0 my $genus = $self->_make_taxon_name;
2590 0         0 my $species = $self->_make_taxon_name;
2591 0 0       0 if ( $genus =~ /(..)$/ ) {
2592 0         0 my $suffix = $1;
2593 0         0 $species =~ s/..$/$suffix/;
2594 0         0 $tip->set_name( ucfirst($genus) . '_' . $species );
2595             }
2596             }
2597 0         0 });
2598            
2599             # scale the trees
2600 0         0 my $height = $self->calc_tree_height;
2601 0 0   0   0 $forest->visit(sub{shift->get_root->set_branch_length(0)}) if not $args{'-rootedge'};
  0         0  
2602 0     0   0 $forest->visit(sub{shift->scale($height)});
  0         0  
2603 0         0 $logger->info("tree height is $height");
2604            
2605             # create similar genera, optionally
2606 0 0       0 if ( $args{'-genera'} ) {
2607 0         0 $logger->info("going to approximate genera");
2608            
2609             # iterate over trees
2610 0         0 for my $replicate ( @{ $forest->get_entities } ) {
  0         0  
2611            
2612             # get distribution of source genus sizes and depths
2613 0         0 $logger->info("calculating source genus sizes and depths");
2614 0         0 my ( $counter, %genera ) = ( 0 );
2615             $self->visit(sub{
2616 0     0   0 my $node = shift;
2617 0         0 my $rank = $node->get_rank;
2618 0 0       0 if ( $rank ) {
2619 0 0       0 if ( $rank eq 'genus' ) {
2620 0         0 my $id = $node->get_id;
2621 0         0 my $height = $height - $node->calc_path_to_root;
2622 0         0 my $size = scalar(@{ $node->get_terminals });
  0         0  
2623 0   0     0 my $name = $node->get_name || 'Genus' . ++$counter;
2624 0         0 $genera{$id} = {
2625             'name' => $name,
2626             'size' => $size,
2627             'height' => $height,
2628             'node' => $node,
2629             };
2630             }
2631             }
2632 0         0 });
2633            
2634             # get distribution of target node sizes and depths
2635 0         0 $logger->info("calculating target genus sizes and depths");
2636 0         0 my ( %node );
2637             $replicate->visit(sub{
2638 0     0   0 my $node = shift;
2639 0         0 my $id = $node->get_id;
2640 0         0 my $height = $height - $node->calc_path_to_root;
2641 0         0 my $size = scalar(@{ $node->get_terminals });
  0         0  
2642 0         0 push @{ $node{$size} }, [ $node, $height, $id ];
  0         0  
2643 0         0 });
2644            
2645             # keep track of which members from the genera have already been assigned
2646 0         0 my %seen_labels;
2647            
2648             # start assigning genera, from big to small
2649 0         0 for my $genus ( sort { $genera{$b}->{'size'} <=> $genera{$a}->{'size'} } keys %genera ) {
  0         0  
2650             # get key for candidate set of nodes
2651 0   0     0 my $name = $genera{$genus}->{'name'} || "Genus${genus}";
2652 0         0 my $size = $genera{$genus}->{'size'};
2653 0         0 my @labels = shuffle map { $_->get_name } @{ $genera{$genus}->{'node'}->get_terminals };
  0         0  
  0         0  
2654            
2655             # avoid assigning labels more than once when genera are nested
2656 0         0 @labels = grep { ! $seen_labels{$_} } @labels;
  0         0  
2657 0         0 $seen_labels{$_}++ for @labels;
2658              
2659 0         0 $logger->info("processing $name ($size tips)");
2660 0 0       0 SIZE: while( not $node{$size} ) { last SIZE if --$size <= 1 }
  0         0  
2661            
2662             # get target height
2663 0 0       0 if ( $node{$size} ) {
2664 0         0 $logger->info("found candidate(s) with $size tips");
2665 0         0 my $h = $genera{$genus}->{'height'};
2666 0         0 my ($node) = map { $_->[0] }
2667 0         0 sort { abs($a->[1]-$h) <=> abs($b->[1]-$h) }
2668 0         0 @{ $node{$size} };
  0         0  
2669            
2670             # assign genus label to node and tips, remove all descendants
2671             # (and self!) from list of candidates, as we can't nest genera
2672 0         0 my $sp = 0;
2673 0         0 $node->set_name($name);
2674            
2675 0         0 for my $n ( $node, @{ $node->get_descendants } ) {
  0         0  
2676 0 0       0 $n->set_name($labels[$sp++]) if $n->is_terminal;
2677 0         0 for my $i ( 1 .. $size ) {
2678 0 0       0 if ( my $array = $node{$i} ) {
2679 0         0 my $id = $n->get_id;
2680 0         0 my @filtered = grep { $id != $_->[2] } @$array;
  0         0  
2681 0 0       0 @filtered ? $node{$i} = \@filtered : delete $node{$i};
2682             }
2683             }
2684             }
2685             }
2686             else {
2687 0         0 $logger->warn("exhausted candidate genera for $genus");
2688             }
2689             }
2690             }
2691             }
2692 0         0 return $forest;
2693             }
2694             }
2695            
2696             sub _make_taxon_name {
2697 0     0   0 my %l = (
2698             'v' => [ qw(a e i o u) ],
2699             'c' => [ qw(qu cr ct p pr ps rs ld ph gl ch l sc v n m) ],
2700             );
2701 0         0 my @suffixes =qw(us os is as es);
2702 0         0 my $length = 1 + int rand 2;
2703 0 0       0 my @order = int rand 2 ? qw(v c) : qw(c v);
2704 0         0 my ($l1) = shuffle(@{$l{$order[0]}});
  0         0  
2705 0         0 my ($l2) = shuffle(@{$l{$order[1]}});
  0         0  
2706 0         0 my @name = ( $l1, $l2 );
2707 0         0 for my $i ( 0 .. $length ) {
2708 0         0 ($l1) = shuffle(@{$l{$order[0]}});
  0         0  
2709 0         0 ($l2) = shuffle(@{$l{$order[1]}});
  0         0  
2710 0         0 push @name, $l1, $l2;
2711             }
2712 0         0 my $name = join '', @name;
2713 0         0 $name =~ s/[aeiou]+$//;
2714 0         0 my ($suffix) = shuffle(@suffixes);
2715 0         0 return $name . $suffix;
2716             }
2717              
2718             =item generize()
2719              
2720             Identifies monophyletic genera by traversing the tree, taking the first word of the tip
2721             names and finding the MRCA of each word. That MRCA is tagged as rank 'genus' and assigned
2722             the name.
2723              
2724             Type : Tree manipulator
2725             Title : generize
2726             Usage : $tree->generize(%args);
2727             Function: Identifies monophyletic genera
2728             Returns : Invocant
2729             Args : Optional: -delim => the delimiter that separates the genus name from any
2730             following (sub)specific epithets. Default is a space ' '.
2731             Optional: -monotypic => if true, also tags monotypic genera
2732             Optional: -polypara => if true, also tags poly/paraphyletic genera. Any
2733             putative genera nested within the largest of the
2734             entangled, poly/paraphyletic genera will be ignored.
2735             Comments:
2736              
2737             =cut
2738            
2739             sub generize {
2740 0     0 1 0 my ( $self, %args ) = @_;
2741 0   0     0 my $delim = $args{'-delim'} || ' ';
2742            
2743             # bin by genus name
2744 0         0 my %genera;
2745 0         0 for my $tip ( @{ $self->get_terminals } ) {
  0         0  
2746 0         0 my $binomial = $tip->get_name;
2747 0         0 my ($genus) = split /$delim/, $binomial;
2748 0 0       0 $genera{$genus} = [] if not $genera{$genus};
2749 0         0 push @{ $genera{$genus} }, $tip;
  0         0  
2750             }
2751            
2752             # identify and tag MRCAs
2753 0         0 my %skip;
2754 0         0 for my $genus ( sort { scalar(@{$genera{$b}}) <=> scalar(@{$genera{$a}}) } keys %genera ) {
  0         0  
  0         0  
  0         0  
2755 0 0       0 next if $skip{$genus};
2756 0         0 my $tips = $genera{$genus};
2757            
2758             # genus is monotypic
2759 0 0       0 if ( scalar(@$tips) == 1 ) {
2760 0         0 $logger->info("$genus is monotypic");
2761 0 0       0 $tips->[0]->set_rank('genus') if $args{'-monotypic'};
2762             }
2763             else {
2764            
2765             # get the MRCA
2766 0         0 my ( $mrca, %seen );
2767 0         0 my @paths = map { @{ $_->get_ancestors } } @{ $tips };
  0         0  
  0         0  
  0         0  
2768 0         0 $seen{$_->get_id}++ for @paths;
2769 0         0 ($mrca) = map { $_->[0] }
2770 0         0 sort { $b->[1] <=> $a->[1] }
2771 0         0 map { [ $_, $_->calc_path_to_root ] }
2772 0         0 grep { $seen{$_->get_id} == @$tips } @paths;
  0         0  
2773            
2774             # identify mono/poly/para
2775 0         0 my $clade_size = @{ $mrca->get_terminals };
  0         0  
2776 0         0 my $tip_count = @{ $tips };
  0         0  
2777 0 0       0 if ( $clade_size == $tip_count ) {
2778 0         0 $logger->info("$genus is monophyletic");
2779 0         0 $mrca->set_rank('genus');
2780 0         0 $mrca->set_name($genus);
2781             }
2782             else {
2783            
2784             # we could now have nested, smaller genera inside this one
2785 0         0 $logger->info("$genus is non-monophyletic $clade_size != $tip_count");
2786 0 0       0 if ( $args{'-polypara'} ) {
2787 0         0 $logger->info("tagging non-monophyletic $genus anyway");
2788 0         0 $mrca->set_rank('genus');
2789 0         0 $mrca->set_name($genus);
2790 0         0 my @names = map { $_->get_name } @{ $mrca->get_terminals };
  0         0  
  0         0  
2791 0         0 for my $name ( @names ) {
2792 0         0 $name =~ s/^(.+?)${delim}.+$/$1/;
2793 0         0 $skip{$name}++;
2794             }
2795             }
2796             }
2797             }
2798             }
2799 0         0 return $self;
2800             }
2801              
2802             =item prune_tips()
2803              
2804             Prunes argument nodes from invocant.
2805              
2806             Type : Tree manipulator
2807             Title : prune_tips
2808             Usage : $tree->prune_tips(\@taxa);
2809             Function: Prunes specified taxa from invocant.
2810             Returns : A pruned Bio::Phylo::Forest::Tree object.
2811             Args : A reference to an array of taxon names, or a taxa block, or a
2812             reference to an array of taxon objects, or a reference to an
2813             array of node objects
2814             Comments:
2815              
2816             =cut
2817              
2818             sub prune_tips {
2819 10     10 1 1170 my ( $self, $tips ) = @_;
2820 10         17 my %prune = map { $_->get_id => 1 } @{ $self->_get_tip_objects($tips) };
  6         14  
  10         38  
2821 10         19 my @keep;
2822 10         15 for my $tip ( @{ $self->get_terminals } ) {
  10         49  
2823 118 100       185 if ( not $prune{$tip->get_id} ) {
2824 112         184 push @keep, $tip;
2825             }
2826             }
2827 10         52 return $self->keep_tips(\@keep);
2828             }
2829              
2830             =item keep_tips()
2831              
2832             Keeps argument nodes from invocant (i.e. prunes all others).
2833              
2834             Type : Tree manipulator
2835             Title : keep_tips
2836             Usage : $tree->keep_tips(\@taxa);
2837             Function: Keeps specified taxa from invocant.
2838             Returns : The pruned Bio::Phylo::Forest::Tree object.
2839             Args : Same as prune_tips, but with inverted meaning
2840             Comments:
2841              
2842             =cut
2843              
2844             sub _get_tip_objects {
2845 24     24   54 my ( $self, $arg ) = @_;
2846 24         44 my @tips;
2847              
2848             # argument is a taxa block
2849 24 50       91 if ( blessed $arg ) {
2850 0         0 for my $taxon ( @{ $arg->get_entities } ) {
  0         0  
2851 0         0 my @nodes = @{ $taxon->get_nodes };
  0         0  
2852 0         0 for my $node ( @nodes ) {
2853 0 0       0 push @tips, $node if $self->contains($node);
2854             }
2855             }
2856             }
2857              
2858             # arg is an array ref
2859             else {
2860 24         47 my $TAXON = _TAXON_;
2861 24         38 my $NODE = _NODE_;
2862 24         39 for my $thing ( @{ $arg } ) {
  24         69  
2863              
2864             # thing is a taxon or node object
2865 133 100       308 if ( blessed $thing ) {
2866 117 50       202 if ( $thing->_type == $TAXON ) {
    50          
2867 0         0 my @nodes = @{ $thing->get_nodes };
  0         0  
2868 0         0 for my $node ( @nodes ) {
2869 0 0       0 push @tips, $node if $self->contains($node);
2870             }
2871             }
2872             elsif ( $thing->_type == $NODE ) {
2873 117 100       197 push @tips, $thing if $self->contains($thing);
2874             }
2875             }
2876              
2877             # thing is a name
2878             else {
2879 16 50       78 if ( my $tip = $self->get_by_name($thing) ) {
2880 16         59 push @tips, $tip;
2881             }
2882             }
2883             }
2884             }
2885 24         88 return \@tips;
2886             }
2887              
2888             sub keep_tips {
2889 14     14 1 1093 my ( $self, $tip_names ) = @_;
2890              
2891             # get node objects for tips
2892 14         31 my @tips = @{ $self->_get_tip_objects($tip_names) };
  14         87  
2893            
2894             # identify nodes that are somewhere on the path from tip to root
2895 14         38 my %seen;
2896 14         35 for my $tip ( @tips ) {
2897 121         153 my $node = $tip;
2898 121         202 PARENT: while ( $node ) {
2899 343         555 my $id = $node->get_id;
2900 343 100       582 if ( not exists $seen{$id} ) {
2901 235         355 $seen{$id} = 0;
2902 235         395 $node = $node->get_parent;
2903             }
2904             else {
2905 108         175 last PARENT;
2906             }
2907             }
2908             }
2909              
2910             # now do the pruning
2911             $self->visit_depth_first(
2912             '-post' => sub {
2913             # prune node
2914 258     258   328 my $n = shift;
2915 258         420 my $nid = $n->get_id;
2916 258         527 my $p = $n->get_parent;
2917 258 100       490 if ( not exists $seen{$nid} ) {
2918 23 100       111 $p->delete($n) if $p;
2919 23         93 $self->delete($n);
2920             # record number of children lost by parent
2921 23 100       54 if (defined $p) {
2922 22         44 my $pid = $p->get_id;
2923 22 100       56 if ( exists $seen{$pid} ) {
2924 12         24 $seen{$pid}++;
2925             }
2926             }
2927 23         74 return;
2928             }
2929             # remove nodes who lost children and are now down to a single one
2930 235         292 my @children = @{ $n->get_children };
  235         426  
2931 235 100 100     800 if ( (scalar @children == 1) && ($seen{$nid} > 0) ) {
2932 10         26 my ($c) = @children;
2933 10         37 my $bl = $n->get_branch_length;
2934 10         38 my $cbl = $c->get_branch_length;
2935 10 100 100     53 $c->set_branch_length( $bl + $cbl ) if defined $cbl && defined $bl;
2936 10         34 $self->delete($n);
2937 10         42 $c->set_parent($p);
2938 10 100       45 $p->delete($n) if $p;
2939             }
2940             }
2941 14         164 );
2942 14         242 return $self;
2943             }
2944              
2945             =item negative_to_zero()
2946              
2947             Converts negative branch lengths to zero.
2948              
2949             Type : Tree manipulator
2950             Title : negative_to_zero
2951             Usage : $tree->negative_to_zero;
2952             Function: Converts negative branch
2953             lengths to zero.
2954             Returns : The modified invocant.
2955             Args : NONE
2956             Comments:
2957              
2958             =cut
2959              
2960             sub negative_to_zero {
2961 3     3 1 7 my $tree = shift;
2962 3         5 foreach my $node ( @{ $tree->get_entities } ) {
  3         10  
2963 17         37 my $bl = $node->get_branch_length;
2964 17 50 66     61 if ( $bl && $bl < 0 ) {
2965 0         0 $node->set_branch_length(0);
2966             }
2967             }
2968 3         8 return $tree;
2969             }
2970              
2971             =item ladderize()
2972              
2973             Sorts nodes in ascending (or descending) order of number of children. Tips are
2974             sorted alphabetically (ascending or descending) relative to their siblings.
2975              
2976             Type : Tree manipulator
2977             Title : ladderize
2978             Usage : $tree->ladderize(1);
2979             Function: Sorts nodes
2980             Returns : The modified invocant.
2981             Args : Optional, a true value to reverse the sort order
2982              
2983             =cut
2984              
2985             sub ladderize {
2986 2     2 1 7 my ( $self, $right ) = @_;
2987 2         5 my %child_count;
2988             $self->visit_depth_first(
2989             '-post' => sub {
2990 18     18   30 my $node = shift;
2991            
2992             # record the number of descendants for the focal
2993             # node. because this is a post-order traversal
2994             # we have already counted the children of the
2995             # children, recursively. bin nodes and tips in
2996             # separate containers.
2997 18         44 my $id = $node->get_id;
2998 18         33 my @children = @{ $node->get_children };
  18         39  
2999 18         29 my $count = 1;
3000 18         26 my ( @tips, @nodes );
3001 18         36 for my $child (@children) {
3002 16         40 $count += $child_count{ $child->get_id };
3003 16 100       48 if ( $child->is_terminal ) {
3004 10         85 push @tips, $child;
3005             }
3006             else {
3007 6         15 push @nodes, $child;
3008             }
3009             }
3010 18         42 $child_count{$id} = $count;
3011            
3012             # sort the immediate children. if these are
3013             # tips we will sort alphabetically by name (so
3014             # that cherries are sorted predictably), otherwise
3015             # sort by descendant count
3016 18         30 my @sorted;
3017            
3018 18 50       56 if ($right) {
3019 0         0 @sorted = map { $_->[0] }
3020 0         0 sort { $b->[1] <=> $a->[1] }
3021 0         0 map { [ $_, $child_count{ $_->get_id } ] } @nodes;
  0         0  
3022 0         0 push @sorted, sort { $b->get_name cmp $a->get_name } @tips;
  0         0  
3023             }
3024             else {
3025 6         15 @sorted = map { $_->[0] }
3026 0         0 sort { $a->[1] <=> $b->[1] }
3027 18         48 map { [ $_, $child_count{ $_->get_id } ] } @nodes;
  6         18  
3028 18         51 unshift @sorted, sort { $a->get_name cmp $b->get_name } @tips;
  2         18  
3029             }
3030              
3031             # apply the new sort order
3032 18         66 for my $i ( 0 .. $#sorted ) {
3033 16         46 $node->insert_at_index( $sorted[$i], $i );
3034             }
3035             }
3036 2         32 );
3037 2         71 return $self;
3038             }
3039              
3040             =item sort_tips()
3041              
3042             Sorts nodes in (an approximation of) the provided ordering. Given an array
3043             reference of taxa, an array reference of name strings, or a taxa object, this
3044             method attempts to order the tips in the same way. It does this by recursively
3045             computing the rank for all internal nodes by taking the average rank of its
3046             children. This results in the following orderings:
3047              
3048             (a,b,c,d,e,f); => $tree->sort_tips( [ qw(a c b f d e) ] ) => (a,c,b,f,d,e);
3049            
3050             (a,b,(c,d),e,f); => $tree->sort_tips( [ qw(a b e d c f) ] ); => (a,b,(e,(d,c)),f);
3051            
3052             ((a,b),((c,d),e),f); => $tree->sort_tips( [ qw(a e d c b f) ] ); => ((e,(d,c)),(a,b),f);
3053              
3054             Type : Tree manipulator
3055             Title : sort_tips
3056             Usage : $tree->sort_tips($ordering);
3057             Function: Sorts nodes
3058             Returns : The modified invocant.
3059             Args : Required, an array reference (or taxa object) whose ordering to match
3060              
3061             =cut
3062              
3063             sub sort_tips {
3064 4     4 1 20 my ( $self, $taxa ) = @_;
3065             my @taxa =
3066             UNIVERSAL::can( $taxa, 'get_entities' )
3067 0         0 ? @{ $taxa->get_entities }
3068 4 50       29 : @{$taxa};
  4         21  
3069             my @names =
3070 4 50       22 map { UNIVERSAL::can( $_, 'get_name' ) ? $_->get_name : $_ } @taxa;
  24         184  
3071 4         18 my $i = 1;
3072 4         15 my %rank = map { $_ => $i++ } @names;
  24         91  
3073             $self->visit_depth_first(
3074             '-post' => sub {
3075 34     34   88 my $node = shift;
3076 34         68 my @children = @{ $node->get_children };
  34         128  
3077 34 100       126 if (@children) {
3078 10         37 my @ranks = map { $_->get_generic('rank') } @children;
  30         127  
3079 10         80 my $sum = sum @ranks;
3080 10         48 my $mean = $sum / scalar(@ranks);
3081 10         59 $node->set_generic( 'rank' => $mean );
3082 10         91 $node->clear;
3083             $node->insert(
3084             sort {
3085 10         87 $a->get_generic('rank') <=> $b->get_generic('rank')
  32         137  
3086             } @children
3087             );
3088             }
3089             else {
3090 24         166 $node->set_generic( 'rank' => $rank{ $node->get_name } );
3091             }
3092             }
3093 4         75 );
3094 4         107 return $self->_analyze;
3095             }
3096              
3097             =item exponentiate()
3098              
3099             Raises branch lengths to argument.
3100              
3101             Type : Tree manipulator
3102             Title : exponentiate
3103             Usage : $tree->exponentiate($power);
3104             Function: Raises branch lengths to $power.
3105             Returns : The modified invocant.
3106             Args : A $power in any of perl's number formats.
3107              
3108             =cut
3109              
3110             sub exponentiate {
3111 0     0 1 0 my ( $tree, $power ) = @_;
3112 0 0       0 if ( !looks_like_number $power ) {
3113 0         0 throw 'BadNumber' => "Power \"$power\" is a bad number";
3114             }
3115             else {
3116 0         0 foreach my $node ( @{ $tree->get_entities } ) {
  0         0  
3117 0         0 my $bl = $node->get_branch_length;
3118 0         0 $node->set_branch_length( $bl**$power );
3119             }
3120             }
3121 0         0 return $tree;
3122             }
3123              
3124             =item multiply()
3125              
3126             Multiples branch lengths by argument.
3127              
3128             Type : Tree manipulator
3129             Title : multiply
3130             Usage : $tree->multiply($num);
3131             Function: Multiplies branch lengths by $num.
3132             Returns : The modified invocant.
3133             Args : A $number in any of perl's number formats.
3134              
3135             =cut
3136            
3137             sub multiply {
3138 0     0 1 0 my ( $tree, $num ) = @_;
3139 0 0       0 if ( !looks_like_number $num ) {
3140 0         0 throw 'BadNumber' => "Number '$num' is a bad number";
3141             }
3142             $tree->visit(sub{
3143 0     0   0 my $node = shift;
3144 0         0 my $length = $node->get_branch_length;
3145 0 0       0 if ( $length ) {
3146 0         0 $node->set_branch_length( $length * $num );
3147             }
3148 0         0 });
3149 0         0 return $tree;
3150             }
3151              
3152             =item log_transform()
3153              
3154             Log argument base transform branch lengths.
3155              
3156             Type : Tree manipulator
3157             Title : log_transform
3158             Usage : $tree->log_transform($base);
3159             Function: Log $base transforms branch lengths.
3160             Returns : The modified invocant.
3161             Args : A $base in any of perl's number formats.
3162              
3163             =cut
3164              
3165             sub log_transform {
3166 0     0 1 0 my ( $tree, $base ) = @_;
3167 0 0       0 if ( !looks_like_number $base ) {
3168 0         0 throw 'BadNumber' => "Base \"$base\" is a bad number";
3169             }
3170             else {
3171 0         0 foreach my $node ( @{ $tree->get_entities } ) {
  0         0  
3172 0         0 my $bl = $node->get_branch_length;
3173 0         0 my $newbl;
3174 0         0 eval { $newbl = ( log $bl ) / ( log $base ); };
  0         0  
3175 0 0       0 if ($@) {
3176 0         0 throw 'OutOfBounds' =>
3177             "Invalid input for log transform: $@";
3178             }
3179             else {
3180 0         0 $node->set_branch_length($newbl);
3181             }
3182             }
3183             }
3184 0         0 return $tree;
3185             }
3186              
3187             =item remove_unbranched_internals()
3188              
3189             Collapses internal nodes with fewer than 2 children.
3190              
3191             Type : Tree manipulator
3192             Title : remove_unbranched_internals
3193             Usage : $tree->remove_unbranched_internals;
3194             Function: Collapses internal nodes
3195             with fewer than 2 children.
3196             Returns : The modified invocant.
3197             Args : NONE
3198             Comments:
3199              
3200             =cut
3201              
3202             sub remove_unbranched_internals {
3203 6     6 1 29 my $self = shift;
3204 6         11 my @delete;
3205             $self->visit_depth_first(
3206             '-post' => sub {
3207 1450     1450   1759 my $node = shift;
3208 1450         1647 my @children = @{ $node->get_children };
  1450         2242  
3209            
3210             # the node is interior, now need to check for each child
3211             # if it's interior as well
3212 1450 100       3223 if ( @children ) {
3213            
3214             # special case for the root with unbranched child
3215 717 50 66     1408 if ( $node->is_root and 1 == @children ) {
3216 0         0 my ($child) = @children;
3217 0         0 for my $gchild ( @{ $child->get_children } ) {
  0         0  
3218            
3219             # compute the new branch length for $gchild
3220 0         0 my $clength = $child->get_branch_length;
3221 0         0 my $glength = $gchild->get_branch_length;
3222 0 0       0 my $length = $clength if defined $clength;
3223 0 0       0 $length += $glength if defined $glength;
3224 0 0       0 $gchild->set_branch_length($length) if defined $length;
3225            
3226             # connect grandchild to root
3227 0         0 $gchild->set_parent($node);
3228 0         0 $node->delete($child);
3229            
3230             # will delete these nodes from the tree array
3231             # after the recursion
3232 0         0 push @delete, $child;
3233             }
3234             }
3235             else {
3236            
3237             # iterate over children
3238 717         1052 for my $child ( @children ) {
3239 1491         2781 my $child_name = $child->get_name;
3240 1491         1811 my @grandchildren = @{ $child->get_children };
  1491         2559  
3241            
3242             # $child is an unbranched internal, so $grandchildren[0]
3243             # needs to be connected to $node
3244 1491 100       3955 if ( 1 == scalar @grandchildren ) {
3245 1         2 my $gchild = $grandchildren[0];
3246            
3247             # compute the new branch length for $gchild
3248 1         3 my $clength = $child->get_branch_length;
3249 1         3 my $glength = $gchild->get_branch_length;
3250 1 50       3 my $length = $clength if defined $clength;
3251 1 50       4 $length += $glength if defined $glength;
3252 1 50       3 $gchild->set_branch_length($length) if defined $length;
3253            
3254 1         5 $gchild->set_parent($node);
3255 1         4 $node->delete($child);
3256            
3257             # will delete these nodes from the tree array
3258             # after the recursion
3259 1         5 push @delete, $child;
3260             }
3261             }
3262             }
3263             }
3264             }
3265 6         67 );
3266 6         82 $self->delete($_) for @delete;
3267 6         25 return $self;
3268             }
3269              
3270             =item remove_orphans()
3271              
3272             Removes all unconnected nodes.
3273              
3274             Type : Tree manipulator
3275             Title : remove_orphans
3276             Usage : $tree->remove_orphans;
3277             Function: Removes all unconnected nodes
3278             Returns : The modified invocant.
3279             Args : NONE
3280             Comments:
3281              
3282             =cut
3283              
3284             sub remove_orphans {
3285 0     0 1 0 my $self = shift;
3286            
3287             # collect all nodes that are topologically connected
3288 0         0 my %seen;
3289             $self->visit_depth_first(
3290             '-pre' => sub {
3291 0     0   0 $seen{ shift->get_id }++;
3292             }
3293 0         0 );
3294            
3295             # collect all nodes
3296 0         0 my @delete;
3297             $self->visit(sub {
3298 0     0   0 my $node = shift;
3299 0 0       0 push @delete, $node if not $seen{$node->get_id};
3300 0         0 });
3301 0         0 $self->delete($_) for @delete;
3302            
3303             # notify user
3304 0 0       0 if ( scalar @delete ) {
3305 0         0 $logger->warn("deleted ".scalar(@delete)." orphaned nodes");
3306             }
3307            
3308 0         0 return $self;
3309             }
3310              
3311             =item deroot()
3312              
3313             Collapses one of the children of a basal bifurcation
3314              
3315             Type : Tree manipulator
3316             Title : deroot
3317             Usage : $tree->deroot;
3318             Function: Removes root
3319             Returns : The modified invocant.
3320             Args : Optional: node to collapse
3321             Comments:
3322              
3323             =cut
3324              
3325             sub deroot {
3326 0     0 1 0 my ($self,$collapsible) = @_;
3327 0         0 my $root = $self->get_root;
3328 0         0 my @children = @{ $root->get_children };
  0         0  
3329 0 0       0 if ( scalar @children < 3 ) {
3330 0 0       0 if ( not $collapsible) {
3331 0         0 ($collapsible) = grep { $_->is_internal } @children;
  0         0  
3332             }
3333 0         0 $collapsible->collapse;
3334 0         0 return $self;
3335             }
3336             else {
3337 0         0 return $self;
3338             }
3339             }
3340              
3341             =back
3342              
3343             =head2 UTILITY METHODS
3344              
3345             =over
3346              
3347             =item clone()
3348              
3349             Clones invocant.
3350              
3351             Type : Utility method
3352             Title : clone
3353             Usage : my $clone = $object->clone;
3354             Function: Creates a copy of the invocant object.
3355             Returns : A copy of the invocant.
3356             Args : Optional: a hash of code references to
3357             override reflection-based getter/setter copying
3358              
3359             my $clone = $object->clone(
3360             'set_forest' => sub {
3361             my ( $self, $clone ) = @_;
3362             for my $forest ( @{ $self->get_forests } ) {
3363             $clone->set_forest( $forest );
3364             }
3365             },
3366             'set_matrix' => sub {
3367             my ( $self, $clone ) = @_;
3368             for my $matrix ( @{ $self->get_matrices } ) {
3369             $clone->set_matrix( $matrix );
3370             }
3371             );
3372              
3373             Comments: Cloning is currently experimental, use with caution.
3374             It works on the assumption that the output of get_foo
3375             called on the invocant is to be provided as argument
3376             to set_foo on the clone - such as
3377             $clone->set_name( $self->get_name ). Sometimes this
3378             doesn't work, for example where this symmetry doesn't
3379             exist, or where the return value of get_foo isn't valid
3380             input for set_foo. If such a copy fails, a warning is
3381             emitted. To make sure all relevant attributes are copied
3382             into the clone, additional code references can be
3383             provided, as in the example above. Typically, this is
3384             done by overrides of this method in child classes.
3385              
3386             =cut
3387              
3388             sub clone {
3389 2     2 1 11 my $self = shift;
3390 2         9 $logger->info("cloning $self");
3391 2         5 my %subs = @_;
3392              
3393             # override, because we'll handle insert
3394 2     0   10 $subs{'set_root'} = sub { };
3395 2     0   7 $subs{'set_root_node'} = sub { };
3396              
3397             # we'll clone node objects, so no raw copying
3398             $subs{'insert'} = sub {
3399 0     0   0 my ( $self, $clone ) = @_;
3400 0         0 my %clone_of;
3401 0         0 for my $node ( @{ $self->get_entities } ) {
  0         0  
3402 0         0 my $cloned_node = $node->clone;
3403 0         0 $clone_of{ $node->get_id } = $cloned_node;
3404 0         0 $clone->insert($cloned_node);
3405             }
3406 0         0 for my $node ( @{ $self->get_entities } ) {
  0         0  
3407 0         0 my $cloned_node = $clone_of{ $node->get_id };
3408 0 0       0 if ( my $parent = $node->get_parent ) {
3409 0         0 my $cloned_parent_node = $clone_of{ $parent->get_id };
3410 0         0 $cloned_node->set_parent($cloned_parent_node);
3411             }
3412             }
3413 2         9 };
3414 2         11 return $self->SUPER::clone(%subs);
3415             }
3416              
3417             =back
3418              
3419             =head2 SERIALIZERS
3420              
3421             =over
3422              
3423             =item to_nexus()
3424              
3425             Serializes invocant to nexus string.
3426              
3427             Type : Stringifier
3428             Title : to_nexus
3429             Usage : my $string = $tree->to_nexus;
3430             Function: Turns the invocant tree object
3431             into a nexus string
3432             Returns : SCALAR
3433             Args : Any arguments that can be passed to Bio::Phylo::Forest::to_nexus
3434              
3435             =cut
3436              
3437             sub to_nexus {
3438 0     0 1 0 my $self = shift;
3439 0         0 my $forest = $fac->create_forest;
3440 0         0 $forest->insert($self);
3441 0         0 return $forest->to_nexus(@_);
3442             }
3443              
3444             =item to_newick()
3445              
3446             Serializes invocant to newick string.
3447              
3448             Type : Stringifier
3449             Title : to_newick
3450             Usage : my $string = $tree->to_newick;
3451             Function: Turns the invocant tree object
3452             into a newick string
3453             Returns : SCALAR
3454             Args : NONE
3455              
3456             =cut
3457              
3458             sub to_newick {
3459 25     25 1 70 my $self = shift;
3460 25         90 my %args = @_;
3461 25         212 my $newick = unparse( '-format' => 'newick', '-phylo' => $self, %args );
3462 25         340 return $newick;
3463             }
3464              
3465             =item to_xml()
3466              
3467             Serializes invocant to xml.
3468              
3469             Type : Serializer
3470             Title : to_xml
3471             Usage : my $xml = $obj->to_xml;
3472             Function: Turns the invocant object into an XML string.
3473             Returns : SCALAR
3474             Args : NONE
3475              
3476             =cut
3477              
3478             sub to_xml {
3479 0     0 1 0 my $self = shift;
3480 0         0 my $xsi_type = 'nex:IntTree';
3481 0         0 for my $node ( @{ $self->get_entities } ) {
  0         0  
3482 0         0 my $length = $node->get_branch_length;
3483 0 0 0     0 if ( defined $length and $length !~ /^[+-]?\d+$/ ) {
3484 0         0 $xsi_type = 'nex:FloatTree';
3485             }
3486             }
3487 0         0 $self->set_attributes( 'xsi:type' => $xsi_type );
3488 0         0 my $xml = $self->get_xml_tag;
3489 0 0       0 if ( my $root = $self->get_root ) {
3490 0         0 $xml .= $root->to_xml;
3491             }
3492 0         0 $xml .= $self->sets_to_xml . sprintf('', $self->get_tag);
3493 0         0 return $xml;
3494             }
3495              
3496             =item to_svg()
3497              
3498             Serializes invocant to SVG.
3499              
3500             Type : Serializer
3501             Title : to_svg
3502             Usage : my $svg = $obj->to_svg;
3503             Function: Turns the invocant object into an SVG string.
3504             Returns : SCALAR
3505             Args : Same args as the Bio::Phylo::Treedrawer constructor
3506             Notes : This will only work if you have the SVG module
3507             from CPAN installed on your system.
3508              
3509             =cut
3510              
3511             sub to_svg {
3512 0     0 1 0 my $self = shift;
3513 0         0 my $drawer = $fac->create_drawer(@_);
3514 0         0 $drawer->set_tree($self);
3515 0         0 return $drawer->draw;
3516             }
3517              
3518             =item to_dom()
3519              
3520             Type : Serializer
3521             Title : to_dom
3522             Usage : $tree->to_dom($dom)
3523             Function: Generates a DOM subtree from the invocant
3524             and its contained objects
3525             Returns : an Element object
3526             Args : DOM factory object
3527              
3528             =cut
3529              
3530             sub to_dom {
3531 0     0 1 0 my ( $self, $dom ) = @_;
3532 0   0     0 $dom ||= $Bio::Phylo::NeXML::DOM::DOM;
3533 0 0       0 unless ( looks_like_object $dom, _DOMCREATOR_ ) {
3534 0         0 throw 'BadArgs' => 'DOM factory object not provided';
3535             }
3536 0         0 my $xsi_type = 'nex:IntTree';
3537 0         0 for my $node ( @{ $self->get_entities } ) {
  0         0  
3538 0         0 my $length = $node->get_branch_length;
3539 0 0 0     0 if ( defined $length and $length !~ /^[+-]?\d+$/ ) {
3540 0         0 $xsi_type = 'nex:FloatTree';
3541             }
3542             }
3543 0         0 $self->set_attributes( 'xsi:type' => $xsi_type );
3544 0         0 my $elt = $self->get_dom_elt($dom);
3545 0 0       0 if ( my $root = $self->get_root ) {
3546 0         0 $elt->set_child($_) for $root->to_dom($dom);
3547             }
3548 0         0 return $elt;
3549             }
3550              
3551             =begin comment
3552              
3553             Type : Internal method
3554             Title : _consolidate
3555             Usage : $tree->_consolidate;
3556             Function: Does pre-order traversal, only keeps
3557             nodes seen during traversal in tree,
3558             in order of traversal
3559             Returns :
3560             Args :
3561              
3562             =end comment
3563              
3564             =cut
3565              
3566             sub _consolidate {
3567 0     0   0 my $self = shift;
3568 0         0 my @nodes;
3569 0     0   0 $self->visit_depth_first( '-pre' => sub { push @nodes, shift } );
  0         0  
3570 0         0 $self->clear;
3571 0         0 $self->insert(@nodes);
3572             }
3573              
3574             =begin comment
3575              
3576             Type : Internal method
3577             Title : _container
3578             Usage : $tree->_container;
3579             Function:
3580             Returns : CONSTANT
3581             Args :
3582              
3583             =end comment
3584              
3585             =cut
3586              
3587 451     451   840 sub _container { $CONTAINER_CONSTANT }
3588              
3589             =begin comment
3590              
3591             Type : Internal method
3592             Title : _type
3593             Usage : $tree->_type;
3594             Function:
3595             Returns : CONSTANT
3596             Args :
3597              
3598             =end comment
3599              
3600             =cut
3601              
3602 25353     25353   41469 sub _type { $TYPE_CONSTANT }
3603 2     2   6 sub _tag { 'tree' }
3604              
3605             =back
3606              
3607             =cut
3608              
3609             # podinherit_insert_token
3610              
3611             =head1 SEE ALSO
3612              
3613             There is a mailing list at L
3614             for any user or developer questions and discussions.
3615              
3616             =over
3617              
3618             =item L
3619              
3620             The L object inherits from
3621             the L object, so the methods defined
3622             therein also apply to trees.
3623              
3624             =item L
3625              
3626             Also see the manual: L and L.
3627              
3628             =back
3629              
3630             =head1 CITATION
3631              
3632             If you use Bio::Phylo in published research, please cite it:
3633              
3634             B, B, B, B
3635             and B, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
3636             I B<12>:63.
3637             L
3638              
3639             =cut
3640              
3641             }
3642             1;
3643             __DATA__