File Coverage

blib/lib/Bio/Phylo/Forest/TreeRole.pm
Criterion Covered Total %
statement 642 1422 45.1
branch 152 398 38.1
condition 46 110 41.8
subroutine 79 146 54.1
pod 77 92 83.7
total 996 2168 45.9


line stmt bran cond sub pod time code
1             package Bio::Phylo::Forest::TreeRole;
2 31     31   202 use strict;
  31         64  
  31         777  
3 31     31   133 use warnings;
  31         56  
  31         606  
4 31     31   1176 use Bio::Phylo::Util::MOP;
  31         59  
  31         222  
5 31     31   142 use base 'Bio::Phylo::Listable';
  31         77  
  31         4418  
6 31     31   192 use Bio::Phylo::Util::Exceptions 'throw';
  31         57  
  31         1398  
7 31     31   176 use Bio::Phylo::Util::CONSTANT qw'/looks_like/ :objecttypes';
  31         63  
  31         7763  
8 31     31   6869 use Bio::Phylo::Util::OptionalInterface 'Bio::Tree::TreeI';
  31         76  
  31         157  
9 31     31   7885 use Bio::Phylo::Forest::Node;
  31         77  
  31         265  
10 31     31   171 use Bio::Phylo::IO 'unparse';
  31         58  
  31         1242  
11 31     31   157 use Bio::Phylo::Factory;
  31         56  
  31         124  
12 31     31   126 use Scalar::Util 'blessed';
  31         53  
  31         1083  
13 31     31   150 use List::Util qw'sum shuffle';
  31         61  
  31         7741  
14             my $LOADED_WRAPPERS = 0;
15             {
16             my $logger = __PACKAGE__->get_logger;
17             my ( $TYPE_CONSTANT, $CONTAINER_CONSTANT ) = ( _TREE_, _FOREST_ );
18             my $fac = Bio::Phylo::Factory->new;
19             my %default_constructor_args = (
20             '-listener' => sub {
21             my ( $self, $method, @args ) = @_;
22             for my $node (@args) {
23             if ( $method eq 'insert' ) {
24             $node->set_tree($self);
25             }
26             elsif ( $method eq 'delete' ) {
27             $node->set_tree();
28             }
29             elsif ( $method eq '_set_things' ) {
30             $_->set_tree($self) for @{ $node };
31             }
32             }
33             },
34             );
35              
36             =head1 NAME
37              
38             Bio::Phylo::Forest::TreeRole - Extra behaviours for a phylogenetic tree
39              
40             =head1 SYNOPSIS
41              
42             # some way to get a tree
43             use Bio::Phylo::IO;
44             my $string = '((A,B),C);';
45             my $forest = Bio::Phylo::IO->parse(
46             -format => 'newick',
47             -string => $string
48             );
49             my $tree = $forest->first;
50              
51             # do something:
52             print $tree->calc_imbalance;
53              
54             # prints "1"
55              
56             =head1 DESCRIPTION
57              
58             The object models a phylogenetic tree, a container of
59             L<Bio::Phylo::Forest::Node> objects. The tree object
60             inherits from L<Bio::Phylo::Listable>, so look there
61             for more methods.
62              
63             =head1 METHODS
64              
65             =head2 CONSTRUCTORS
66              
67             =over
68              
69             =item new()
70              
71             Tree constructor.
72              
73             Type : Constructor
74             Title : new
75             Usage : my $tree = Bio::Phylo::Forest::Tree->new;
76             Function: Instantiates a Bio::Phylo::Forest::Tree object.
77             Returns : A Bio::Phylo::Forest::Tree object.
78             Args : No required arguments.
79              
80             =cut
81              
82             sub new : Constructor {
83              
84             # could be child class
85 196     196 1 411 my $class = shift;
86              
87             # notify user
88 196         1284 $logger->info("constructor called for '$class'");
89 196 100       609 if ( not $LOADED_WRAPPERS ) {
90 29 0 0 0 0 53 eval do { local $/; <DATA> };
  29 0 0 0 0 134  
  29 0   0 0 33256  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0 0 0  
  0 0   0   0  
  0 0   0   0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0 0       0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
  0         0  
91 29         81 $LOADED_WRAPPERS++;
92             }
93              
94             # go up inheritance tree, eventually get an ID
95 196         1184 my $self = $class->SUPER::new( %default_constructor_args, @_ );
96 196         845 return $self;
97 31     31   187 }
  31         66  
  31         155  
98              
99             =item new_from_bioperl()
100              
101             Tree constructor from Bio::Tree::TreeI argument.
102              
103             Type : Constructor
104             Title : new_from_bioperl
105             Usage : my $tree =
106             Bio::Phylo::Forest::Tree->new_from_bioperl(
107             $bptree
108             );
109             Function: Instantiates a
110             Bio::Phylo::Forest::Tree object.
111             Returns : A Bio::Phylo::Forest::Tree object.
112             Args : A tree that implements Bio::Tree::TreeI
113              
114             =cut
115              
116             sub new_from_bioperl {
117 0     0 1 0 my ( $class, $bptree ) = @_;
118 0         0 my $self;
119 0 0 0     0 if ( blessed $bptree && $bptree->isa('Bio::Tree::TreeI') ) {
120 0         0 $self = $fac->create_tree;
121             # bless $self, $class;
122 0         0 $self = $self->_recurse( $bptree->get_root_node );
123              
124             # copy name
125 0         0 my $name = $bptree->id;
126 0 0       0 $self->set_name($name) if defined $name;
127              
128             # copy score
129 0         0 my $score = $bptree->score;
130 0 0       0 $self->set_score($score) if defined $score;
131             }
132             else {
133 0         0 throw 'ObjectMismatch' => 'Not a bioperl tree!';
134             }
135 0         0 return $self;
136             }
137              
138             =begin comment
139              
140             Type : Internal method
141             Title : _recurse
142             Usage : $tree->_recurse( $bpnode );
143             Function: Traverses a bioperl tree, instantiates a Bio::Phylo::Forest::Node
144             object for every Bio::Tree::NodeI object it encounters, copying
145             the parent, sibling and child relationships.
146             Returns : None (modifies invocant).
147             Args : A Bio::Tree::NodeI object.
148              
149             =end comment
150              
151             =cut
152              
153             sub _recurse {
154 0     0   0 my ( $self, $bpnode, $parent ) = @_;
155 0         0 my $node = Bio::Phylo::Forest::Node->new_from_bioperl($bpnode);
156 0 0       0 if ($parent) {
157 0         0 $parent->set_child($node);
158             }
159 0         0 $self->insert($node);
160 0         0 foreach my $bpchild ( $bpnode->each_Descendent ) {
161 0         0 $self->_recurse( $bpchild, $node );
162             }
163 0         0 return $self;
164             }
165              
166             =begin comment
167              
168             Type : Internal method
169             Title : _analyze
170             Usage : $tree->_analyze;
171             Function: Traverses the tree, creates references to first_daughter,
172             last_daughter, next_sister and previous_sister.
173             Returns : A Bio::Phylo::Forest::Tree object.
174             Args : none.
175             Comments: This method only looks at the parent, so theoretically
176             one could mess around with the
177             Bio::Phylo::Forest::Node::set_parent(Bio::Phylo::Forest::Node) method and
178             subsequently call Bio::Phylo::Forest::Tree::_analyze to overwrite old
179             (and wrong) child and sister references with new (and correct) ones.
180              
181             =end comment
182              
183             =cut
184              
185             sub _analyze {
186 9     9   21 my $tree = $_[0];
187 9         31 my $nodes = $tree->get_entities;
188 9         24 foreach ( @{$nodes} ) {
  9         27  
189 229         493 $_->set_next_sister();
190 229         451 $_->set_previous_sister();
191 229         451 $_->set_first_daughter();
192 229         386 $_->set_last_daughter();
193             }
194 9         22 my ( $first, $next );
195              
196             # mmmm... O(N^2)
197 9         17 NODE: for my $i ( 0 .. $#{$nodes} ) {
  9         24  
198 229         298 $first = $nodes->[$i];
199 229         288 for my $j ( ( $i + 1 ) .. $#{$nodes} ) {
  229         412  
200 2084         2618 $next = $nodes->[$j];
201 2084         3331 my ( $firstp, $nextp ) =
202             ( $first->get_parent, $next->get_parent );
203 2084 100 66     7059 if ( $firstp && $nextp && $firstp == $nextp ) {
      100        
204 115 100       234 if ( !$first->get_next_sister ) {
205 6         15 $first->set_next_sister($next);
206             }
207 115 100       234 if ( !$next->get_previous_sister ) {
208 6         13 $next->set_previous_sister($first);
209             }
210 115         223 next NODE;
211             }
212             }
213             }
214              
215             # O(N)
216 9         22 foreach ( @{$nodes} ) {
  9         24  
217 229         419 my $p = $_->get_parent;
218 229 100       427 if ($p) {
219 220 100       391 if ( !$_->get_next_sister ) {
220 105         254 $p->set_last_daughter($_);
221 105         185 next;
222             }
223 115 100       222 if ( !$_->get_previous_sister ) {
224 105         203 $p->set_first_daughter($_);
225             }
226             }
227             }
228 9         77 return $tree;
229             }
230              
231             =back
232              
233             =head2 QUERIES
234              
235             =over
236              
237             =item get_midpoint()
238              
239             Gets node that divides tree into two distance-balanced partitions.
240              
241             Type : Query
242             Title : get_midpoint
243             Usage : my $midpoint = $tree->get_midpoint;
244             Function: Gets node nearest to the middle of the longest path
245             Returns : A Bio::Phylo::Forest::Node object.
246             Args : NONE
247             Comments: This algorithm was ported from ETE.
248             It assumes the tree has branch lengths.
249              
250             =cut
251              
252             sub get_midpoint {
253 1     1 1 589 my $self = shift;
254 1         4 my $root = $self->get_root;
255 1         9 my $nA = $self->get_tallest_tip;
256 1         5 my $nB = $nA->get_farthest_node(1);
257 1 50       5 $logger->error("no farthest node!") unless $nB;
258 1         6 my $A2B_dist = $nA->calc_path_to_root + $nB->calc_path_to_root;
259 1         3 my $outgroup = $nA;
260 1         4 my $middist = $A2B_dist / 2;
261 1         2 my $cdist = 0;
262 1         3 my $current = $nA;
263 1         3 while ($current) {
264              
265 2 100       7 if ( $cdist > $middist ) {
266 1         3 last;
267             }
268             else {
269 1 50       4 if ( my $parent = $current->get_parent ) {
270 1         5 $cdist += $current->get_branch_length;
271 1         4 $current = $parent;
272             }
273             else {
274 0         0 last;
275             }
276             }
277             }
278 1         15 return $current;
279             }
280              
281             =item get_terminals()
282              
283             Get terminal nodes.
284              
285             Type : Query
286             Title : get_terminals
287             Usage : my @terminals = @{ $tree->get_terminals };
288             Function: Retrieves all terminal nodes in
289             the Bio::Phylo::Forest::Tree object.
290             Returns : An array reference of
291             Bio::Phylo::Forest::Node objects.
292             Args : NONE
293             Comments: If the tree is valid, this method
294             retrieves the same set of nodes as
295             $node->get_terminals($root). However,
296             because there is no recursion it may
297             be faster. Also, the node method by
298             the same name does not see orphans.
299              
300             =cut
301              
302             sub get_terminals {
303 151     151 1 15772 my $self = shift;
304 151         243 my @terminals;
305 151 100       490 if ( my $root = $self->get_root ) {
306             $root->visit_level_order(
307             sub {
308 4483     4483   5567 my $node = shift;
309 4483 100       8468 if ( $node->is_terminal ) {
310 2421         5098 push @terminals, $node;
311             }
312             }
313 121         1097 );
314             }
315             else {
316             $self->visit(
317             sub {
318 0     0   0 my $n = shift;
319 0 0       0 if ( $n->is_terminal ) {
320 0         0 push @terminals, $n;
321             }
322             }
323 30         163 );
324             }
325 151         1041 return \@terminals;
326             }
327              
328             =item get_internals()
329              
330             Get internal nodes.
331              
332             Type : Query
333             Title : get_internals
334             Usage : my @internals = @{ $tree->get_internals };
335             Function: Retrieves all internal nodes
336             in the Bio::Phylo::Forest::Tree object.
337             Returns : An array reference of
338             Bio::Phylo::Forest::Node objects.
339             Args : NONE
340             Comments: If the tree is valid, this method
341             retrieves the same set of nodes as
342             $node->get_internals($root). However,
343             because there is no recursion it may
344             be faster. Also, the node method by
345             the same name does not see orphans.
346              
347             =cut
348              
349             sub get_internals {
350 13     13 1 28 my $self = shift;
351 13         26 my @internals = grep { scalar @{ $_->get_children } } @{ $self->get_entities };
  184         242  
  184         362  
  13         47  
352 13         47 return \@internals;
353             }
354              
355             =item get_cherries()
356              
357             Get all cherries, i.e. nodes that have two terminal children
358              
359             Type : Query
360             Title : get_cherries
361             Usage : my @cherries = @{ $tree->get_cherries };
362             Function: Returns an array ref of cherries
363             Returns : ARRAY
364             Args : NONE
365              
366             =cut
367              
368             sub get_cherries {
369 1     1 1 14 my $self = shift;
370 1         2 my @cherries;
371 1         2 for my $node ( @{ $self->get_entities } ) {
  1         3  
372 10         13 my @children = @{ $node->get_children };
  10         18  
373            
374             # node has to be bifurcating
375 10 100       25 if ( scalar(@children) == 2 ) {
376            
377             # both children need to be tips
378 3 50 66     5 if ( not @{ $children[0]->get_children } and not @{ $children[1]->get_children } ) {
  3         8  
  2         5  
379 2         5 push @cherries, $node;
380             }
381             }
382             }
383 1         4 return \@cherries;
384             }
385              
386             =item get_all_rootings()
387              
388             Gets a forest of all rooted versions of the invocant tree.
389              
390             Type : Query
391             Title : get_all_rootings
392             Usage : my $forest = $tree->get_all_rootings;
393             Function: Returns an array ref of cherries
394             Returns : Bio::Phylo::Forest object
395             Args : NONE
396             Comments: This method assumes the invocant tree has a basal trichotomy.
397             "Rooted" trees with a basal bifurcation will give strange
398             results.
399              
400             =cut
401              
402             sub get_all_rootings {
403 0     0 1 0 my $self = shift;
404 0         0 my $forest = $fac->create_forest;
405            
406             # iterate over all nodes
407 0         0 my $i = 0;
408             $self->visit(sub{
409            
410             # clone the tree
411 0     0   0 my $clone = $self->clone;
412 0         0 my $node = $clone->get_by_index($i++);
413 0         0 my $anc = $node->get_ancestors;
414            
415             # create the new root if node isn't already root
416 0 0       0 if ( $anc->[0] ) {
417 0         0 my $nroot = $fac->create_node;
418 0         0 $nroot->set_child($node);
419 0         0 $clone->insert($nroot);
420 0 0       0 $anc->[0]->delete($node) if $anc->[0];
421            
422             # flip the nodes on the path to the root
423 0         0 for my $j ( 0 .. $#{ $anc } ) {
  0         0  
424 0         0 $nroot->set_child($anc->[$j]);
425 0         0 $nroot = $anc->[$j];
426             }
427 0         0 $forest->insert($clone);
428             }
429 0         0 });
430 0         0 return $forest;
431             }
432              
433            
434             =item get_root()
435              
436             Get root node.
437              
438             Type : Query
439             Title : get_root
440             Usage : my $root = $tree->get_root;
441             Function: Returns the root node.
442             Returns : Bio::Phylo::Forest::Node
443             Args : NONE
444              
445             =cut
446              
447             sub get_root {
448 1804     1804 1 83474 my $self = shift;
449            
450             # the simplest approach: look for nodes without parents
451 1804         2572 my ($root) = grep { ! $_->get_parent } @{ $self->get_entities };
  364674         608804  
  1804         4259  
452 1804 100       4059 if ( $root ) {
453 1745         4818 return $root;
454             }
455            
456             else {
457 59         78 my ( %children_of, %node_by_id );
458 59         65 for my $node ( @{ $self->get_entities } ) {
  59         108  
459 0         0 $node_by_id{ $node->get_id } = $node;
460 0 0       0 if ( my $parent = $node->get_parent ) {
461 0         0 my $parent_id = $parent->get_id;
462 0 0       0 $children_of{$parent_id} = [] if not $children_of{$parent_id};
463 0         0 push @{ $children_of{$parent_id} }, $node;
  0         0  
464             }
465             else {
466 0         0 return $node;
467             }
468             }
469 59         120 for my $parent ( keys %children_of ) {
470 0 0       0 if ( not exists $node_by_id{$parent} ) {
471 0         0 my @children = @{ $children_of{$parent} };
  0         0  
472 0 0       0 if ( scalar @children > 1 ) {
473 0         0 $logger->warn("Tree has multiple roots");
474             }
475 0         0 return shift @children;
476             }
477             }
478 59         158 return;
479             }
480             }
481              
482             =item get_ntax()
483              
484             Gets number of tips
485              
486             Type : Query
487             Title : get_ntax
488             Usage : my $ntax = $tree->get_ntax;
489             Function: Calculates the number of terminal nodes
490             Returns : Int
491             Args : NONE
492              
493             =cut
494            
495 0     0 1 0 sub get_ntax { scalar(@{ shift->get_terminals } ) }
  0         0  
496              
497             =item get_tallest_tip()
498              
499             Retrieves the node furthest from the root.
500              
501             Type : Query
502             Title : get_tallest_tip
503             Usage : my $tip = $tree->get_tallest_tip;
504             Function: Retrieves the node furthest from the
505             root in the current Bio::Phylo::Forest::Tree
506             object.
507             Returns : Bio::Phylo::Forest::Node
508             Args : NONE
509             Comments: If the tree has branch lengths, the tallest tip is
510             based on root-to-tip path length, else it is based
511             on number of nodes to root
512              
513             =cut
514              
515             sub get_tallest_tip {
516 3     3 1 6 my $self = shift;
517 3         4 my $criterion;
518              
519             # has (at least some) branch lengths
520 3 50       15 if ( $self->calc_tree_length ) {
521 3         6 $criterion = 'calc_path_to_root';
522             }
523             else {
524 0         0 $criterion = 'calc_nodes_to_root';
525             }
526 3         5 my $tallest;
527 3         5 my $height = 0;
528 3         6 for my $tip ( @{ $self->get_terminals } ) {
  3         12  
529 16 50       42 if ( my $path = $tip->$criterion ) {
530 16 100       38 if ( $path > $height ) {
531 4         6 $tallest = $tip;
532 4         9 $height = $path;
533             }
534             }
535             }
536 3         14 return $tallest;
537             }
538              
539             =item get_nodes_for_taxa()
540              
541             Gets node objects for the supplied taxon objects
542              
543             Type : Query
544             Title : get_nodes_for_taxa
545             Usage : my @nodes = @{ $tree->get_nodes_for_taxa(\@taxa) };
546             Function: Gets node objects for the supplied taxon objects
547             Returns : array ref of Bio::Phylo::Forest::Node objects
548             Args : A reference to an array of Bio::Phylo::Taxa::Taxon objects
549             or a Bio::Phylo::Taxa object
550              
551             =cut
552              
553             sub get_nodes_for_taxa {
554 5     5 1 9 my ( $self, $taxa ) = @_;
555 5         8 my ( $is_taxa, $taxa_objs );
556 5         8 eval { $is_taxa = looks_like_object $taxa, _TAXA_ };
  5         10  
557 5 100 66     17 if ( $is_taxa and not $@ ) {
558 1         3 $taxa_objs = $taxa->get_entities;
559             }
560             else {
561 4         5 $taxa_objs = $taxa;
562             }
563 5         7 my %ids = map { $_->get_id => 1 } @{$taxa_objs};
  11         22  
  5         10  
564 5         9 my @nodes;
565 5         6 for my $node ( @{ $self->get_entities } ) {
  5         12  
566 25 100       44 if ( my $taxon = $node->get_taxon ) {
567 15 100       39 push @nodes, $node if $ids{ $taxon->get_id };
568             }
569             }
570 5         13 return \@nodes;
571             }
572              
573             =item get_mrca()
574              
575             Get most recent common ancestor of argument nodes.
576              
577             Type : Query
578             Title : get_mrca
579             Usage : my $mrca = $tree->get_mrca(\@nodes);
580             Function: Retrieves the most recent
581             common ancestor of \@nodes
582             Returns : Bio::Phylo::Forest::Node
583             Args : A reference to an array of
584             Bio::Phylo::Forest::Node objects
585             in $tree.
586              
587             =cut
588              
589             sub get_mrca {
590 0     0 1 0 my ( $tree, $nodes ) = @_;
591 0 0 0     0 if ( not $nodes or not @{$nodes} ) {
    0          
592 0         0 return;
593             }
594 0         0 elsif ( scalar @{$nodes} == 1 ) {
595 0         0 return $nodes->[0];
596             }
597             else {
598 0         0 my $node1 = shift @{$nodes};
  0         0  
599 0         0 my $node2 = shift @{$nodes};
  0         0  
600 0         0 my $anc1 = $node1->get_ancestors;
601 0         0 my $anc2 = $node2->get_ancestors;
602 0         0 unshift @{$anc1}, $node1;
  0         0  
603 0         0 unshift @{$anc2}, $node2;
  0         0  
604 0         0 TRAVERSAL: for my $i ( 0 .. $#{$anc1} ) {
  0         0  
605 0         0 for my $j ( 0 .. $#{$anc2} ) {
  0         0  
606 0 0       0 if ( $anc1->[$i]->get_id == $anc2->[$j]->get_id ) {
607 0         0 unshift @{$nodes}, $anc1->[$i];
  0         0  
608 0         0 last TRAVERSAL;
609             }
610             }
611             }
612 0         0 return $tree->get_mrca($nodes);
613             }
614             }
615              
616             =back
617              
618             =head2 TESTS
619              
620             =over
621              
622             =item is_binary()
623              
624             Test if tree is bifurcating.
625              
626             Type : Test
627             Title : is_binary
628             Usage : if ( $tree->is_binary ) {
629             # do something
630             }
631             Function: Tests whether the invocant
632             object is bifurcating.
633             Returns : BOOLEAN
634             Args : NONE
635              
636             =cut
637              
638             sub is_binary {
639 2     2 1 3 my $self = shift;
640 2         5 my $return = 1;
641             $self->visit(sub{
642 33     33   35 my $count = scalar(@{ shift->get_children });
  33         62  
643 33 100 100     92 $return = 0 if $count != 0 && $count != 2;
644 2         13 });
645 2         12 $return;
646             }
647              
648             =item is_ultrametric()
649              
650             Test if tree is ultrametric.
651              
652             Type : Test
653             Title : is_ultrametric
654             Usage : if ( $tree->is_ultrametric(0.01) ) {
655             # do something
656             }
657             Function: Tests whether the invocant is
658             ultrametric.
659             Returns : BOOLEAN
660             Args : Optional margin between pairwise
661             comparisons (default = 0).
662             Comments: The test is done by performing
663             all pairwise comparisons for
664             root-to-tip path lengths. Since many
665             programs introduce rounding errors
666             in branch lengths the optional argument is
667             available to test TRUE for nearly
668             ultrametric trees. For example, a value
669             of 0.01 indicates that no pairwise
670             comparison may differ by more than 1%.
671             Note: behaviour is undefined for
672             negative branch lengths.
673              
674             =cut
675              
676             sub is_ultrametric {
677 44     44 1 7904 my $tree = shift;
678 44   100     135 my $margin = shift || 0;
679 44         89 my ( @tips, %path );
680             $tree->visit_depth_first(
681             '-pre' => sub {
682 840     840   1140 my $node = shift;
683 840 100       1556 if ( my $parent = $node->get_parent ) {
684             $path{ $node->get_id } =
685 796   100     1470 $path{ $parent->get_id } +
686             ( $node->get_branch_length || 0 );
687             }
688             else {
689 44   100     133 $path{ $node->get_id } = $node->get_branch_length || 0;
690             }
691 840 100       2183 push @tips, $node if $node->is_terminal;
692             }
693 44         357 );
694 44         347 for my $i ( 0 .. ( $#tips - 1 ) ) {
695 374         749 my $id1 = $tips[$i]->get_id;
696 374         712 PATH: for my $j ( $i + 1 .. $#tips ) {
697 2016         3847 my $id2 = $tips[$j]->get_id;
698 2016 100       3813 next PATH unless $path{$id2};
699 2001 100       4500 return 0 if abs( 1 - $path{$id1} / $path{$id2} ) > $margin;
700             }
701             }
702 40         247 return 1;
703             }
704              
705             =item is_monophyletic()
706              
707             Tests if first argument (node array ref) is monophyletic with respect
708             to second argument.
709              
710             Type : Test
711             Title : is_monophyletic
712             Usage : if ( $tree->is_monophyletic(\@tips, $node) ) {
713             # do something
714             }
715             Function: Tests whether the set of \@tips is
716             monophyletic w.r.t. $outgroup.
717             Returns : BOOLEAN
718             Args : A reference to a list of nodes, and a node.
719             Comments: This method is essentially the
720             same as
721             &Bio::Phylo::Forest::Node::is_outgroup_of.
722              
723             =cut
724              
725             sub is_monophyletic {
726 2     2 1 10 my $tree = shift;
727 2         3 my ( $nodes, $outgroup );
728 2 50       8 if ( @_ == 2 ) {
    0          
729 2         4 ( $nodes, $outgroup ) = @_;
730             }
731             elsif ( @_ == 4 ) {
732 0         0 my %args = @_;
733 0         0 $nodes = $args{'-nodes'};
734 0         0 $outgroup = $args{'-outgroup'};
735             }
736 2         3 for my $i ( 0 .. $#{$nodes} ) {
  2         7  
737 4         7 for my $j ( ( $i + 1 ) .. $#{$nodes} ) {
  4         11  
738 4         25 my $mrca = $nodes->[$i]->get_mrca( $nodes->[$j] );
739 4 100       14 return if $mrca->is_ancestor_of($outgroup);
740             }
741             }
742 1         8 return 1;
743             }
744              
745             =item is_paraphyletic()
746              
747             Type : Test
748             Title : is_paraphyletic
749             Usage : if ( $tree->is_paraphyletic(\@nodes,$node) ){ }
750             Function: Tests whether or not a given set of nodes are paraphyletic
751             (representing the full clade) given an outgroup
752             Returns : [-1,0,1] , -1 if the group is not monophyletic
753             0 if the group is not paraphyletic
754             1 if the group is paraphyletic
755             Args : Array ref of node objects which are in the tree,
756             Outgroup to compare the nodes to
757              
758             =cut
759              
760             sub is_paraphyletic {
761 0     0 1 0 my $tree = shift;
762 0         0 my ( $nodes, $outgroup );
763 0 0       0 if ( @_ == 2 ) {
    0          
764 0         0 ( $nodes, $outgroup ) = @_;
765             }
766             elsif ( @_ == 4 ) {
767 0         0 my %args = @_;
768 0         0 $nodes = $args{'-nodes'};
769 0         0 $outgroup = $args{'-outgroup'};
770             }
771 0 0       0 return -1 if !$tree->is_monophyletic( $nodes, $outgroup );
772 0         0 my @all = ( @{$nodes}, $outgroup );
  0         0  
773 0         0 my $mrca = $tree->get_mrca( \@all );
774 0         0 my $tips = $mrca->get_terminals;
775 0 0       0 return scalar @{$tips} == scalar @all ? 0 : 1;
  0         0  
776             }
777              
778             =item is_clade()
779              
780             Tests if argument (node array ref) forms a clade.
781              
782             Type : Test
783             Title : is_clade
784             Usage : if ( $tree->is_clade(\@tips) ) {
785             # do something
786             }
787             Function: Tests whether the set of
788             \@tips forms a clade
789             Returns : BOOLEAN
790             Args : A reference to an array of Bio::Phylo::Forest::Node objects, or a
791             reference to an array of Bio::Phylo::Taxa::Taxon objects, or a
792             Bio::Phylo::Taxa object
793             Comments:
794              
795             =cut
796              
797             sub is_clade {
798 3     3 1 5 my ( $tree, $arg ) = @_;
799 3         4 my ( $is_taxa, $is_node_array, $tips );
800              
801             # check if arg is a Taxa object
802 3         6 eval { $is_taxa = looks_like_object $arg, _TAXA_ };
  3         28  
803 3 50 33     8 if ( $is_taxa and not $@ ) {
804 0         0 $tips = $tree->get_nodes_for_taxa($arg);
805             }
806              
807             # check if arg is an array of Taxon object
808 3         6 eval { $is_node_array = looks_like_object $arg->[0], _TAXON_ };
  3         17  
809 3 50 33     12 if ( $is_node_array and not $@ ) {
810 3         7 $tips = $tree->get_nodes_for_taxa($arg);
811             }
812             else {
813 0         0 $tips = $arg; # arg is an array of Node objects
814             }
815 3         5 my $mrca;
816 3         4 for my $i ( 1 .. $#{$tips} ) {
  3         7  
817 3 50       13 $mrca ? $mrca = $mrca->get_mrca( $tips->[$i] ) : $mrca =
818             $tips->[0]->get_mrca( $tips->[$i] );
819             }
820 3 100       6 scalar @{ $mrca->get_terminals } == scalar @{$tips} ? return 1 : return;
  3         10  
  3         17  
821             }
822              
823             =item is_cladogram()
824              
825             Tests if tree is a cladogram (i.e. no branch lengths)
826              
827             Type : Test
828             Title : is_cladogram
829             Usage : if ( $tree->is_cladogram() ) {
830             # do something
831             }
832             Function: Tests whether the tree is a
833             cladogram (i.e. no branch lengths)
834             Returns : BOOLEAN
835             Args : NONE
836             Comments:
837              
838             =cut
839              
840             sub is_cladogram {
841 1     1 1 4 my $tree = shift;
842 1         2 for my $node ( @{ $tree->get_entities } ) {
  1         3  
843 0 0       0 return 0 if defined $node->get_branch_length;
844             }
845 1         4 return 1;
846             }
847              
848             =back
849              
850             =head2 CALCULATIONS
851              
852             =over
853              
854             =item calc_branch_length_distance()
855              
856             Calculates the Euclidean branch length distance between two trees. See
857             Kuhner & Felsenstein (1994). A simulation comparison of phylogeny algorithms
858             under equal and unequal evolutionary rates. MBE 11(3):459-468.
859              
860             Type : Calculation
861             Title : calc_branch_length_distance
862             Usage : my $distance =
863             $tree1->calc_branch_length_distance($tree2);
864             Function: Calculates the Euclidean branch length distance between two trees
865             Returns : SCALAR, number
866             Args : NONE
867              
868             =cut
869              
870             #=item calc_robinson_foulds_distance()
871             #
872             #Calculates the Robinson and Foulds distance between two trees.
873             #
874             # Type : Calculation
875             # Title : calc_robinson_foulds_distance
876             # Usage : my $distance =
877             # $tree1->calc_robinson_foulds_distance($tree2);
878             # Function: Calculates the Robinson and Foulds distance between two trees
879             # Returns : SCALAR, number
880             # Args : NONE
881             #
882             #=cut
883             #
884             # sub calc_robinson_foulds_distance {
885             # my ( $self, $other ) = @_;
886             # my $tuples = $self->_calc_branch_diffs($other);
887             # my $sum = 0;
888             # for my $tuple ( @{ $tuples } ) {
889             # my $diff = $tuple->[0] - $tuple->[1];
890             # $sum += abs $diff;
891             # }
892             # return $sum;
893             # }
894             sub calc_branch_length_distance {
895 1     1 1 4 my ( $self, $other ) = @_;
896 1         7 my $squared = $self->calc_branch_length_score($other);
897 1         8 return sqrt($squared);
898             }
899              
900             =item calc_branch_length_score()
901              
902             Calculates the squared Euclidean branch length distance between two trees.
903              
904             Type : Calculation
905             Title : calc_branch_length_score
906             Usage : my $score =
907             $tree1->calc_branch_length_score($tree2);
908             Function: Calculates the squared Euclidean branch
909             length distance between two trees
910             Returns : SCALAR, number
911             Args : A Bio::Phylo::Forest::Tree object,
912             Optional second argument flags that results should be normalized
913             =cut
914              
915             sub calc_branch_length_score {
916 2     2 1 6 my ( $self, $other, $normalize ) = @_;
917 2         14 my $tuples = $self->_calc_branch_diffs($other);
918 2         7 my $sum = 0;
919 2         5 for my $tuple ( @{$tuples} ) {
  2         5  
920 5   50     20 my $diff = ( $tuple->[0] || 0 ) - ( $tuple->[1] || 0 );
      50        
921 5         12 $sum += $diff**2;
922             }
923 2 50       16 return $normalize ? $sum / scalar(@{$tuples}) : $sum;
  0         0  
924             }
925              
926              
927             =begin comment
928              
929             Returns an array ref containing array references, with the first element of
930             each nested array ref representing the length of the branch subtending a
931             particular split on the invocant (or 0), the second element the length of the
932             same branch on argument (or 0), the third element a boolean to indicate whether
933             the split was present in both trees, and the fourth element a sorted, comma-separated
934             list of the MD5-hashed names of all tips subtended by that split.
935              
936             Type : Calculation
937             Title : calc_branch_diffs
938             Usage : my $triples =
939             $tree1->calc_branch_diffs($tree2);
940             Function: Creates two-dimensional array of equivalent branch lengths
941             Returns : Two-dimensional array (triples)
942             Args : NONE
943              
944             =end comment
945              
946             =cut
947              
948             sub _calc_branch_diffs {
949 23     23   57 my ( $self, $other ) = @_;
950              
951             # we create an anonymous subroutine which
952             # we will apply to $self and $other
953             my $length_for_split_creator = sub {
954              
955             # so this will be $self and $other
956 46     46   78 my $tree = shift;
957              
958             # keys will be hashed, comma-separated tip names,
959             # values will be branch lengths
960 46         87 my %length_for_split;
961              
962             # this will assemble the comma-separated,
963             # hashed tip names
964             my %hash_for_node;
965              
966             # post-order traversal, so tips are processed first
967             $tree->visit_depth_first(
968             '-post' => sub {
969 671         882 my $node = shift;
970 671         1116 my $id = $node->get_id;
971 671         908 my @children = @{ $node->get_children };
  671         1164  
972 671         953 my $hash;
973              
974             # we only enter into this case AFTER tips
975             # have been processed, so %hash_for_node
976             # values will be assigned for all children
977 671 100 100     1469 if (@children and $node->get_parent) {
978              
979             # these will be growing lists from
980             # tips to root
981             my $unsorted = join ',',
982 265         439 map { $hash_for_node{ $_->get_id } } @children;
  531         902  
983              
984             # we need to split, sort and join
985             # so that splits where the subtended,
986             # higher topology is different still
987             # yield the same concatenated hash
988 265         1134 $hash = join ',', sort { $a cmp $b } split /,/,
  3051         3809  
989             $unsorted;
990              
991             # coerce to a numeric type
992 265   100     658 $length_for_split{$hash} = $node->get_branch_length || 0;
993             }
994             else {
995              
996             # this is how we ensure that every tip name is a
997             # single, unique string without unexpected characters
998             # (especially, commas).
999             # Digest::MD5 was in CORE since 5.7
1000 406         1544 require Digest::MD5;
1001 406         913 $hash = Digest::MD5::md5( $node->get_name );
1002             }
1003              
1004             # store for the next recursion
1005 671         2572 $hash_for_node{$id} = $hash;
1006             }
1007 46         319 );
1008              
1009             # this is the return value for the anonymous sub
1010 46         568 return %length_for_split;
1011 23         131 };
1012              
1013             # here we execute the anonymous sub. twice.
1014 23         58 my %lengths_self = $length_for_split_creator->($self);
1015 23         70 my %lengths_other = $length_for_split_creator->($other);
1016 23         53 my @tuples;
1017              
1018             # first visit the splits in $self, which will identify
1019             # those it shares with $other and those missing in $other
1020 23         83 for my $split ( keys %lengths_self ) {
1021 133         153 my $tuple;
1022 133 100       211 if ( exists $lengths_other{$split} ) {
1023             $tuple =
1024 128   100     316 [ $lengths_self{$split}, $lengths_other{$split} || 0, 1, $split ];
1025             }
1026             else {
1027 5         12 $tuple = [ $lengths_self{$split}, 0, 0, $split ];
1028             }
1029 133         229 push @tuples, $tuple;
1030             }
1031              
1032             # then check if there are splits in $other but not in $self
1033 23         72 for my $split ( keys %lengths_other ) {
1034 132 100       238 if ( not exists $lengths_self{$split} ) {
1035 4         12 push @tuples, [ 0, $lengths_other{$split}, 0, $split ];
1036             }
1037             }
1038 23         144 return \@tuples;
1039             }
1040              
1041             =item calc_tree_length()
1042              
1043             Calculates the sum of all branch lengths.
1044              
1045             Type : Calculation
1046             Title : calc_tree_length
1047             Usage : my $tree_length =
1048             $tree->calc_tree_length;
1049             Function: Calculates the sum of all branch
1050             lengths (i.e. the tree length).
1051             Returns : FLOAT
1052             Args : NONE
1053              
1054             =cut
1055              
1056             sub calc_tree_length {
1057 40     40 1 87 my $self = shift;
1058 40         62 my $tl = 0;
1059             $self->visit(sub{
1060 628   100 628   1030 $tl += shift->get_branch_length || 0;
1061 40         210 });
1062 40         229 return $tl;
1063             }
1064              
1065             =item calc_tree_height()
1066              
1067             Calculates the height of the tree.
1068              
1069             Type : Calculation
1070             Title : calc_tree_height
1071             Usage : my $tree_height =
1072             $tree->calc_tree_height;
1073             Function: Calculates the height
1074             of the tree.
1075             Returns : FLOAT
1076             Args : NONE
1077             Comments: For ultrametric trees this
1078             method returns the height, but
1079             this is done by averaging over
1080             all root-to-tip path lengths, so
1081             for additive trees the result
1082             should consequently be interpreted
1083             differently.
1084              
1085             =cut
1086              
1087             sub calc_tree_height {
1088 3     3 1 11 my $self = shift;
1089 3         17 my $th = $self->calc_total_paths / $self->calc_number_of_terminals;
1090 3         17 return $th;
1091             }
1092              
1093             =item calc_number_of_nodes()
1094              
1095             Calculates the number of nodes.
1096              
1097             Type : Calculation
1098             Title : calc_number_of_nodes
1099             Usage : my $number_of_nodes =
1100             $tree->calc_number_of_nodes;
1101             Function: Calculates the number of
1102             nodes (internals AND terminals).
1103             Returns : INT
1104             Args : NONE
1105              
1106             =cut
1107              
1108             sub calc_number_of_nodes {
1109 1     1 1 3 my $self = shift;
1110 1         2 my $numnodes = scalar @{ $self->get_entities };
  1         4  
1111 1         5 return $numnodes;
1112             }
1113              
1114             =item calc_number_of_terminals()
1115              
1116             Calculates the number of terminal nodes.
1117              
1118             Type : Calculation
1119             Title : calc_number_of_terminals
1120             Usage : my $number_of_terminals =
1121             $tree->calc_number_of_terminals;
1122             Function: Calculates the number
1123             of terminal nodes.
1124             Returns : INT
1125             Args : NONE
1126              
1127             =cut
1128              
1129             sub calc_number_of_terminals {
1130 43     43 1 62 my $self = shift;
1131 43         68 my $numterm = scalar @{ $self->get_terminals };
  43         104  
1132 43         127 return $numterm;
1133             }
1134              
1135             =item calc_number_of_internals()
1136              
1137             Calculates the number of internal nodes.
1138              
1139             Type : Calculation
1140             Title : calc_number_of_internals
1141             Usage : my $number_of_internals =
1142             $tree->calc_number_of_internals;
1143             Function: Calculates the number
1144             of internal nodes.
1145             Returns : INT
1146             Args : NONE
1147              
1148             =cut
1149              
1150             sub calc_number_of_internals {
1151 5     5 1 11 my $self = shift;
1152 5         10 my $numint = scalar @{ $self->get_internals };
  5         25  
1153 5         27 return $numint;
1154             }
1155              
1156             =item calc_number_of_cherries()
1157              
1158             Calculates the number of cherries, i.e. the number of nodes that subtend
1159             exactly two tips. See for applications of this metric:
1160             L<http://dx.doi.org/10.1016/S0025-5564(99)00060-7>
1161              
1162             Type : Calculation
1163             Title : calc_number_of_cherries
1164             Usage : my $number_of_cherries =
1165             $tree->calc_number_of_cherries;
1166             Function: Calculates the number of cherries
1167             Returns : INT
1168             Args : NONE
1169              
1170             =cut
1171              
1172             sub calc_number_of_cherries {
1173 1     1 1 2 my $self = shift;
1174 1         2 my %cherry;
1175 1         2 for my $tip ( @{ $self->get_terminals } ) {
  1         10  
1176 4 50       11 if ( my $parent = $tip->get_parent ) {
1177 4 50       17 if ( $parent->is_preterminal ) {
1178 4         10 my $children = $parent->get_children;
1179 4 50       7 if ( scalar @{$children} == 2 ) {
  4         8  
1180 4         8 $cherry{ $parent->get_id }++;
1181             }
1182             }
1183             }
1184             }
1185 1         5 my @cherry_ids = keys %cherry;
1186 1         7 return scalar @cherry_ids;
1187             }
1188              
1189             =item calc_total_paths()
1190              
1191             Calculates the sum of all root-to-tip path lengths.
1192              
1193             Type : Calculation
1194             Title : calc_total_paths
1195             Usage : my $total_paths =
1196             $tree->calc_total_paths;
1197             Function: Calculates the sum of all
1198             root-to-tip path lengths.
1199             Returns : FLOAT
1200             Args : NONE
1201              
1202             =cut
1203              
1204             sub calc_total_paths {
1205 4     4 1 8 my $self = shift;
1206 4         10 my $tp = 0;
1207 4         8 foreach ( @{ $self->get_terminals } ) {
  4         18  
1208 36         96 $tp += $_->calc_path_to_root;
1209             }
1210 4         25 return $tp;
1211             }
1212              
1213             =item calc_redundancy()
1214              
1215             Calculates the amount of shared (redundant) history on the total.
1216              
1217             Type : Calculation
1218             Title : calc_redundancy
1219             Usage : my $redundancy =
1220             $tree->calc_redundancy;
1221             Function: Calculates the amount of shared
1222             (redundant) history on the total.
1223             Returns : FLOAT
1224             Args : NONE
1225             Comments: Redundancy is calculated as
1226             1 / ( treelength - height / ( ntax * height - height ) )
1227              
1228             =cut
1229              
1230             sub calc_redundancy {
1231 1     1 1 3 my $self = shift;
1232 1         5 my $tl = $self->calc_tree_length;
1233 1         6 my $th = $self->calc_tree_height;
1234 1         3 my $ntax = $self->calc_number_of_terminals;
1235 1         6 my $red = 1 - ( ( $tl - $th ) / ( ( $th * $ntax ) - $th ) );
1236 1         6 return $red;
1237             }
1238              
1239             =item calc_imbalance()
1240              
1241             Calculates Colless' coefficient of tree imbalance.
1242              
1243             Type : Calculation
1244             Title : calc_imbalance
1245             Usage : my $imbalance = $tree->calc_imbalance;
1246             Function: Calculates Colless' coefficient
1247             of tree imbalance.
1248             Returns : FLOAT
1249             Args : NONE
1250             Comments: As described in Colless, D.H., 1982.
1251             The theory and practice of phylogenetic
1252             systematics. Systematic Zoology 31(1): 100-104
1253              
1254             =cut
1255              
1256             sub calc_imbalance {
1257 6     6 1 17 my $self = shift;
1258 6         10 my %descendants;
1259 6         11 my $n = 0;
1260 6         11 my $sumdiff = 0;
1261             $self->visit_depth_first(
1262             '-post' => sub {
1263 119     119   165 my $node = shift;
1264 119         425 my @children = @{ $node->get_children };
  119         218  
1265            
1266             # node is internal, compute n descendants left and right
1267 119 100       244 if ( @children == 2 ) {
    100          
1268 55         72 my $li = shift @children;
1269 55         88 my $ri = shift @children;
1270 55         105 my $li_ndesc = $descendants{$li->get_id};
1271 55         99 my $ri_ndesc = $descendants{$ri->get_id};
1272 55         264 $sumdiff += abs($li_ndesc - $ri_ndesc);
1273 55         106 $descendants{$node->get_id} = $li_ndesc + $ri_ndesc;
1274             }
1275            
1276             # node is terminal, initialize tally of descendants
1277             elsif ( @children == 0 ) {
1278 63         89 $n++;
1279 63         116 $descendants{$node->get_id} = 1;
1280             }
1281            
1282             # node is either a polytomy or an unbranched internal. Can't proceed in either case.
1283             else {
1284 1         4 throw 'ObjectMismatch' => "Colless's imbalance only possible for binary trees";
1285             }
1286             }
1287 6         67 );
1288 5 50       38 if ( $n < 3 ) {
1289 0         0 $logger->error("too few nodes in tree: $n<=2");
1290 0         0 return undef;
1291             }
1292             else {
1293 5         160 return $sumdiff / ( ($n-1) * ($n-2) / 2 );
1294             }
1295             }
1296              
1297             =item calc_i2()
1298              
1299             Calculates I2 imbalance.
1300              
1301             Type : Calculation
1302             Title : calc_i2
1303             Usage : my $ci2 = $tree->calc_i2;
1304             Function: Calculates I2 imbalance.
1305             Returns : FLOAT
1306             Args : NONE
1307             Comments:
1308              
1309             =cut
1310              
1311             sub calc_i2 {
1312 0     0 1 0 my $self = shift;
1313 0         0 my ( $maxic, $sum, $I2 ) = ( 0, 0 );
1314 0 0       0 if ( !$self->is_binary ) {
1315 0         0 throw 'ObjectMismatch' => 'I2 imbalance only possible for binary trees';
1316             }
1317 0         0 my $numtips = $self->calc_number_of_terminals;
1318 0         0 $numtips -= 2;
1319 0         0 while ($numtips) {
1320 0         0 $maxic += $numtips;
1321 0         0 $numtips--;
1322             }
1323 0         0 foreach my $node ( @{ $self->get_internals } ) {
  0         0  
1324 0         0 my ( $fd, $ld, $ftips, $ltips ) =
1325             ( $node->get_first_daughter, $node->get_last_daughter, 0, 0 );
1326 0 0       0 if ( $fd->is_internal ) {
1327 0         0 foreach ( @{ $fd->get_descendants } ) {
  0         0  
1328 0 0       0 if ( $_->is_terminal ) {
1329 0         0 $ftips++;
1330             }
1331             else {
1332 0         0 next;
1333             }
1334             }
1335             }
1336             else {
1337 0         0 $ftips = 1;
1338             }
1339 0 0       0 if ( $ld->is_internal ) {
1340 0         0 foreach ( @{ $ld->get_descendants } ) {
  0         0  
1341 0 0       0 if ( $_->is_terminal ) {
1342 0         0 $ltips++;
1343             }
1344             else {
1345 0         0 next;
1346             }
1347             }
1348             }
1349             else {
1350 0         0 $ltips = 1;
1351             }
1352 0 0       0 next unless ( $ftips + $ltips - 2 );
1353 0         0 $sum += abs( $ftips - $ltips ) / abs( $ftips + $ltips - 2 );
1354             }
1355 0 0       0 if ( $maxic == 0 ) {
1356 0         0 $logger->error("too few nodes in tree: $maxic==0");
1357 0         0 return undef;
1358             }
1359             else {
1360 0         0 $I2 = $sum / $maxic;
1361 0         0 return $I2;
1362             }
1363             }
1364              
1365             =item calc_gamma()
1366              
1367             Calculates the Pybus & Harvey (2000) gamma statistic.
1368              
1369             Type : Calculation
1370             Title : calc_gamma
1371             Usage : my $gamma = $tree->calc_gamma();
1372             Function: Calculates the Pybus gamma statistic
1373             Returns : FLOAT
1374             Args : NONE
1375             Comments: As described in Pybus, O.G. and
1376             Harvey, P.H., 2000. Testing
1377             macro-evolutionary models using
1378             incomplete molecular phylogenies.
1379             Proc. R. Soc. Lond. B 267, 2267-2272
1380              
1381             =cut
1382              
1383             # code due to Aki Mimoto
1384             sub calc_gamma {
1385 0     0 1 0 my $self = shift;
1386 0         0 my $tl = $self->calc_tree_length;
1387 0         0 my $terminals = $self->get_terminals;
1388 0         0 my $n = scalar @{$terminals};
  0         0  
1389 0         0 my $height = $self->calc_tree_height;
1390              
1391             # Calculate the distance of each node to the root
1392             # my %soft_refs;
1393             # my $root = $self->get_root;
1394             # $soft_refs{$root} = 0;
1395             # my @nodes = $root;
1396             # while (@nodes) {
1397             # my $node = shift @nodes;
1398             # my $path_len = $soft_refs{$node} += $node->get_branch_length;
1399             # my $children = $node->get_children or next;
1400             # for my $child (@$children) {
1401             # $soft_refs{$child} = $path_len;
1402             # }
1403             # push @nodes, @{$children};
1404             # }
1405             # the commented out block is more efficiently implemented like so:
1406             my %soft_refs =
1407 0         0 map { $_ => $_->calc_path_to_root } @{ $self->get_entities };
  0         0  
  0         0  
1408              
1409             # Then, we know how far each node is from the root. At this point, we
1410             # can sort through and create the @g array
1411             my %node_spread =
1412 0         0 map { ( $_ => 1 ) } values %soft_refs; # remove duplicates
  0         0  
1413 0         0 my @sorted_nodes = sort { $a <=> $b } keys %node_spread;
  0         0  
1414 0         0 my $prev = 0;
1415 0         0 my @g;
1416 0         0 for my $length (@sorted_nodes) {
1417 0         0 push @g, $length - $prev;
1418 0         0 $prev = $length;
1419             }
1420 0         0 my $sum = 0;
1421 0         0 eval { require Math::BigFloat };
  0         0  
1422 0 0       0 if ($@) { # BigFloat is not available.
1423 0         0 for ( my $i = 2 ; $i < $n ; $i++ ) {
1424 0         0 for ( my $k = 2 ; $k <= $i ; $k++ ) {
1425 0         0 $sum += $k * $g[ $k - 1 ];
1426             }
1427             }
1428 0         0 my $numerator = ( $sum / ( $n - 2 ) ) - ( $tl / 2 );
1429 0         0 my $denominator = $tl * sqrt( 1 / ( 12 * ( $n - 2 ) ) );
1430 0         0 $self->_store_cache( $numerator / $denominator );
1431 0         0 return $numerator / $denominator;
1432             }
1433              
1434             # Big Float is available. We'll use it then
1435 0         0 $sum = Math::BigFloat->new(0);
1436 0         0 for ( my $i = 2 ; $i < $n ; $i++ ) {
1437 0         0 for ( my $k = 2 ; $k <= $i ; $k++ ) {
1438 0         0 $sum->badd( $k * $g[ $k - 1 ] );
1439             }
1440             }
1441 0         0 $sum->bdiv( $n - 2 );
1442 0         0 $sum->bsub( $tl / 2 );
1443 0         0 my $denominator = Math::BigFloat->new(1);
1444 0         0 $denominator->bdiv( 12 * ( $n - 2 ) );
1445 0         0 $denominator->bsqrt();
1446 0         0 $sum->bdiv( $denominator * $tl );
1447            
1448             # R seems to be unhappy about long numbers, so truncating
1449 0         0 $sum->accuracy(10);
1450 0         0 return $sum;
1451             }
1452              
1453             =item calc_fiala_stemminess()
1454              
1455             Calculates stemminess measure of Fiala and Sokal (1985).
1456              
1457             Type : Calculation
1458             Title : calc_fiala_stemminess
1459             Usage : my $fiala_stemminess =
1460             $tree->calc_fiala_stemminess;
1461             Function: Calculates stemminess measure
1462             Fiala and Sokal (1985).
1463             Returns : FLOAT
1464             Args : NONE
1465             Comments: As described in Fiala, K.L. and
1466             R.R. Sokal, 1985. Factors
1467             determining the accuracy of
1468             cladogram estimation: evaluation
1469             using computer simulation.
1470             Evolution, 39: 609-622
1471              
1472             =cut
1473              
1474             sub calc_fiala_stemminess {
1475 1     1 1 4 my $self = shift;
1476 1         2 my @internals = @{ $self->get_internals };
  1         5  
1477 1         4 my $total = 0;
1478 1         4 my $nnodes = ( scalar @internals - 1 );
1479 1         3 foreach my $node (@internals) {
1480 8 100       20 if ( $node->get_parent ) {
1481 7         15 my $desclengths = $node->get_branch_length;
1482 7         12 my @children = @{ $node->get_descendants };
  7         19  
1483 7         13 for my $child (@children) {
1484 44         77 $desclengths += $child->get_branch_length;
1485             }
1486 7         16 $total += ( $node->get_branch_length / $desclengths );
1487             }
1488             }
1489 1 50       4 if ( $nnodes ) {
1490 1         11 return $total /= $nnodes;
1491             }
1492             else {
1493 0         0 $logger->error("too few nodes in tree: n-1=$nnodes");
1494 0         0 return undef;
1495             }
1496             }
1497              
1498             =item calc_rohlf_stemminess()
1499              
1500             Calculates stemminess measure from Rohlf et al. (1990).
1501              
1502             Type : Calculation
1503             Title : calc_rohlf_stemminess
1504             Usage : my $rohlf_stemminess =
1505             $tree->calc_rohlf_stemminess;
1506             Function: Calculates stemminess measure
1507             from Rohlf et al. (1990).
1508             Returns : FLOAT
1509             Args : NONE
1510             Comments: As described in Rohlf, F.J.,
1511             W.S. Chang, R.R. Sokal, J. Kim,
1512             1990. Accuracy of estimated
1513             phylogenies: effects of tree
1514             topology and evolutionary model.
1515             Evolution, 44(6): 1671-1684
1516              
1517             =cut
1518              
1519             sub calc_rohlf_stemminess {
1520              
1521             # invocant is a tree
1522 3     3 1 10 my $self = shift;
1523 3 100       14 throw ObjectMismatch => "This algorithm isn't generalized to
1524             deal with multifurcations" if $self->calc_resolution < 1;
1525 2 100       13 throw ObjectMismatch => "This algorithm requires branch lengths"
1526             unless $self->calc_tree_length;
1527              
1528             # all internal nodes in the tree
1529 1         4 my @internals = @{ $self->get_internals };
  1         8  
1530              
1531             # all terminal nodes in the tree
1532 1         4 my @terminals = @{ $self->get_terminals };
  1         5  
1533              
1534             # this will become the sum of all STni
1535 1         3 my $total = 0;
1536              
1537             # 1/(t-2), by which we multiply total
1538 1         6 my $one_over_t_minus_two = 1 / ( scalar @terminals - 2 );
1539              
1540             # iterate over all nodes, as per equation (1)
1541 1         5 for my $node (@internals) {
1542              
1543             # only process nodes that aren't the root
1544 8 100       22 if ( my $parent = $node->get_parent ) {
1545              
1546             # Wj->i is defined as "the length of the edge
1547             # (in time units) between HTU i (a hypothetical
1548             # taxonomic unit, i.e. an internal node) and
1549             # its ancestor j"
1550 7         17 my $Wj_i = $node->get_branch_length;
1551              
1552             # hj is defined as "the 'height' of HTU j (the
1553             # time of its origin, a known quantity since we
1554             # know the true tree in these simulations)".
1555 7         17 my $hj = $parent->calc_path_to_root;
1556 7 100       14 if ( !$hj ) {
1557 2         5 next;
1558             }
1559              
1560             # as per equation (2) in Rohlf et al. (1990)
1561 5         11 $total += ( $Wj_i / $hj );
1562             }
1563             }
1564              
1565             # multiply by 1/(t-2) as per equation (1)
1566 1         9 return $one_over_t_minus_two * $total;
1567             }
1568              
1569             =item calc_resolution()
1570              
1571             Calculates tree resolution.
1572              
1573             Type : Calculation
1574             Title : calc_resolution
1575             Usage : my $resolution =
1576             $tree->calc_resolution;
1577             Function: Calculates the number
1578             of internal nodes over the
1579             total number of internal nodes
1580             on a fully bifurcating
1581             tree of the same size.
1582             Returns : FLOAT
1583             Args : NONE
1584              
1585             =cut
1586              
1587             sub calc_resolution {
1588 4     4 1 11 my $self = shift;
1589 4         20 my $res = $self->calc_number_of_internals /
1590             ( $self->calc_number_of_terminals - 1 );
1591 4         29 return $res;
1592             }
1593              
1594             =item calc_branching_times()
1595              
1596             Calculates cumulative branching times.
1597              
1598             Type : Calculation
1599             Title : calc_branching_times
1600             Usage : my $branching_times =
1601             $tree->calc_branching_times;
1602             Function: Returns a two-dimensional array.
1603             The first dimension consists of
1604             the "records", so that in the
1605             second dimension $AoA[$first][0]
1606             contains the internal node references,
1607             and $AoA[$first][1] the branching
1608             time of the internal node. The
1609             records are orderered from root to
1610             tips by time from the origin.
1611             Returns : SCALAR[][] or FALSE
1612             Args : NONE
1613              
1614             =cut
1615              
1616             sub calc_branching_times {
1617 6     6 1 14 my $self = shift;
1618 6         14 my @branching_times;
1619 6 100       31 if ( !$self->is_ultrametric(0.01) ) {
1620 1         5 throw 'ObjectMismatch' =>
1621             'tree isn\'t ultrametric, results would be meaningless';
1622             }
1623             else {
1624 5         13 my @temp;
1625 5         13 my $seen_tip = 0;
1626             $self->visit_depth_first(
1627             '-pre' => sub {
1628 125     125   205 my $node = shift;
1629 125 100 100     494 if ( not $seen_tip or $node->is_internal ) {
1630 65         180 my $bt = $node->get_branch_length;
1631 65 100       179 if ( my $parent = $node->get_parent ) {
1632 60         212 $bt += $parent->get_generic('bt');
1633             }
1634 65         281 $node->set_generic( 'bt' => $bt );
1635 65         190 push @temp, [ $node, $bt ];
1636 65 100       207 if ( $node->is_terminal ) {
1637 5         18 $seen_tip++;
1638             }
1639             }
1640             }
1641 5         61 );
1642 5         66 @branching_times = sort { $a->[1] <=> $b->[1] } @temp;
  178         305  
1643             }
1644 5         26 return \@branching_times;
1645             }
1646              
1647             =item calc_waiting_times()
1648              
1649             Calculates intervals between splits.
1650              
1651             Type : Calculation
1652             Title : calc_waiting_times
1653             Usage : my $waitings =
1654             $tree->calc_waiting_times;
1655             Function: Returns a two-dimensional array.
1656             The first dimension consists of
1657             the "records", so that in the
1658             second dimension $AoA[$first][0]
1659             contains the internal node references,
1660             and $AoA[$first][1] the waiting
1661             time of the internal node. The
1662             records are orderered from root to
1663             tips by time from the origin.
1664             Returns : SCALAR[][] or FALSE
1665             Args : NONE
1666              
1667             =cut
1668              
1669             sub calc_waiting_times {
1670 3     3 1 48 my $self = shift;
1671 3         18 my $times = $self->calc_branching_times;
1672 3         8 for ( my $i = $#{$times} ; $i > 0 ; $i-- ) {
  3         17  
1673 44         97 $times->[$i]->[1] -= $times->[ $i - 1 ]->[1];
1674             }
1675 3         16 return $times;
1676             }
1677              
1678             =item calc_node_ages()
1679              
1680             Calculates node ages.
1681              
1682             Type : Calculation
1683             Title : calc_node_ages
1684             Usage : $tree->calc_node_ages;
1685             Function: Calculates the age of all the nodes in the tree (i.e. the distance
1686             from the tips) and assigns these to the 'age' slot, such that,
1687             after calling this method, the age of any one node can be retrieved
1688             by calling $node->get_generic('age');
1689             Returns : The invocant
1690             Args : NONE
1691             Comments: This method computes, in a sense, the opposite of
1692             calc_branching_times: here, we compute the distance from the tips
1693             (i.e. how long ago the split occurred), whereas calc_branching_times
1694             calculates the distance from the root.
1695              
1696             =cut
1697              
1698             sub calc_node_ages {
1699 1     1 1 2 my $self = shift;
1700             $self->visit_depth_first(
1701             '-post' => sub {
1702 5     5   9 my $node = shift;
1703 5         10 my $age = 0;
1704 5 100       11 if ( my $child = $node->get_child(0) ) {
1705 2         7 $age =
1706             $child->get_generic('age') + $child->get_branch_length;
1707             }
1708 5         17 $node->set_generic( 'age' => $age );
1709             }
1710 1         10 );
1711 1         4 return $self;
1712             }
1713              
1714             =item calc_ltt()
1715              
1716             Calculates lineage-through-time data points.
1717              
1718             Type : Calculation
1719             Title : calc_ltt
1720             Usage : my $ltt = $tree->calc_ltt;
1721             Function: Returns a two-dimensional array.
1722             The first dimension consists of the
1723             "records", so that in the second
1724             dimension $AoA[$first][0] contains
1725             the internal node references, and
1726             $AoA[$first][1] the branching time
1727             of the internal node, and $AoA[$first][2]
1728             the cumulative number of lineages over
1729             time. The records are orderered from
1730             root to tips by time from the origin.
1731             Returns : SCALAR[][] or FALSE
1732             Args : NONE
1733              
1734             =cut
1735              
1736             sub calc_ltt {
1737 2     2 1 6 my $self = shift;
1738 2 100       10 if ( !$self->is_ultrametric(0.01) ) {
1739 1         7 throw 'ObjectMismatch' =>
1740             'tree isn\'t ultrametric, results are meaningless';
1741             }
1742 1         9 my $ltt = ( $self->calc_branching_times );
1743 1         5 my $lineages = 1;
1744 1         4 for my $i ( 0 .. $#{$ltt} ) {
  1         5  
1745 9         17 $lineages += ( scalar @{ $ltt->[$i][0]->get_children } - 1 );
  9         26  
1746 9         26 $ltt->[$i][2] = $lineages;
1747             }
1748 1         11 return $ltt;
1749             }
1750              
1751             =item calc_symdiff()
1752              
1753             Calculates the symmetric difference metric between invocant and argument. This
1754             metric is identical to the Robinson-Foulds tree comparison distance. See
1755             L<http://dx.doi.org/10.1016/0025-5564(81)90043-2>
1756              
1757             Type : Calculation
1758             Title : calc_symdiff
1759             Usage : my $symdiff =
1760             $tree->calc_symdiff($other_tree);
1761             Function: Returns the symmetric difference
1762             metric between $tree and $other_tree,
1763             sensu Penny and Hendy, 1985.
1764             Returns : SCALAR
1765             Args : A Bio::Phylo::Forest::Tree object,
1766             Optional second argument flags that results should be normalized
1767             Comments: Trees in comparison must span
1768             the same set of terminal taxa
1769             or results are meaningless.
1770              
1771             =cut
1772              
1773             sub calc_symdiff {
1774 21     21 1 99 my ( $tree, $other_tree, $normalize ) = @_;
1775 21         131 my $tuples = $tree->_calc_branch_diffs($other_tree);
1776 21         39 my $symdiff = 0;
1777             #use Data::Dumper;
1778             #warn Dumper($tuples);
1779 21         43 for my $tuple ( @{$tuples} ) {
  21         42  
1780 132 100       218 $symdiff++ unless $tuple->[2];
1781             }
1782 21 100       156 return $normalize ? $symdiff / scalar(@{$tuples}) : $symdiff;
  3         18  
1783             }
1784              
1785             =item calc_avtd()
1786              
1787             Calculates the average taxonomic distinctiveness. See
1788             Clarke KR, Warwick RM (1998) A taxonomic distinctness index and its statistical
1789             properties. J Appl Ecol 35:523-525
1790             L<http://dx.doi.org/10.1046/j.1365-2664.1998.3540523.x>
1791              
1792             Type : Calculation
1793             Title : calc_avtd
1794             Usage : my $avtd = $tree->calc_avtd;
1795             Function: Returns the average taxonomic distinctiveness
1796             Returns : SCALAR
1797             Args : A Bio::Phylo::Forest::Tree object
1798             Comments:
1799              
1800             =cut
1801              
1802             sub calc_avtd {
1803 0     0 1 0 my $tree = shift;
1804 0         0 my @tips = @{ $tree->get_terminals };
  0         0  
1805 0         0 my $dist = 0;
1806 0         0 for my $i ( 0 .. $#tips - 1 ) {
1807 0         0 for my $j ( $i + 1 .. $#tips ) {
1808 0         0 $dist += $tips[$i]->calc_patristic_distance($tips[$j]);
1809             }
1810             }
1811 0         0 return $dist / scalar(@tips);
1812             }
1813              
1814             =item calc_fp()
1815              
1816             Calculates the Fair Proportion value for each terminal.
1817              
1818             Type : Calculation
1819             Title : calc_fp
1820             Usage : my $fp = $tree->calc_fp();
1821             Function: Returns the Fair Proportion
1822             value for each terminal
1823             Returns : HASHREF
1824             Args : NONE
1825              
1826             =cut
1827              
1828             # code due to Aki Mimoto
1829             sub calc_fp {
1830 0     0 1 0 my $self = shift;
1831              
1832             # First establish how many children sit on each of the nodes
1833 0         0 my %weak_ref;
1834 0         0 my $terminals = $self->get_terminals;
1835 0         0 for my $terminal (@$terminals) {
1836 0         0 my $index = $terminal;
1837 0         0 do { $weak_ref{$index}++ } while ( $index = $index->get_parent );
  0         0  
1838             }
1839              
1840             # Then, assign each terminal a value
1841 0         0 my $fp = {};
1842 0         0 for my $terminal (@$terminals) {
1843 0         0 my $name = $terminal->get_name;
1844 0         0 my $fpi = 0;
1845 0         0 do {
1846             $fpi +=
1847 0   0     0 ( $terminal->get_branch_length || 0 ) / $weak_ref{$terminal};
1848             } while ( $terminal = $terminal->get_parent );
1849 0         0 $fp->{$name} = $fpi;
1850             }
1851 0         0 return $fp;
1852             }
1853              
1854             =item calc_fp_mean()
1855              
1856             Calculates the mean Fair Proportion value over all terminals.
1857              
1858             Type : Calculation
1859             Title : calc_fp_mean
1860             Usage : my $fp = $tree->calc_fp_mean();
1861             Function: Returns the mean Fair Proportion
1862             value over all terminals
1863             Returns : FLOAT
1864             Args : NONE
1865              
1866             =cut
1867            
1868             sub calc_fp_mean {
1869 0     0 1 0 my $self = shift;
1870 0         0 my $fp = $self->calc_fp;
1871 0         0 my @fp = values %{ $fp };
  0         0  
1872 0         0 return sum(@fp)/scalar(@fp);
1873             }
1874              
1875             =item calc_es()
1876              
1877             Calculates the Equal Splits value for each terminal
1878              
1879             Type : Calculation
1880             Title : calc_es
1881             Usage : my $es = $tree->calc_es();
1882             Function: Returns the Equal Splits value for each terminal
1883             Returns : HASHREF
1884             Args : NONE
1885              
1886             =cut
1887              
1888             # code due to Aki Mimoto
1889             sub calc_es {
1890 0     0 1 0 my $self = shift;
1891              
1892             # First establish how many children sit on each of the nodes
1893 0         0 my $terminals = $self->get_terminals;
1894 0         0 my $es = {};
1895 0         0 for my $terminal ( @{$terminals} ) {
  0         0  
1896 0         0 my $name = $terminal->get_name;
1897 0         0 my $esi = 0;
1898 0         0 my $divisor = 1;
1899 0         0 do {
1900 0   0     0 my $length = $terminal->get_branch_length || 0;
1901 0   0     0 my $children = $terminal->get_children || [];
1902 0   0     0 $divisor *= @$children || 1;
1903 0         0 $esi += $length / $divisor;
1904             } while ( $terminal = $terminal->get_parent );
1905 0         0 $es->{$name} = $esi;
1906             }
1907 0         0 return $es;
1908             }
1909              
1910             =item calc_es_mean()
1911              
1912             Calculates the mean Equal Splits value over all terminals
1913              
1914             Type : Calculation
1915             Title : calc_es_mean
1916             Usage : my $es = $tree->calc_es_mean();
1917             Function: Returns the Equal Splits value over all terminals
1918             Returns : FLOAT
1919             Args : NONE
1920              
1921             =cut
1922              
1923             sub calc_es_mean {
1924 0     0 1 0 my $self = shift;
1925 0         0 my $es = $self->calc_es;
1926 0         0 my @es = values %{ $es };
  0         0  
1927 0         0 return sum(@es)/scalar(@es);
1928             }
1929              
1930             =item calc_pe()
1931              
1932             Calculates the Pendant Edge value for each terminal.
1933              
1934             Type : Calculation
1935             Title : calc_pe
1936             Usage : my $es = $tree->calc_pe();
1937             Function: Returns the Pendant Edge value for each terminal
1938             Returns : HASHREF
1939             Args : NONE
1940              
1941             =cut
1942              
1943             # code due to Aki Mimoto
1944             sub calc_pe {
1945 0     0 1 0 my $self = shift;
1946 0 0       0 my $terminals = $self->get_terminals or return {};
1947             my $pe =
1948 0         0 { map { $_->get_name => $_->get_branch_length } @{$terminals} };
  0         0  
  0         0  
1949 0         0 return $pe;
1950             }
1951              
1952             =item calc_pe_mean()
1953              
1954             Calculates the mean Pendant Edge value over all terminals
1955              
1956             Type : Calculation
1957             Title : calc_pe_mean
1958             Usage : my $es = $tree->calc_pe_mean();
1959             Function: Returns the mean Pendant Edge value over all terminals
1960             Returns : FLOAT
1961             Args : NONE
1962              
1963             =cut
1964              
1965             sub calc_pe_mean {
1966 0     0 1 0 my $self = shift;
1967 0         0 my $pe = $self->calc_pe;
1968 0         0 my @pe = values %{ $pe };
  0         0  
1969 0         0 return sum(@pe)/scalar(@pe);
1970             }
1971              
1972             =item calc_shapley()
1973              
1974             Calculates the Shapley value for each terminal.
1975              
1976             Type : Calculation
1977             Title : calc_shapley
1978             Usage : my $es = $tree->calc_shapley();
1979             Function: Returns the Shapley value for each terminal
1980             Returns : HASHREF
1981             Args : NONE
1982              
1983             =cut
1984              
1985             # code due to Aki Mimoto
1986             sub calc_shapley {
1987 0     0 1 0 my $self = shift;
1988              
1989             # First find out how many tips are at the ends of each edge.
1990 0 0       0 my $terminals = $self->get_terminals or return; # nothing to see!
1991 0         0 my $edge_lookup = {};
1992 0         0 my $index = $terminals->[0];
1993              
1994             # Iterate through the edges and find out which side each terminal reside
1995 0         0 _calc_shapley_traverse( $index, undef, $edge_lookup, 'root' );
1996              
1997             # At this point, it's possible to create the calculation matrix
1998 0         0 my $n = @$terminals;
1999 0         0 my @m;
2000 0         0 my $edges = [ keys %$edge_lookup ];
2001 0         0 for my $e ( 0 .. $#$edges ) {
2002 0         0 my $edge = $edges->[$e];
2003             my $el =
2004 0         0 $edge_lookup->{$edge}; # Lookup for terminals on one edge side
2005             my $v =
2006 0         0 keys %{ $el
2007 0         0 ->{terminals} }; # Number of elements on one side of the edge
2008 0         0 for my $l ( 0 .. $#$terminals ) {
2009 0         0 my $terminal = $terminals->[$l];
2010 0         0 my $name = $terminal->get_name;
2011 0 0       0 if ( $el->{terminals}{$name} ) {
2012 0         0 $m[$l][$e] = ( $n - $v ) / ( $n * $v );
2013             }
2014             else {
2015 0         0 $m[$l][$e] = $v / ( $n * ( $n - $v ) );
2016             }
2017             }
2018             }
2019              
2020             # Now we can calculate through the matrix
2021 0         0 my $shapley = {};
2022 0         0 for my $l ( 0 .. $#$terminals ) {
2023 0         0 my $terminal = $terminals->[$l];
2024 0         0 my $name = $terminal->get_name;
2025 0         0 for my $e ( 0 .. $#$edges ) {
2026 0         0 my $edge = $edge_lookup->{ $edges->[$e] };
2027 0         0 $shapley->{$name} += $edge->{branch_length} * $m[$l][$e];
2028             }
2029             }
2030 0         0 return $shapley;
2031             }
2032              
2033             sub _calc_shapley_traverse {
2034              
2035             # This does a depth first traversal to assign the terminals
2036             # to the outgoing side of each branch.
2037 0     0   0 my ( $index, $previous, $edge_lookup, $direction ) = @_;
2038 0 0       0 return unless $index;
2039 0   0     0 $previous ||= '';
2040              
2041             # Is this element a root?
2042 0         0 my $is_root = !$index->get_parent;
2043              
2044             # Now assemble all the terminal datapoints and use the soft reference
2045             # to keep track of which end the terminals are attached
2046 0         0 my @core_terminals;
2047 0 0 0     0 if ( $previous and $index->is_terminal ) {
2048 0         0 push @core_terminals, $index->get_name;
2049             }
2050 0   0     0 my $parent = $index->get_parent || '';
2051 0         0 my @child_terminals;
2052 0   0     0 my $child_nodes = $index->get_children || [];
2053 0         0 for my $child (@$child_nodes) {
2054 0 0       0 next unless $child ne $previous;
2055 0         0 push @child_terminals,
2056             _calc_shapley_traverse( $child, $index, $edge_lookup, 'tip' );
2057             }
2058 0         0 my @parent_terminals;
2059 0 0       0 if ( $parent ne $previous ) {
2060 0         0 push @parent_terminals,
2061             _calc_shapley_traverse( $parent, $index, $edge_lookup, 'root' );
2062             }
2063              
2064             # We're going to toss the root node and we need to merge the root's child branches
2065 0 0       0 unless ($is_root) {
2066             $edge_lookup->{$index} = {
2067             branch_length => $index->get_branch_length,
2068             terminals => {
2069 0 0       0 map { $_ => 1 } @core_terminals,
  0         0  
2070             $direction eq 'root' ? @parent_terminals : @child_terminals
2071             }
2072             };
2073             }
2074 0         0 return ( @core_terminals, @child_terminals, @parent_terminals );
2075             }
2076            
2077             =item calc_shapley_mean()
2078              
2079             Calculates the mean Shapley value over all terminals
2080              
2081             Type : Calculation
2082             Title : calc_shapley_mean
2083             Usage : my $es = $tree->calc_shapley_mean();
2084             Function: Returns the mean Shapley value over all terminals
2085             Returns : HASHREF
2086             Args : NONE
2087              
2088             =cut
2089              
2090             sub calc_shapley_mean {
2091 0     0 1 0 my $self = shift;
2092 0         0 my $sv = $self->calc_shapley;
2093 0         0 my @sv = values %{ $sv };
  0         0  
2094 0         0 return sum(@sv)/scalar(@sv);
2095             }
2096              
2097             =back
2098              
2099             =head2 VISITOR METHODS
2100              
2101             The following methods are a - not entirely true-to-form - implementation of the Visitor
2102             design pattern: the nodes in a tree are visited, and rather than having an object
2103             operate on them, a set of code references is used. This can be used, for example, to
2104             serialize a tree to a string format. To create a newick string without branch lengths
2105             you would use something like this (there is a more powerful 'to_newick' method, so this
2106             is just an example):
2107              
2108             $tree->visit_depth_first(
2109             '-pre_daughter' => sub { print '(' },
2110             '-post_daughter' => sub { print ')' },
2111             '-in' => sub { print shift->get_name },
2112             '-pre_sister' => sub { print ',' },
2113             );
2114             print ';';
2115              
2116             =over
2117              
2118             =item visit_depth_first()
2119              
2120             Visits nodes depth first
2121              
2122             Type : Visitor method
2123             Title : visit_depth_first
2124             Usage : $tree->visit_depth_first( -pre => sub{ ... }, -post => sub { ... } );
2125             Function: Visits nodes in a depth first traversal, executes subs
2126             Returns : $tree
2127             Args : Optional handlers in the order in which they would be executed on an internal node:
2128            
2129             # first event handler, is executed when node is reached in recursion
2130             -pre => sub { print "pre: ", shift->get_name, "\n" },
2131              
2132             # is executed if node has a daughter, but before that daughter is processed
2133             -pre_daughter => sub { print "pre_daughter: ", shift->get_name, "\n" },
2134            
2135             # is executed if node has a daughter, after daughter has been processed
2136             -post_daughter => sub { print "post_daughter: ", shift->get_name, "\n" },
2137              
2138             # is executed whether or not node has sisters, if it does have sisters
2139             # they're processed first
2140             -in => sub { print "in: ", shift->get_name, "\n" },
2141            
2142             # is executed if node has a sister, before sister is processed
2143             -pre_sister => sub { print "pre_sister: ", shift->get_name, "\n" },
2144            
2145             # is executed if node has a sister, after sister is processed
2146             -post_sister => sub { print "post_sister: ", shift->get_name, "\n" },
2147            
2148             # is executed last
2149             -post => sub { print "post: ", shift->get_name, "\n" },
2150            
2151             # specifies traversal order, default 'ltr' means first_daugher -> next_sister
2152             # traversal, alternate value 'rtl' means last_daughter -> previous_sister traversal
2153             -order => 'ltr', # ltr = left-to-right, 'rtl' = right-to-left
2154             Comments:
2155              
2156             =cut
2157              
2158             sub visit_depth_first {
2159 141     141 1 275 my $self = shift;
2160 141 50       474 if ( my $root = $self->get_root ) {
2161 141         741 $root->visit_depth_first(looks_like_hash @_);
2162             }
2163 140         358 return $self;
2164             }
2165              
2166             =item visit_breadth_first()
2167              
2168             Visits nodes breadth first
2169              
2170             Type : Visitor method
2171             Title : visit_breadth_first
2172             Usage : $tree->visit_breadth_first( -pre => sub{ ... }, -post => sub { ... } );
2173             Function: Visits nodes in a breadth first traversal, executes handlers
2174             Returns : $tree
2175             Args : Optional handlers in the order in which they would be executed on an internal node:
2176            
2177             # first event handler, is executed when node is reached in recursion
2178             -pre => sub { print "pre: ", shift->get_name, "\n" },
2179            
2180             # is executed if node has a sister, before sister is processed
2181             -pre_sister => sub { print "pre_sister: ", shift->get_name, "\n" },
2182            
2183             # is executed if node has a sister, after sister is processed
2184             -post_sister => sub { print "post_sister: ", shift->get_name, "\n" },
2185            
2186             # is executed whether or not node has sisters, if it does have sisters
2187             # they're processed first
2188             -in => sub { print "in: ", shift->get_name, "\n" },
2189            
2190             # is executed if node has a daughter, but before that daughter is processed
2191             -pre_daughter => sub { print "pre_daughter: ", shift->get_name, "\n" },
2192            
2193             # is executed if node has a daughter, after daughter has been processed
2194             -post_daughter => sub { print "post_daughter: ", shift->get_name, "\n" },
2195            
2196             # is executed last
2197             -post => sub { print "post: ", shift->get_name, "\n" },
2198            
2199             # specifies traversal order, default 'ltr' means first_daugher -> next_sister
2200             # traversal, alternate value 'rtl' means last_daughter -> previous_sister traversal
2201             -order => 'ltr', # ltr = left-to-right, 'rtl' = right-to-left
2202             Comments:
2203              
2204             =cut
2205              
2206             sub visit_breadth_first {
2207 0     0 1 0 my $self = shift;
2208 0         0 my %args = looks_like_hash @_;
2209 0         0 $self->get_root->visit_breadth_first(%args);
2210 0         0 return $self;
2211             }
2212              
2213             =item visit_level_order()
2214              
2215             Visits nodes in a level order traversal.
2216              
2217             Type : Visitor method
2218             Title : visit_level_order
2219             Usage : $tree->visit_level_order( sub{...} );
2220             Function: Visits nodes in a level order traversal, executes sub
2221             Returns : $tree
2222             Args : A subroutine reference that operates on visited nodes.
2223             Comments:
2224              
2225             =cut
2226              
2227             sub visit_level_order {
2228 3     3 1 9 my ( $tree, $sub ) = @_;
2229 3 50       18 if ( my $root = $tree->get_root ) {
2230 3         26 $root->visit_level_order($sub);
2231             }
2232             else {
2233 0         0 throw 'BadArgs' => 'Tree has no root';
2234             }
2235 3         11 return $tree;
2236             }
2237              
2238             =back
2239              
2240             =head2 TREE MANIPULATION
2241              
2242             =over
2243              
2244             =item chronompl()
2245              
2246             Modifies branch lengths using the mean path lengths method of
2247             Britton et al. (2002). For more about this method, see:
2248             L<http://dx.doi.org/10.1016/S1055-7903(02)00268-3>
2249              
2250             Type : Tree manipulator
2251             Title : chronompl
2252             Usage : $tree->chronompl;
2253             Function: Makes tree ultrametric using MPL method
2254             Returns : The modified, now ultrametric invocant.
2255             Args : NONE
2256             Comments:
2257              
2258             =cut
2259              
2260             sub chronompl {
2261 1     1 1 3 my $self = shift;
2262             $self->visit_depth_first(
2263             '-post' => sub {
2264 7     7   9 my $node = shift;
2265 7         9 my %paths;
2266 7         16 my $children = $node->get_children;
2267 7         11 for my $child ( @{$children} ) {
  7         16  
2268 6         14 my $cp = $child->get_generic('paths');
2269 6         15 my $bl = $child->get_branch_length;
2270 6         10 for my $id ( keys %{$cp} ) {
  6         12  
2271 8         19 $paths{$id} = $cp->{$id} + $bl;
2272             }
2273             }
2274 7 100       10 if ( not scalar @{$children} ) {
  7         16  
2275 4         12 $paths{ $node->get_id } = 0;
2276             }
2277 7         24 $node->set_generic( 'paths' => \%paths );
2278 7         12 my $total = 0;
2279 7         43 $total += $_ for values %paths;
2280 7         17 my $mean = $total / scalar keys %paths;
2281 7         12 $node->set_generic( 'age' => $mean );
2282             }
2283 1         12 );
2284 1         16 return $self->agetobl;
2285             }
2286              
2287             =item grafenbl()
2288              
2289             Computes and assigns branch lengths using Grafen's method, which makes
2290             node ages proportional to clade size. For more about this method, see:
2291             L<http://dx.doi.org/10.1098/rstb.1989.0106>
2292              
2293             Type : Tree manipulator
2294             Title : grafenbl
2295             Usage : $tree->grafenbl;
2296             Function: Assigns branch lengths using Grafen's method
2297             Returns : The modified, now ultrametric invocant.
2298             Args : Optional, a power ('rho') to which all node ages are raised
2299             Comments:
2300              
2301             =cut
2302              
2303             sub grafenbl {
2304 0     0 1 0 my ( $self, $rho ) = @_;
2305 0         0 my $total = 0;
2306             $self->visit_depth_first(
2307             '-post' => sub {
2308 0     0   0 my $node = shift;
2309 0 0       0 if ( $node->is_terminal ) {
2310 0         0 $node->set_generic( 'adjntips' => 0 );
2311 0         0 $node->set_generic( 'ntips' => 1 );
2312             }
2313             else {
2314 0         0 my $children = $node->get_children;
2315 0         0 my $ntips = 0;
2316 0         0 for my $child ( @{$children} ) {
  0         0  
2317 0         0 $ntips += $child->get_generic('ntips');
2318             }
2319 0         0 $node->set_generic( 'ntips' => $ntips );
2320 0         0 $node->set_generic( 'adjntips' => $ntips - 1 );
2321 0 0       0 $total = $ntips if $node->is_root;
2322             }
2323             }
2324 0         0 );
2325             $self->visit(
2326             sub {
2327 0     0   0 my $node = shift;
2328 0 0       0 if ($total) {
2329 0         0 my $age = $node->get_generic('adjntips') / $total;
2330 0 0       0 if ($rho) {
2331 0         0 $age = $age**$rho;
2332             }
2333 0         0 $node->set_generic( 'age' => $age );
2334             }
2335             }
2336 0         0 );
2337 0         0 return $self->agetobl;
2338             }
2339              
2340             =item agetobl()
2341              
2342             Converts node ages to branch lengths
2343              
2344             Type : Tree manipulator
2345             Title : agetobl
2346             Usage : $tree->agetobl;
2347             Function: Converts node ages to branch lengths
2348             Returns : The modified invocant.
2349             Args : NONE
2350             Comments: This method uses ages as assigned to the generic 'age' slot
2351             on the nodes in the trees. I.e. for each node in the tree,
2352             $node->get_generic('age') must return a number
2353              
2354             =cut
2355              
2356             sub agetobl {
2357 2     2 1 8 my $self = shift;
2358 2         4 for my $node ( @{ $self->get_entities } ) {
  2         6  
2359 46 100       92 if ( my $parent = $node->get_parent ) {
2360 44   100     90 my $mp = $node->get_generic('age') || 0;
2361 44         86 my $pmp = $parent->get_generic('age');
2362 44         99 $node->set_branch_length( $pmp - $mp );
2363             }
2364             else {
2365 2         14 $node->set_branch_length(0);
2366             }
2367             }
2368 2         8 return $self;
2369             }
2370              
2371             =item rankprobbl()
2372              
2373             Generates branch lengths by calculating the rank probabilities for each node and applying
2374             the expected waiting times under a pure birth process to these ranks. Uses Stadler's
2375             RANKPROB algorithm as described in:
2376              
2377             B<Gernhard, T.> et al., 2006. Estimating the relative order of speciation
2378             or coalescence events on a given phylogeny. I<Evolutionary Bioinformatics Online>.
2379             B<2>:285. L<http://www.ncbi.nlm.nih.gov/pmc/articles/PMC2674681/>.
2380              
2381             Type : Tree manipulator
2382             Title : rankprobbl
2383             Usage : $tree->rankprobbl;
2384             Function: Generates pure birth branch lengths
2385             Returns : The modified invocant.
2386             Args : NONE
2387             Comments: Tree must be fully bifurcating
2388              
2389             =cut
2390              
2391             sub rankprobbl {
2392 0     0 1 0 my $self = shift;
2393 0         0 my $root = $self->get_root;
2394 0         0 my $intervals = $root->calc_terminals;
2395 0         0 my @times;
2396 0         0 for my $i ( 1 .. $intervals ) {
2397 0   0     0 my $previous = $times[-1] || 0;
2398 0         0 push @times, $previous + ( 1 / $i );
2399             }
2400 0         0 my $total = $times[-1];
2401 0         0 for my $node ( @{ $self->get_internals } ) {
  0         0  
2402 0         0 my $rankprobs = $root->calc_rankprob($node);
2403 0         0 my @weighted_waiting_times;
2404 0         0 for my $i ( 1 .. $#{ $rankprobs } ) {
  0         0  
2405 0         0 push @weighted_waiting_times, $rankprobs->[$i] * $times[$i - 1];
2406             }
2407 0         0 my $age = $total - sum(@weighted_waiting_times);
2408 0         0 $node->set_generic( 'age' => $age );
2409             }
2410 0         0 $self->agetobl;
2411 0         0 $root->set_branch_length(1);
2412 0         0 return $self;
2413             }
2414              
2415             =item ultrametricize()
2416              
2417             Sets all root-to-tip path lengths equal.
2418              
2419             Type : Tree manipulator
2420             Title : ultrametricize
2421             Usage : $tree->ultrametricize;
2422             Function: Sets all root-to-tip path
2423             lengths equal by stretching
2424             all terminal branches to the
2425             height of the tallest node.
2426             Returns : The modified invocant.
2427             Args : NONE
2428             Comments: This method is analogous to
2429             the 'ultrametricize' command
2430             in Mesquite, i.e. no rate smoothing
2431             or anything like that happens, just
2432             a lengthening of terminal branches.
2433              
2434             =cut
2435              
2436             sub ultrametricize {
2437 1     1 1 2 my $tree = shift;
2438 1         2 my $tallest = 0;
2439 1         3 foreach ( @{ $tree->get_terminals } ) {
  1         3  
2440 9         16 my $path_to_root = $_->calc_path_to_root;
2441 9 100       19 if ( $path_to_root > $tallest ) {
2442 5         8 $tallest = $path_to_root;
2443             }
2444             }
2445 1         3 foreach ( @{ $tree->get_terminals } ) {
  1         5  
2446 9         23 my $newbl =
2447             $_->get_branch_length + ( $tallest - $_->calc_path_to_root );
2448 9         23 $_->set_branch_length($newbl);
2449             }
2450 1         13 return $tree;
2451             }
2452              
2453             =item scale()
2454              
2455             Scales the tree to the specified height.
2456              
2457             Type : Tree manipulator
2458             Title : scale
2459             Usage : $tree->scale($height);
2460             Function: Scales the tree to the
2461             specified height.
2462             Returns : The modified invocant.
2463             Args : $height = a numerical value
2464             indicating root-to-tip path length.
2465             Comments: This method uses the
2466             $tree->calc_tree_height method, and
2467             so for additive trees the *average*
2468             root-to-tip path length is scaled to
2469             $height (i.e. some nodes might be
2470             taller than $height, others shorter).
2471              
2472             =cut
2473              
2474             sub scale {
2475 1     1 1 5 my ( $tree, $target_height ) = @_;
2476 1         8 my $current_height = $tree->calc_tree_height;
2477 1         6 my $scaling_factor = $target_height / $current_height;
2478 1         3 foreach ( @{ $tree->get_entities } ) {
  1         7  
2479 17         45 my $bl = $_->get_branch_length;
2480 17 100       40 if ($bl) {
2481 16         32 my $new_branch_length = $bl * $scaling_factor;
2482 16         39 $_->set_branch_length($new_branch_length);
2483             }
2484             }
2485 1         9 return $tree;
2486             }
2487              
2488             =item resolve()
2489              
2490             Randomly breaks polytomies.
2491              
2492             Type : Tree manipulator
2493             Title : resolve
2494             Usage : $tree->resolve;
2495             Function: Randomly breaks polytomies by inserting
2496             additional internal nodes.
2497             Returns : The modified invocant.
2498             Args : Optionally, when passed a true value (e.g. '1'), the newly created nodes
2499             will be unnamed, otherwise they will be named 'r1', 'r2', 'r3' and so on.
2500             Comments:
2501              
2502             =cut
2503              
2504             sub resolve {
2505 2     2 1 6 my ( $tree, $anonymous ) = @_;
2506 2         4 for my $node ( @{ $tree->get_internals } ) {
  2         13  
2507 14         17 my @children = @{ $node->get_children };
  14         24  
2508 14 100       32 if ( scalar @children > 2 ) {
2509 1         2 my $i = 1;
2510 1         3 while ( scalar @children > 2 ) {
2511 2         5 my %args = ( '-branch_length' => 0.00 );
2512 2 50       12 $args{'-name'} = 'r' . $i++ unless $anonymous;
2513 2         15 my $newnode = $fac->create_node(%args);
2514 2         8 $tree->insert($newnode);
2515 2         8 $newnode->set_parent($node);
2516 2         6 for ( 1 .. 2 ) {
2517 4         40 my $i = int( rand( scalar @children ) );
2518 4         14 $children[$i]->set_parent($newnode);
2519 4         9 splice @children, $i, 1;
2520             }
2521 2         14 push @children, $newnode;
2522             }
2523             }
2524             }
2525 2         5 return $tree;
2526             }
2527            
2528             =item replicate()
2529              
2530             Simulates tree(s) whose properties resemble that of the input tree in terms of birth/death
2531             rate, depth, and size/depth distribution of genera. This uses the R environment for
2532             statistics to get a maximum likelihood estimate of birth/death rates on the source tree
2533             and therefore requires the package L<Statistics::R> to be installed, and the R package
2534             'ape'. The idea is that this is used on a species tree that is ultrametric. To get
2535             simulated genera whose sizes and root depths approximate those of the source tree,
2536             annotate genus nodes in the source tree, e.g. using $tree->generize, and provide the
2537             optional -genera flag of replicate() with a true value.
2538              
2539             This method uses the function C<birthdeath> from the R package C<ape>. If you use this
2540             method in a publication, you should therefore B<cite that package> (in addition to
2541             Bio::Phylo). More information about C<ape> can be found at L<http://ape-package.ird.fr/>.
2542              
2543             Type : Tree manipulator
2544             Title : replicate
2545             Usage : my $forest = $tree->replicate;
2546             Function: Simulates tree(s) whose properties resemble that of the invocant tree
2547             Returns : Bio::Phylo::Forest
2548             Args : Optional: -trees => number of replicates, default is 1
2549             Optional: -rootedge => keep the birth/death root branch, then scale the tree(s)
2550             Optional: -genera => approximate distribution of source genus sizes and depths
2551             (do this by tagging internal nodes: $node->set_rank('genus'))
2552             Optional: -seed => a random integer seed for generating the birth/death tree
2553             Comments: Requires Statistics::R, and an R environment with 'ape' installed
2554             Expects to operate on an ultrametric tree
2555              
2556             =cut
2557            
2558             sub replicate {
2559 0     0 1 0 my ( $self, %args ) = @_;
2560 0 0 0     0 if ( looks_like_class('Statistics::R') and looks_like_class('Bio::Phylo::Generator') ) {
2561              
2562             # get birthdeath parameters
2563 0         0 $logger->info("going to estimate b/d");
2564 0         0 my $newick = $self->to_newick;
2565 0         0 my $R = Statistics::R->new;
2566 0 0       0 if ( my $seed = $args{'-seed'} ) {
2567 0         0 $R->run(qq[set.seed($seed)]);
2568             }
2569 0         0 $R->run(q[library("ape")]);
2570 0         0 $R->run(qq[phylo <- read.tree(text="$newick")]);
2571 0         0 $R->run(q[bd <- birthdeath(phylo)]);
2572 0         0 $R->run(q[ratio <- as.double(bd$para[1])]);
2573 0         0 my $b_over_d = $R->get(q[ratio]);
2574 0         0 $logger->info("b/d=$b_over_d");
2575            
2576             # generate the tree
2577 0         0 $logger->info("going to simulate tree(s)");
2578 0         0 my $gen = Bio::Phylo::Generator->new;
2579             my $forest = $gen->gen_rand_birth_death(
2580             '-trees' => $args{'-trees'} || 1,
2581             '-killrate' => $b_over_d,
2582 0   0     0 '-tips' => scalar(@{ $self->get_terminals }),
  0         0  
2583             );
2584            
2585             # invent tip labels
2586             $forest->visit(sub{
2587 0     0   0 my $t = shift;
2588 0         0 my $n = 0;
2589 0         0 for my $tip ( @{ $t->get_terminals } ) {
  0         0  
2590 0         0 my $genus = $self->_make_taxon_name;
2591 0         0 my $species = $self->_make_taxon_name;
2592 0 0       0 if ( $genus =~ /(..)$/ ) {
2593 0         0 my $suffix = $1;
2594 0         0 $species =~ s/..$/$suffix/;
2595 0         0 $tip->set_name( ucfirst($genus) . '_' . $species );
2596             }
2597             }
2598 0         0 });
2599            
2600             # scale the trees
2601 0         0 my $height = $self->calc_tree_height;
2602 0 0   0   0 $forest->visit(sub{shift->get_root->set_branch_length(0)}) if not $args{'-rootedge'};
  0         0  
2603 0     0   0 $forest->visit(sub{shift->scale($height)});
  0         0  
2604 0         0 $logger->info("tree height is $height");
2605            
2606             # create similar genera, optionally
2607 0 0       0 if ( $args{'-genera'} ) {
2608 0         0 $logger->info("going to approximate genera");
2609            
2610             # iterate over trees
2611 0         0 for my $replicate ( @{ $forest->get_entities } ) {
  0         0  
2612            
2613             # get distribution of source genus sizes and depths
2614 0         0 $logger->info("calculating source genus sizes and depths");
2615 0         0 my ( $counter, %genera ) = ( 0 );
2616             $self->visit(sub{
2617 0     0   0 my $node = shift;
2618 0         0 my $rank = $node->get_rank;
2619 0 0       0 if ( $rank ) {
2620 0 0       0 if ( $rank eq 'genus' ) {
2621 0         0 my $id = $node->get_id;
2622 0         0 my $height = $height - $node->calc_path_to_root;
2623 0         0 my $size = scalar(@{ $node->get_terminals });
  0         0  
2624 0   0     0 my $name = $node->get_name || 'Genus' . ++$counter;
2625 0         0 $genera{$id} = {
2626             'name' => $name,
2627             'size' => $size,
2628             'height' => $height,
2629             'node' => $node,
2630             };
2631             }
2632             }
2633 0         0 });
2634            
2635             # get distribution of target node sizes and depths
2636 0         0 $logger->info("calculating target genus sizes and depths");
2637 0         0 my ( %node );
2638             $replicate->visit(sub{
2639 0     0   0 my $node = shift;
2640 0         0 my $id = $node->get_id;
2641 0         0 my $height = $height - $node->calc_path_to_root;
2642 0         0 my $size = scalar(@{ $node->get_terminals });
  0         0  
2643 0         0 push @{ $node{$size} }, [ $node, $height, $id ];
  0         0  
2644 0         0 });
2645            
2646             # keep track of which members from the genera have already been assigned
2647 0         0 my %seen_labels;
2648            
2649             # start assigning genera, from big to small
2650 0         0 for my $genus ( sort { $genera{$b}->{'size'} <=> $genera{$a}->{'size'} } keys %genera ) {
  0         0  
2651             # get key for candidate set of nodes
2652 0   0     0 my $name = $genera{$genus}->{'name'} || "Genus${genus}";
2653 0         0 my $size = $genera{$genus}->{'size'};
2654 0         0 my @labels = shuffle map { $_->get_name } @{ $genera{$genus}->{'node'}->get_terminals };
  0         0  
  0         0  
2655            
2656             # avoid assigning labels more than once when genera are nested
2657 0         0 @labels = grep { ! $seen_labels{$_} } @labels;
  0         0  
2658 0         0 $seen_labels{$_}++ for @labels;
2659              
2660 0         0 $logger->info("processing $name ($size tips)");
2661 0 0       0 SIZE: while( not $node{$size} ) { last SIZE if --$size <= 1 }
  0         0  
2662            
2663             # get target height
2664 0 0       0 if ( $node{$size} ) {
2665 0         0 $logger->info("found candidate(s) with $size tips");
2666 0         0 my $h = $genera{$genus}->{'height'};
2667 0         0 my ($node) = map { $_->[0] }
2668 0         0 sort { abs($a->[1]-$h) <=> abs($b->[1]-$h) }
2669 0         0 @{ $node{$size} };
  0         0  
2670            
2671             # assign genus label to node and tips, remove all descendants
2672             # (and self!) from list of candidates, as we can't nest genera
2673 0         0 my $sp = 0;
2674 0         0 $node->set_name($name);
2675            
2676 0         0 for my $n ( $node, @{ $node->get_descendants } ) {
  0         0  
2677 0 0       0 $n->set_name($labels[$sp++]) if $n->is_terminal;
2678 0         0 for my $i ( 1 .. $size ) {
2679 0 0       0 if ( my $array = $node{$i} ) {
2680 0         0 my $id = $n->get_id;
2681 0         0 my @filtered = grep { $id != $_->[2] } @$array;
  0         0  
2682 0 0       0 @filtered ? $node{$i} = \@filtered : delete $node{$i};
2683             }
2684             }
2685             }
2686             }
2687             else {
2688 0         0 $logger->warn("exhausted candidate genera for $genus");
2689             }
2690             }
2691             }
2692             }
2693 0         0 return $forest;
2694             }
2695             }
2696            
2697             sub _make_taxon_name {
2698 0     0   0 my %l = (
2699             'v' => [ qw(a e i o u) ],
2700             'c' => [ qw(qu cr ct p pr ps rs ld ph gl ch l sc v n m) ],
2701             );
2702 0         0 my @suffixes =qw(us os is as es);
2703 0         0 my $length = 1 + int rand 2;
2704 0 0       0 my @order = int rand 2 ? qw(v c) : qw(c v);
2705 0         0 my ($l1) = shuffle(@{$l{$order[0]}});
  0         0  
2706 0         0 my ($l2) = shuffle(@{$l{$order[1]}});
  0         0  
2707 0         0 my @name = ( $l1, $l2 );
2708 0         0 for my $i ( 0 .. $length ) {
2709 0         0 ($l1) = shuffle(@{$l{$order[0]}});
  0         0  
2710 0         0 ($l2) = shuffle(@{$l{$order[1]}});
  0         0  
2711 0         0 push @name, $l1, $l2;
2712             }
2713 0         0 my $name = join '', @name;
2714 0         0 $name =~ s/[aeiou]+$//;
2715 0         0 my ($suffix) = shuffle(@suffixes);
2716 0         0 return $name . $suffix;
2717             }
2718              
2719             =item generize()
2720              
2721             Identifies monophyletic genera by traversing the tree, taking the first word of the tip
2722             names and finding the MRCA of each word. That MRCA is tagged as rank 'genus' and assigned
2723             the name.
2724              
2725             Type : Tree manipulator
2726             Title : generize
2727             Usage : $tree->generize(%args);
2728             Function: Identifies monophyletic genera
2729             Returns : Invocant
2730             Args : Optional: -delim => the delimiter that separates the genus name from any
2731             following (sub)specific epithets. Default is a space ' '.
2732             Optional: -monotypic => if true, also tags monotypic genera
2733             Optional: -polypara => if true, also tags poly/paraphyletic genera. Any
2734             putative genera nested within the largest of the
2735             entangled, poly/paraphyletic genera will be ignored.
2736             Comments:
2737              
2738             =cut
2739            
2740             sub generize {
2741 0     0 1 0 my ( $self, %args ) = @_;
2742 0   0     0 my $delim = $args{'-delim'} || ' ';
2743            
2744             # bin by genus name
2745 0         0 my %genera;
2746 0         0 for my $tip ( @{ $self->get_terminals } ) {
  0         0  
2747 0         0 my $binomial = $tip->get_name;
2748 0         0 my ($genus) = split /$delim/, $binomial;
2749 0 0       0 $genera{$genus} = [] if not $genera{$genus};
2750 0         0 push @{ $genera{$genus} }, $tip;
  0         0  
2751             }
2752            
2753             # identify and tag MRCAs
2754 0         0 my %skip;
2755 0         0 for my $genus ( sort { scalar(@{$genera{$b}}) <=> scalar(@{$genera{$a}}) } keys %genera ) {
  0         0  
  0         0  
  0         0  
2756 0 0       0 next if $skip{$genus};
2757 0         0 my $tips = $genera{$genus};
2758            
2759             # genus is monotypic
2760 0 0       0 if ( scalar(@$tips) == 1 ) {
2761 0         0 $logger->info("$genus is monotypic");
2762 0 0       0 $tips->[0]->set_rank('genus') if $args{'-monotypic'};
2763             }
2764             else {
2765            
2766             # get the MRCA
2767 0         0 my ( $mrca, %seen );
2768 0         0 my @paths = map { @{ $_->get_ancestors } } @{ $tips };
  0         0  
  0         0  
  0         0  
2769 0         0 $seen{$_->get_id}++ for @paths;
2770 0         0 ($mrca) = map { $_->[0] }
2771 0         0 sort { $b->[1] <=> $a->[1] }
2772 0         0 map { [ $_, $_->calc_path_to_root ] }
2773 0         0 grep { $seen{$_->get_id} == @$tips } @paths;
  0         0  
2774            
2775             # identify mono/poly/para
2776 0         0 my $clade_size = @{ $mrca->get_terminals };
  0         0  
2777 0         0 my $tip_count = @{ $tips };
  0         0  
2778 0 0       0 if ( $clade_size == $tip_count ) {
2779 0         0 $logger->info("$genus is monophyletic");
2780 0         0 $mrca->set_rank('genus');
2781 0         0 $mrca->set_name($genus);
2782             }
2783             else {
2784            
2785             # we could now have nested, smaller genera inside this one
2786 0         0 $logger->info("$genus is non-monophyletic $clade_size != $tip_count");
2787 0 0       0 if ( $args{'-polypara'} ) {
2788 0         0 $logger->info("tagging non-monophyletic $genus anyway");
2789 0         0 $mrca->set_rank('genus');
2790 0         0 $mrca->set_name($genus);
2791 0         0 my @names = map { $_->get_name } @{ $mrca->get_terminals };
  0         0  
  0         0  
2792 0         0 for my $name ( @names ) {
2793 0         0 $name =~ s/^(.+?)${delim}.+$/$1/;
2794 0         0 $skip{$name}++;
2795             }
2796             }
2797             }
2798             }
2799             }
2800 0         0 return $self;
2801             }
2802              
2803             =item prune_tips()
2804              
2805             Prunes argument nodes from invocant.
2806              
2807             Type : Tree manipulator
2808             Title : prune_tips
2809             Usage : $tree->prune_tips(\@taxa);
2810             Function: Prunes specified taxa from invocant.
2811             Returns : A pruned Bio::Phylo::Forest::Tree object.
2812             Args : A reference to an array of taxon names, or a taxa block, or a
2813             reference to an array of taxon objects, or a reference to an
2814             array of node objects
2815             Comments:
2816              
2817             =cut
2818              
2819             sub prune_tips {
2820 10     10 1 1531 my ( $self, $tips ) = @_;
2821 10         26 my %prune = map { $_->get_id => 1 } @{ $self->_get_tip_objects($tips) };
  6         17  
  10         55  
2822 10         24 my @keep;
2823 10         22 for my $tip ( @{ $self->get_terminals } ) {
  10         59  
2824 120 100       208 if ( not $prune{$tip->get_id} ) {
2825 114         195 push @keep, $tip;
2826             }
2827             }
2828 10         62 return $self->keep_tips(\@keep);
2829             }
2830              
2831             =item keep_tips()
2832              
2833             Keeps argument nodes from invocant (i.e. prunes all others).
2834              
2835             Type : Tree manipulator
2836             Title : keep_tips
2837             Usage : $tree->keep_tips(\@taxa);
2838             Function: Keeps specified taxa from invocant.
2839             Returns : The pruned Bio::Phylo::Forest::Tree object.
2840             Args : Same as prune_tips, but with inverted meaning
2841             Comments:
2842              
2843             =cut
2844              
2845             sub _get_tip_objects {
2846 24     24   51 my ( $self, $arg ) = @_;
2847 24         42 my @tips;
2848              
2849             # argument is a taxa block
2850 24 50       98 if ( blessed $arg ) {
2851 0         0 for my $taxon ( @{ $arg->get_entities } ) {
  0         0  
2852 0         0 my @nodes = @{ $taxon->get_nodes };
  0         0  
2853 0         0 for my $node ( @nodes ) {
2854 0 0       0 push @tips, $node if $self->contains($node);
2855             }
2856             }
2857             }
2858              
2859             # arg is an array ref
2860             else {
2861 24         47 my $TAXON = _TAXON_;
2862 24         45 my $NODE = _NODE_;
2863 24         37 for my $thing ( @{ $arg } ) {
  24         51  
2864              
2865             # thing is a taxon or node object
2866 137 100       336 if ( blessed $thing ) {
2867 121 50       270 if ( $thing->_type == $TAXON ) {
    50          
2868 0         0 my @nodes = @{ $thing->get_nodes };
  0         0  
2869 0         0 for my $node ( @nodes ) {
2870 0 0       0 push @tips, $node if $self->contains($node);
2871             }
2872             }
2873             elsif ( $thing->_type == $NODE ) {
2874 121 100       210 push @tips, $thing if $self->contains($thing);
2875             }
2876             }
2877              
2878             # thing is a name
2879             else {
2880 16 50       85 if ( my $tip = $self->get_by_name($thing) ) {
2881 16         39 push @tips, $tip;
2882             }
2883             }
2884             }
2885             }
2886 24         90 return \@tips;
2887             }
2888              
2889             sub keep_tips {
2890 14     14 1 678 my ( $self, $tip_names ) = @_;
2891              
2892             # get node objects for tips
2893 14         23 my @tips = @{ $self->_get_tip_objects($tip_names) };
  14         44  
2894            
2895             # identify nodes that are somewhere on the path from tip to root
2896 14         32 my %seen;
2897 14         32 for my $tip ( @tips ) {
2898 121         156 my $node = $tip;
2899 121         195 PARENT: while ( $node ) {
2900 345         559 my $id = $node->get_id;
2901 345 100       571 if ( not exists $seen{$id} ) {
2902 237         365 $seen{$id} = 0;
2903 237         388 $node = $node->get_parent;
2904             }
2905             else {
2906 108         170 last PARENT;
2907             }
2908             }
2909             }
2910              
2911             # now do the pruning
2912             $self->visit_depth_first(
2913             '-post' => sub {
2914             # prune node
2915 262     262   373 my $n = shift;
2916 262         450 my $nid = $n->get_id;
2917 262         462 my $p = $n->get_parent;
2918 262 100       527 if ( not exists $seen{$nid} ) {
2919 25 100       128 $p->delete($n) if $p;
2920 25         361 $self->delete($n);
2921             # record number of children lost by parent
2922 25 100       57 if (defined $p) {
2923 24         48 my $pid = $p->get_id;
2924 24 100       60 if ( exists $seen{$pid} ) {
2925 14         27 $seen{$pid}++;
2926             }
2927             }
2928 25         89 return;
2929             }
2930             # remove nodes who lost children and are now down to a single one
2931 237         273 my @children = @{ $n->get_children };
  237         423  
2932 237 100 100     796 if ( (scalar @children == 1) && ($seen{$nid} > 0) ) {
2933 12         26 my ($c) = @children;
2934 12         38 my $bl = $n->get_branch_length;
2935 12         34 my $cbl = $c->get_branch_length;
2936 12 100 100     78 $c->set_branch_length( $bl + $cbl ) if defined $cbl && defined $bl;
2937 12         50 $self->delete($n);
2938 12         48 $c->set_parent($p);
2939 12 100       57 $p->delete($n) if $p;
2940             }
2941             }
2942 14         173 );
2943 14         216 return $self;
2944             }
2945              
2946             =item negative_to_zero()
2947              
2948             Converts negative branch lengths to zero.
2949              
2950             Type : Tree manipulator
2951             Title : negative_to_zero
2952             Usage : $tree->negative_to_zero;
2953             Function: Converts negative branch
2954             lengths to zero.
2955             Returns : The modified invocant.
2956             Args : NONE
2957             Comments:
2958              
2959             =cut
2960              
2961             sub negative_to_zero {
2962 3     3 1 8 my $tree = shift;
2963 3         6 foreach my $node ( @{ $tree->get_entities } ) {
  3         14  
2964 17         43 my $bl = $node->get_branch_length;
2965 17 50 66     68 if ( $bl && $bl < 0 ) {
2966 0         0 $node->set_branch_length(0);
2967             }
2968             }
2969 3         9 return $tree;
2970             }
2971              
2972             =item ladderize()
2973              
2974             Sorts nodes in ascending (or descending) order of number of children. Tips are
2975             sorted alphabetically (ascending or descending) relative to their siblings.
2976              
2977             Type : Tree manipulator
2978             Title : ladderize
2979             Usage : $tree->ladderize(1);
2980             Function: Sorts nodes
2981             Returns : The modified invocant.
2982             Args : Optional, a true value to reverse the sort order
2983              
2984             =cut
2985              
2986             sub ladderize {
2987 2     2 1 8 my ( $self, $right ) = @_;
2988 2         4 my %child_count;
2989             $self->visit_depth_first(
2990             '-post' => sub {
2991 18     18   38 my $node = shift;
2992            
2993             # record the number of descendants for the focal
2994             # node. because this is a post-order traversal
2995             # we have already counted the children of the
2996             # children, recursively. bin nodes and tips in
2997             # separate containers.
2998 18         47 my $id = $node->get_id;
2999 18         32 my @children = @{ $node->get_children };
  18         44  
3000 18         35 my $count = 1;
3001 18         28 my ( @tips, @nodes );
3002 18         34 for my $child (@children) {
3003 16         34 $count += $child_count{ $child->get_id };
3004 16 100       43 if ( $child->is_terminal ) {
3005 10         26 push @tips, $child;
3006             }
3007             else {
3008 6         17 push @nodes, $child;
3009             }
3010             }
3011 18         37 $child_count{$id} = $count;
3012            
3013             # sort the immediate children. if these are
3014             # tips we will sort alphabetically by name (so
3015             # that cherries are sorted predictably), otherwise
3016             # sort by descendant count
3017 18         28 my @sorted;
3018            
3019 18 50       31 if ($right) {
3020 0         0 @sorted = map { $_->[0] }
3021 0         0 sort { $b->[1] <=> $a->[1] }
3022 0         0 map { [ $_, $child_count{ $_->get_id } ] } @nodes;
  0         0  
3023 0         0 push @sorted, sort { $b->get_name cmp $a->get_name } @tips;
  0         0  
3024             }
3025             else {
3026 6         12 @sorted = map { $_->[0] }
3027 0         0 sort { $a->[1] <=> $b->[1] }
3028 18         43 map { [ $_, $child_count{ $_->get_id } ] } @nodes;
  6         14  
3029 18         43 unshift @sorted, sort { $a->get_name cmp $b->get_name } @tips;
  2         13  
3030             }
3031              
3032             # apply the new sort order
3033 18         61 for my $i ( 0 .. $#sorted ) {
3034 16         40 $node->insert_at_index( $sorted[$i], $i );
3035             }
3036             }
3037 2         22 );
3038 2         43 return $self;
3039             }
3040              
3041             =item sort_tips()
3042              
3043             Sorts nodes in (an approximation of) the provided ordering. Given an array
3044             reference of taxa, an array reference of name strings, or a taxa object, this
3045             method attempts to order the tips in the same way. It does this by recursively
3046             computing the rank for all internal nodes by taking the average rank of its
3047             children. This results in the following orderings:
3048              
3049             (a,b,c,d,e,f); => $tree->sort_tips( [ qw(a c b f d e) ] ) => (a,c,b,f,d,e);
3050            
3051             (a,b,(c,d),e,f); => $tree->sort_tips( [ qw(a b e d c f) ] ); => (a,b,(e,(d,c)),f);
3052            
3053             ((a,b),((c,d),e),f); => $tree->sort_tips( [ qw(a e d c b f) ] ); => ((e,(d,c)),(a,b),f);
3054              
3055             Type : Tree manipulator
3056             Title : sort_tips
3057             Usage : $tree->sort_tips($ordering);
3058             Function: Sorts nodes
3059             Returns : The modified invocant.
3060             Args : Required, an array reference (or taxa object) whose ordering to match
3061              
3062             =cut
3063              
3064             sub sort_tips {
3065 4     4 1 15 my ( $self, $taxa ) = @_;
3066             my @taxa =
3067             UNIVERSAL::can( $taxa, 'get_entities' )
3068 0         0 ? @{ $taxa->get_entities }
3069 4 50       19 : @{$taxa};
  4         17  
3070             my @names =
3071 4 50       13 map { UNIVERSAL::can( $_, 'get_name' ) ? $_->get_name : $_ } @taxa;
  24         103  
3072 4         10 my $i = 1;
3073 4         8 my %rank = map { $_ => $i++ } @names;
  24         53  
3074             $self->visit_depth_first(
3075             '-post' => sub {
3076 34     34   52 my $node = shift;
3077 34         45 my @children = @{ $node->get_children };
  34         91  
3078 34 100       74 if (@children) {
3079 10         18 my @ranks = map { $_->get_generic('rank') } @children;
  30         68  
3080 10         38 my $sum = sum @ranks;
3081 10         26 my $mean = $sum / scalar(@ranks);
3082 10         31 $node->set_generic( 'rank' => $mean );
3083 10         46 $node->clear;
3084             $node->insert(
3085             sort {
3086 10         31 $a->get_generic('rank') <=> $b->get_generic('rank')
  32         79  
3087             } @children
3088             );
3089             }
3090             else {
3091 24         59 $node->set_generic( 'rank' => $rank{ $node->get_name } );
3092             }
3093             }
3094 4         49 );
3095 4         43 return $self->_analyze;
3096             }
3097              
3098             =item exponentiate()
3099              
3100             Raises branch lengths to argument.
3101              
3102             Type : Tree manipulator
3103             Title : exponentiate
3104             Usage : $tree->exponentiate($power);
3105             Function: Raises branch lengths to $power.
3106             Returns : The modified invocant.
3107             Args : A $power in any of perl's number formats.
3108              
3109             =cut
3110              
3111             sub exponentiate {
3112 0     0 1 0 my ( $tree, $power ) = @_;
3113 0 0       0 if ( !looks_like_number $power ) {
3114 0         0 throw 'BadNumber' => "Power \"$power\" is a bad number";
3115             }
3116             else {
3117 0         0 foreach my $node ( @{ $tree->get_entities } ) {
  0         0  
3118 0         0 my $bl = $node->get_branch_length;
3119 0         0 $node->set_branch_length( $bl**$power );
3120             }
3121             }
3122 0         0 return $tree;
3123             }
3124              
3125             =item multiply()
3126              
3127             Multiples branch lengths by argument.
3128              
3129             Type : Tree manipulator
3130             Title : multiply
3131             Usage : $tree->multiply($num);
3132             Function: Multiplies branch lengths by $num.
3133             Returns : The modified invocant.
3134             Args : A $number in any of perl's number formats.
3135              
3136             =cut
3137            
3138             sub multiply {
3139 0     0 1 0 my ( $tree, $num ) = @_;
3140 0 0       0 if ( !looks_like_number $num ) {
3141 0         0 throw 'BadNumber' => "Number '$num' is a bad number";
3142             }
3143             $tree->visit(sub{
3144 0     0   0 my $node = shift;
3145 0         0 my $length = $node->get_branch_length;
3146 0 0       0 if ( $length ) {
3147 0         0 $node->set_branch_length( $length * $num );
3148             }
3149 0         0 });
3150 0         0 return $tree;
3151             }
3152              
3153             =item log_transform()
3154              
3155             Log argument base transform branch lengths.
3156              
3157             Type : Tree manipulator
3158             Title : log_transform
3159             Usage : $tree->log_transform($base);
3160             Function: Log $base transforms branch lengths.
3161             Returns : The modified invocant.
3162             Args : A $base in any of perl's number formats.
3163              
3164             =cut
3165              
3166             sub log_transform {
3167 0     0 1 0 my ( $tree, $base ) = @_;
3168 0 0       0 if ( !looks_like_number $base ) {
3169 0         0 throw 'BadNumber' => "Base \"$base\" is a bad number";
3170             }
3171             else {
3172 0         0 foreach my $node ( @{ $tree->get_entities } ) {
  0         0  
3173 0         0 my $bl = $node->get_branch_length;
3174 0         0 my $newbl;
3175 0         0 eval { $newbl = ( log $bl ) / ( log $base ); };
  0         0  
3176 0 0       0 if ($@) {
3177 0         0 throw 'OutOfBounds' =>
3178             "Invalid input for log transform: $@";
3179             }
3180             else {
3181 0         0 $node->set_branch_length($newbl);
3182             }
3183             }
3184             }
3185 0         0 return $tree;
3186             }
3187              
3188             =item remove_unbranched_internals()
3189              
3190             Collapses internal nodes with fewer than 2 children.
3191              
3192             Type : Tree manipulator
3193             Title : remove_unbranched_internals
3194             Usage : $tree->remove_unbranched_internals;
3195             Function: Collapses internal nodes
3196             with fewer than 2 children.
3197             Returns : The modified invocant.
3198             Args : NONE
3199             Comments:
3200              
3201             =cut
3202              
3203             sub remove_unbranched_internals {
3204 6     6 1 36 my $self = shift;
3205 6         13 my @delete;
3206             $self->visit_depth_first(
3207             '-post' => sub {
3208 1141     1141   1528 my $node = shift;
3209 1141         1270 my @children = @{ $node->get_children };
  1141         1892  
3210            
3211             # the node is interior, now need to check for each child
3212             # if it's interior as well
3213 1141 100       2858 if ( @children ) {
3214            
3215             # special case for the root with unbranched child
3216 558 50 66     1132 if ( $node->is_root and 1 == @children ) {
3217 0         0 my ($child) = @children;
3218 0         0 for my $gchild ( @{ $child->get_children } ) {
  0         0  
3219            
3220             # compute the new branch length for $gchild
3221 0         0 my $clength = $child->get_branch_length;
3222 0         0 my $glength = $gchild->get_branch_length;
3223 0 0       0 my $length = $clength if defined $clength;
3224 0 0       0 $length += $glength if defined $glength;
3225 0 0       0 $gchild->set_branch_length($length) if defined $length;
3226            
3227             # connect grandchild to root
3228 0         0 $gchild->set_parent($node);
3229 0         0 $node->delete($child);
3230            
3231             # will delete these nodes from the tree array
3232             # after the recursion
3233 0         0 push @delete, $child;
3234             }
3235             }
3236             else {
3237            
3238             # iterate over children
3239 558         904 for my $child ( @children ) {
3240 1186         2130 my $child_name = $child->get_name;
3241 1186         1512 my @grandchildren = @{ $child->get_children };
  1186         2012  
3242            
3243             # $child is an unbranched internal, so $grandchildren[0]
3244             # needs to be connected to $node
3245 1186 100       3399 if ( 1 == scalar @grandchildren ) {
3246 1         3 my $gchild = $grandchildren[0];
3247            
3248             # compute the new branch length for $gchild
3249 1         4 my $clength = $child->get_branch_length;
3250 1         3 my $glength = $gchild->get_branch_length;
3251 1 50       4 my $length = $clength if defined $clength;
3252 1 50       4 $length += $glength if defined $glength;
3253 1 50       3 $gchild->set_branch_length($length) if defined $length;
3254            
3255 1         4 $gchild->set_parent($node);
3256 1         3 $node->delete($child);
3257            
3258             # will delete these nodes from the tree array
3259             # after the recursion
3260 1         5 push @delete, $child;
3261             }
3262             }
3263             }
3264             }
3265             }
3266 6         82 );
3267 6         99 $self->delete($_) for @delete;
3268 6         32 return $self;
3269             }
3270              
3271             =item remove_orphans()
3272              
3273             Removes all unconnected nodes.
3274              
3275             Type : Tree manipulator
3276             Title : remove_orphans
3277             Usage : $tree->remove_orphans;
3278             Function: Removes all unconnected nodes
3279             Returns : The modified invocant.
3280             Args : NONE
3281             Comments:
3282              
3283             =cut
3284              
3285             sub remove_orphans {
3286 0     0 1 0 my $self = shift;
3287            
3288             # collect all nodes that are topologically connected
3289 0         0 my %seen;
3290             $self->visit_depth_first(
3291             '-pre' => sub {
3292 0     0   0 $seen{ shift->get_id }++;
3293             }
3294 0         0 );
3295            
3296             # collect all nodes
3297 0         0 my @delete;
3298             $self->visit(sub {
3299 0     0   0 my $node = shift;
3300 0 0       0 push @delete, $node if not $seen{$node->get_id};
3301 0         0 });
3302 0         0 $self->delete($_) for @delete;
3303            
3304             # notify user
3305 0 0       0 if ( scalar @delete ) {
3306 0         0 $logger->warn("deleted ".scalar(@delete)." orphaned nodes");
3307             }
3308            
3309 0         0 return $self;
3310             }
3311              
3312             =item deroot()
3313              
3314             Collapses one of the children of a basal bifurcation
3315              
3316             Type : Tree manipulator
3317             Title : deroot
3318             Usage : $tree->deroot;
3319             Function: Removes root
3320             Returns : The modified invocant.
3321             Args : Optional: node to collapse
3322             Comments:
3323              
3324             =cut
3325              
3326             sub deroot {
3327 0     0 1 0 my ($self,$collapsible) = @_;
3328 0         0 my $root = $self->get_root;
3329 0         0 my @children = @{ $root->get_children };
  0         0  
3330 0 0       0 if ( scalar @children < 3 ) {
3331 0 0       0 if ( not $collapsible) {
3332 0         0 ($collapsible) = grep { $_->is_internal } @children;
  0         0  
3333             }
3334 0         0 $collapsible->collapse;
3335 0         0 return $self;
3336             }
3337             else {
3338 0         0 return $self;
3339             }
3340             }
3341              
3342             =back
3343              
3344             =head2 UTILITY METHODS
3345              
3346             =over
3347              
3348             =item clone()
3349              
3350             Clones invocant.
3351              
3352             Type : Utility method
3353             Title : clone
3354             Usage : my $clone = $object->clone;
3355             Function: Creates a copy of the invocant object.
3356             Returns : A copy of the invocant.
3357             Args : Optional: a hash of code references to
3358             override reflection-based getter/setter copying
3359              
3360             my $clone = $object->clone(
3361             'set_forest' => sub {
3362             my ( $self, $clone ) = @_;
3363             for my $forest ( @{ $self->get_forests } ) {
3364             $clone->set_forest( $forest );
3365             }
3366             },
3367             'set_matrix' => sub {
3368             my ( $self, $clone ) = @_;
3369             for my $matrix ( @{ $self->get_matrices } ) {
3370             $clone->set_matrix( $matrix );
3371             }
3372             );
3373              
3374             Comments: Cloning is currently experimental, use with caution.
3375             It works on the assumption that the output of get_foo
3376             called on the invocant is to be provided as argument
3377             to set_foo on the clone - such as
3378             $clone->set_name( $self->get_name ). Sometimes this
3379             doesn't work, for example where this symmetry doesn't
3380             exist, or where the return value of get_foo isn't valid
3381             input for set_foo. If such a copy fails, a warning is
3382             emitted. To make sure all relevant attributes are copied
3383             into the clone, additional code references can be
3384             provided, as in the example above. Typically, this is
3385             done by overrides of this method in child classes.
3386              
3387             =cut
3388              
3389             sub clone {
3390 2     2 1 13 my $self = shift;
3391 2         10 $logger->info("cloning $self");
3392 2         4 my %subs = @_;
3393              
3394             # override, because we'll handle insert
3395 2     0   10 $subs{'set_root'} = sub { };
3396 2     0   7 $subs{'set_root_node'} = sub { };
3397              
3398             # we'll clone node objects, so no raw copying
3399             $subs{'insert'} = sub {
3400 0     0   0 my ( $self, $clone ) = @_;
3401 0         0 my %clone_of;
3402 0         0 for my $node ( @{ $self->get_entities } ) {
  0         0  
3403 0         0 my $cloned_node = $node->clone;
3404 0         0 $clone_of{ $node->get_id } = $cloned_node;
3405 0         0 $clone->insert($cloned_node);
3406             }
3407 0         0 for my $node ( @{ $self->get_entities } ) {
  0         0  
3408 0         0 my $cloned_node = $clone_of{ $node->get_id };
3409 0 0       0 if ( my $parent = $node->get_parent ) {
3410 0         0 my $cloned_parent_node = $clone_of{ $parent->get_id };
3411 0         0 $cloned_node->set_parent($cloned_parent_node);
3412             }
3413             }
3414 2         10 };
3415 2         11 return $self->SUPER::clone(%subs);
3416             }
3417              
3418             =back
3419              
3420             =head2 SERIALIZERS
3421              
3422             =over
3423              
3424             =item to_nexus()
3425              
3426             Serializes invocant to nexus string.
3427              
3428             Type : Stringifier
3429             Title : to_nexus
3430             Usage : my $string = $tree->to_nexus;
3431             Function: Turns the invocant tree object
3432             into a nexus string
3433             Returns : SCALAR
3434             Args : Any arguments that can be passed to Bio::Phylo::Forest::to_nexus
3435              
3436             =cut
3437              
3438             sub to_nexus {
3439 0     0 1 0 my $self = shift;
3440 0         0 my $forest = $fac->create_forest;
3441 0         0 $forest->insert($self);
3442 0         0 return $forest->to_nexus(@_);
3443             }
3444              
3445             =item to_newick()
3446              
3447             Serializes invocant to newick string.
3448              
3449             Type : Stringifier
3450             Title : to_newick
3451             Usage : my $string = $tree->to_newick;
3452             Function: Turns the invocant tree object
3453             into a newick string
3454             Returns : SCALAR
3455             Args : NONE
3456              
3457             =cut
3458              
3459             sub to_newick {
3460 25     25 1 60 my $self = shift;
3461 25         100 my %args = @_;
3462 25         178 my $newick = unparse( '-format' => 'newick', '-phylo' => $self, %args );
3463 25         325 return $newick;
3464             }
3465              
3466             =item to_xml()
3467              
3468             Serializes invocant to xml.
3469              
3470             Type : Serializer
3471             Title : to_xml
3472             Usage : my $xml = $obj->to_xml;
3473             Function: Turns the invocant object into an XML string.
3474             Returns : SCALAR
3475             Args : NONE
3476              
3477             =cut
3478              
3479             sub to_xml {
3480 0     0 1 0 my $self = shift;
3481 0         0 my $xsi_type = 'nex:IntTree';
3482 0         0 for my $node ( @{ $self->get_entities } ) {
  0         0  
3483 0         0 my $length = $node->get_branch_length;
3484 0 0 0     0 if ( defined $length and $length !~ /^[+-]?\d+$/ ) {
3485 0         0 $xsi_type = 'nex:FloatTree';
3486             }
3487             }
3488 0         0 $self->set_attributes( 'xsi:type' => $xsi_type );
3489 0         0 my $xml = $self->get_xml_tag;
3490 0 0       0 if ( my $root = $self->get_root ) {
3491 0         0 $xml .= $root->to_xml;
3492             }
3493 0         0 $xml .= $self->sets_to_xml . sprintf('</%s>', $self->get_tag);
3494 0         0 return $xml;
3495             }
3496              
3497             =item to_svg()
3498              
3499             Serializes invocant to SVG.
3500              
3501             Type : Serializer
3502             Title : to_svg
3503             Usage : my $svg = $obj->to_svg;
3504             Function: Turns the invocant object into an SVG string.
3505             Returns : SCALAR
3506             Args : Same args as the Bio::Phylo::Treedrawer constructor
3507             Notes : This will only work if you have the SVG module
3508             from CPAN installed on your system.
3509              
3510             =cut
3511              
3512             sub to_svg {
3513 0     0 1 0 my $self = shift;
3514 0         0 my $drawer = $fac->create_drawer(@_);
3515 0         0 $drawer->set_tree($self);
3516 0         0 return $drawer->draw;
3517             }
3518              
3519             =item to_dom()
3520              
3521             Type : Serializer
3522             Title : to_dom
3523             Usage : $tree->to_dom($dom)
3524             Function: Generates a DOM subtree from the invocant
3525             and its contained objects
3526             Returns : an Element object
3527             Args : DOM factory object
3528              
3529             =cut
3530              
3531             sub to_dom {
3532 0     0 1 0 my ( $self, $dom ) = @_;
3533 0   0     0 $dom ||= $Bio::Phylo::NeXML::DOM::DOM;
3534 0 0       0 unless ( looks_like_object $dom, _DOMCREATOR_ ) {
3535 0         0 throw 'BadArgs' => 'DOM factory object not provided';
3536             }
3537 0         0 my $xsi_type = 'nex:IntTree';
3538 0         0 for my $node ( @{ $self->get_entities } ) {
  0         0  
3539 0         0 my $length = $node->get_branch_length;
3540 0 0 0     0 if ( defined $length and $length !~ /^[+-]?\d+$/ ) {
3541 0         0 $xsi_type = 'nex:FloatTree';
3542             }
3543             }
3544 0         0 $self->set_attributes( 'xsi:type' => $xsi_type );
3545 0         0 my $elt = $self->get_dom_elt($dom);
3546 0 0       0 if ( my $root = $self->get_root ) {
3547 0         0 $elt->set_child($_) for $root->to_dom($dom);
3548             }
3549 0         0 return $elt;
3550             }
3551              
3552             =begin comment
3553              
3554             Type : Internal method
3555             Title : _consolidate
3556             Usage : $tree->_consolidate;
3557             Function: Does pre-order traversal, only keeps
3558             nodes seen during traversal in tree,
3559             in order of traversal
3560             Returns :
3561             Args :
3562              
3563             =end comment
3564              
3565             =cut
3566              
3567             sub _consolidate {
3568 0     0   0 my $self = shift;
3569 0         0 my @nodes;
3570 0     0   0 $self->visit_depth_first( '-pre' => sub { push @nodes, shift } );
  0         0  
3571 0         0 $self->clear;
3572 0         0 $self->insert(@nodes);
3573             }
3574              
3575             =begin comment
3576              
3577             Type : Internal method
3578             Title : _container
3579             Usage : $tree->_container;
3580             Function:
3581             Returns : CONSTANT
3582             Args :
3583              
3584             =end comment
3585              
3586             =cut
3587              
3588 451     451   786 sub _container { $CONTAINER_CONSTANT }
3589              
3590             =begin comment
3591              
3592             Type : Internal method
3593             Title : _type
3594             Usage : $tree->_type;
3595             Function:
3596             Returns : CONSTANT
3597             Args :
3598              
3599             =end comment
3600              
3601             =cut
3602              
3603 29342     29342   46701 sub _type { $TYPE_CONSTANT }
3604 2     2   6 sub _tag { 'tree' }
3605              
3606             =back
3607              
3608             =cut
3609              
3610             # podinherit_insert_token
3611              
3612             =head1 SEE ALSO
3613              
3614             There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo>
3615             for any user or developer questions and discussions.
3616              
3617             =over
3618              
3619             =item L<Bio::Phylo::Listable>
3620              
3621             The L<Bio::Phylo::Forest::Tree|Bio::Phylo::Forest::Tree> object inherits from
3622             the L<Bio::Phylo::Listable|Bio::Phylo::Listable> object, so the methods defined
3623             therein also apply to trees.
3624              
3625             =item L<Bio::Phylo::Manual>
3626              
3627             Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>.
3628              
3629             =back
3630              
3631             =head1 CITATION
3632              
3633             If you use Bio::Phylo in published research, please cite it:
3634              
3635             B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
3636             and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
3637             I<BMC Bioinformatics> B<12>:63.
3638             L<http://dx.doi.org/10.1186/1471-2105-12-63>
3639              
3640             =cut
3641              
3642             }
3643             1;
3644             __DATA__
3645             sub get_nodes {
3646             my $self = shift;
3647             my $order = 'depth';
3648             my @nodes;
3649             if ( @_ ) {
3650             my %args = @_;
3651             if ( $args{'-order'} and $args{'-order'} =~ m/^b/ ) {
3652             $order = 'breadth';
3653             }
3654             }
3655             if ( my $root = $self->get_root ) {
3656             if ( $order eq 'depth' ) {
3657             $root->visit_depth_first(
3658             -pre => sub { push @nodes, shift }
3659             );
3660             }
3661             else {
3662             $root->visit_level_order( sub { push @nodes, shift } ); # XXX bioperl is wrong
3663             }
3664             }
3665             return @nodes;
3666             }
3667              
3668             sub set_root {
3669             my ( $self, $node ) = @_;
3670             my @nodes = ($node);
3671             if ( my $desc = $node->get_descendants ) {
3672             push @nodes, @{ $desc };
3673             }
3674             $self->clear;
3675             $self->insert(@nodes);
3676             return $node;
3677             }
3678              
3679             *set_root_node = \&set_root;
3680              
3681             *as_string = \&to_newick;
3682              
3683             sub get_root_node{ shift->get_root }
3684              
3685             sub number_nodes { shift->calc_number_of_nodes }
3686              
3687             sub total_branch_length { shift->calc_tree_length }
3688              
3689             sub height {
3690             my $self = shift;
3691             my $nodect = $self->calc_number_of_nodes;
3692             return 0 if( ! $nodect );
3693             return log($nodect) / log(2);
3694             }
3695              
3696             sub id {
3697             my $self = shift;
3698             if ( @_ ) {
3699             $self->set_name(shift);
3700             }
3701             return $self->get_name;
3702             }
3703              
3704             sub score {
3705             my $self = shift;
3706             if ( @_ ) {
3707             $self->set_score(shift);
3708             }
3709             return $self->get_score;
3710             }
3711              
3712             sub get_leaf_nodes {
3713             my $self = shift;
3714             my $tips = $self->get_terminals;
3715             if ( $tips ) {
3716             return @{ $tips };
3717             }
3718             return;
3719             }
3720              
3721             sub _parse_newick {
3722             my $self = shift;
3723             my $newick = join ('', @{ $_[0] } ) . ';';
3724             my $forest = Bio::Phylo::IO::parse( '-format' => 'newick', '-string' => $newick );
3725             my $tree = $forest->first;
3726             my @nodes = @{ $tree->get_entities };
3727             for my $node ( @nodes ) {
3728             $self->insert($node);
3729             $tree->delete($node);
3730             }
3731             $tree->DESTROY;
3732             $forest->DESTROY;
3733             }
3734              
3735             sub find_node {
3736             my $self = shift;
3737             if( ! @_ ) {
3738             $logger->warn("Must request a either a string or field and string when searching");
3739             }
3740             my ( $field, $value );
3741             if ( @_ == 1 ) {
3742             ( $field, $value ) = ( 'id', shift );
3743             }
3744             elsif ( @_ == 2 ) {
3745             ( $field, $value ) = @_;
3746             $field =~ s/^-//;
3747             }
3748             my @nodes;
3749             $self->visit(
3750             sub {
3751             my $node = shift;
3752             push @nodes, $node if $node->$field and $node->$field eq $value;
3753             }
3754             );
3755             if ( wantarray) {
3756             return @nodes;
3757             }
3758             else {
3759             if( @nodes > 1 ) {
3760             $logger->warn("More than 1 node found but caller requested scalar, only returning first node");
3761             }
3762             return shift @nodes;
3763             }
3764             }
3765              
3766             sub verbose {
3767             my ( $self, $level ) = @_;
3768             $level = 0 if $level < 0;
3769             $self->VERBOSE( -level => $level );
3770             }
3771              
3772             sub reroot {
3773             my ( $self, $node ) = @_;
3774             my $id = $node->get_id;
3775             my $new_root = $node->set_root_below;
3776             if ( $new_root ) {
3777             my @children = grep { $_->get_id != $id } @{ $new_root->get_children };
3778             $node->set_child($_) for @children;
3779             return 1;
3780             }
3781             else {
3782             return 0;
3783             }
3784             }
3785              
3786             sub remove_Node {
3787             my ( $self, $node ) = @_;
3788             if ( not ref $node ) {
3789             ($node) = grep { $_->get_name eq $node } @{ $self->get_entities };
3790             }
3791             if ( $node->is_terminal ) {
3792             $node->get_parent->prune_child( $node );
3793             }
3794             else {
3795             $node->collapse;
3796             }
3797             $self->delete($node);
3798             }
3799              
3800             sub splice {
3801             my ( $self, @args ) = @_;
3802             if ( ref($args[0]) ) {
3803             $_->collapse for @args;
3804             }
3805             else {
3806             my %args = @args;
3807             my ( @keep, @remove );
3808             for my $key ( keys %args ) {
3809             if ( $key =~ /^-keep_(.+)$/ ) {
3810             my $field = $1;
3811             my %val;
3812             if ( ref $args{$key} ) {
3813             %val = map { $_ => 1 } @{ $args{$key} };
3814             }
3815             else {
3816             %val = ( $args{$key} => 1 );
3817             }
3818             push @keep, grep { $val{ $_->$field } } @{ $self->get_entities };
3819             }
3820             elsif ( $key =~ /^-remove_(.+)$/ ) {
3821             my $field = $1;
3822             my %val;
3823             if ( ref $args{$key} ) {
3824             %val = map { $_ => 1 } @{ $args{$key} };
3825             }
3826             else {
3827             %val = ( $args{$key} => 1 );
3828             }
3829             push @remove, grep { $val{ $_->$field } } @{ $self->get_entities };
3830             }
3831             }
3832             my @netto;
3833             REMOVE: for my $remove ( @remove ) {
3834             for my $keep ( @keep ) {
3835             next REMOVE if $remove->get_id == $keep->get_id;
3836             }
3837             push @netto, $remove;
3838             }
3839             my @names = map { $_->id } @netto;
3840             my @keep_names = map { $_->id } @keep;
3841             if ( @names ) {
3842             $self->prune_tips(\@names);
3843             }
3844             elsif ( @keep_names ) {
3845             $self->keep_tips( \@keep_names );
3846             }
3847             }
3848             }
3849              
3850             sub move_id_to_bootstrap {
3851             my $self = shift;
3852             $self->visit(
3853             sub {
3854             my $node = shift;
3855             $node->bootstrap( $node->id ) if defined $node->id;
3856             $node->id("");
3857             }
3858             );
3859             }