File Coverage

Bio/Tree/Statistics.pm
Criterion Covered Total %
statement 225 260 86.5
branch 67 88 76.1
condition 28 65 43.0
subroutine 23 25 92.0
pod 15 15 100.0
total 358 453 79.0


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tree::Statistics
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tree::Statistics - Calculate certain statistics for a Tree
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tree::Statistics;
21              
22             =head1 DESCRIPTION
23              
24             This should be where Tree statistics are calculated. It was
25             previously where statistics from a Coalescent simulation.
26              
27             It now contains several methods for calculating L
28             statistics>.
29              
30             =head1 FEEDBACK
31              
32             =head2 Mailing Lists
33              
34             User feedback is an integral part of the evolution of this and other
35             Bioperl modules. Send your comments and suggestions preferably to
36             the Bioperl mailing list. Your participation is much appreciated.
37              
38             bioperl-l@bioperl.org - General discussion
39             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
40              
41             =head2 Support
42              
43             Please direct usage questions or support issues to the mailing list:
44              
45             I
46              
47             rather than to the module maintainer directly. Many experienced and
48             reponsive experts will be able look at the problem and quickly
49             address it. Please include a thorough description of the problem
50             with code and data examples if at all possible.
51              
52             =head2 Reporting Bugs
53              
54             Report bugs to the Bioperl bug tracking system to help us keep track
55             of the bugs and their resolution. Bug reports can be submitted via
56             the web:
57              
58             https://github.com/bioperl/bioperl-live/issues
59              
60             =head1 AUTHOR - Jason Stajich
61              
62             Email jason AT bioperl.org
63              
64             =head1 CONTRIBUTORS
65              
66             Heikki Lehvaslaiho, heikki at bioperl dot org
67              
68             =head1 APPENDIX
69              
70             The rest of the documentation details each of the object methods.
71             Internal methods are usually preceded with a _
72              
73             =cut
74              
75              
76             # Let the code begin...
77              
78              
79             package Bio::Tree::Statistics;
80 2     2   2035 use strict;
  2         2  
  2         50  
81              
82              
83 2     2   5 use base qw(Bio::Root::Root);
  2         2  
  2         3373  
84              
85             =head2 new
86              
87             Title : new
88             Usage : my $obj = Bio::Tree::Statistics->new();
89             Function: Builds a new Bio::Tree::Statistics object
90             Returns : Bio::Tree::Statistics
91             Args :
92              
93             =head2 assess_bootstrap
94              
95             Title : assess_bootstrap
96             Usage : my $tree_with_bs = $stats->assess_bootstrap(\@bs_trees,$guide_tree);
97             Function: Calculates the bootstrap for internal nodes based on the percentage
98             of times \@bs_trees agree with each internal node
99             Returns : L
100             Args : Arrayref of Ls
101             Guide tree, Ls
102              
103             =cut
104              
105             sub assess_bootstrap{
106 0     0 1 0 my ($self,$bs_trees,$guide_tree) = @_;
107 0         0 my @consensus;
108              
109             # internal nodes are defined by their children
110              
111 0         0 my (%lookup,%internal);
112 0         0 my $i = 0;
113 0         0 for my $tree ( $guide_tree, @$bs_trees ) {
114             # Do this as a top down approach, can probably be
115             # improved by caching internal node states, but not going
116             # to worry about it right now.
117              
118 0         0 my @allnodes = $tree->get_nodes;
119 0         0 my @internalnodes = grep { ! $_->is_Leaf } @allnodes;
  0         0  
120 0         0 for my $node ( @internalnodes ) {
121 0         0 my @tips = sort map { $_->id }
122 0         0 grep { $_->is_Leaf() } $node->get_all_Descendents;
  0         0  
123 0         0 my $id = "(".join(",", @tips).")";
124 0 0       0 if( $i == 0 ) {
125 0         0 $internal{$id} = $node->internal_id;
126             } else {
127 0         0 $lookup{$id}++;
128             }
129             }
130 0         0 $i++;
131             }
132 0         0 my @save;
133 0         0 for my $l ( keys %lookup ) {
134 0 0       0 if( defined $internal{$l} ) {#&& $lookup{$l} > $min_seen ) {
135 0         0 my $intnode = $guide_tree->find_node(-internal_id => $internal{$l});
136 0         0 $intnode->bootstrap(sprintf("%d",100 * $lookup{$l} / $i));
137             }
138             }
139 0         0 return $guide_tree;
140             }
141              
142              
143             =head2 cherries
144              
145             Example : cherries($tree, $node);
146             Description: Count number of paired leaf nodes
147             in a binary tree
148             Returns : integer
149             Exceptions :
150             Args : 1. Bio::Tree::TreeI object
151             2. Bio::Tree::NodeI object within the tree, optional
152              
153             Commonly used statistics assume a binary tree, but this methods
154             returns a value even for trees with polytomies.
155              
156             =cut
157              
158             sub cherries ($;$) {
159 22     22 1 16 my $self = shift;
160 22         15 my $tree = shift;
161 22   66     28 my $node = shift || $tree->get_root_node;
162              
163 22         13 my $cherries = 0;
164 22         24 my @descs = $node->each_Descendent;
165              
166 22 100 66     32 if ($descs[0]->is_Leaf and $descs[1]->is_Leaf) {
167 12 50       11 if ($descs[3]) { #polytomy at leaf level
168 0         0 $cherries = 0;
169             } else {
170 12         8 $cherries = 1;
171             }
172             } else {
173             # recurse
174 10         12 foreach my $desc (@descs) {
175 20         25 $cherries += $self->cherries($tree, $desc);
176             }
177             }
178 22         34 return $cherries;
179             }
180              
181              
182             =head2 Tree-Trait statistics
183              
184             The following methods produce descriptors of trait distribution among
185             leaf nodes within the trees. They require that a trait has been set
186             for each leaf node. The tag methods of Bio::Tree::Node are used to
187             store them as key/value pairs. In this way, one tree can store more
188             than one trait.
189              
190             Trees have method add_traits() to set trait values from a file. See the
191             add_trait() method in L.
192              
193             =head2 fitch
194              
195             Example : fitch($tree, $key, $node);
196             Description: Calculates Parsimony Score (PS) and internal trait
197             values using the Fitch 1971 parsimony algorithm for
198             the subtree a defined by the (internal) node.
199             Node defaults to the root.
200             Returns : true on success
201             Exceptions : leaf nodes have to have the trait defined
202             Args : 1. Bio::Tree::TreeI object
203             2. trait name string
204             3. Bio::Tree::NodeI object within the tree, optional
205              
206             Runs first L that calculates parsimony scores and then
207             L that should resolve most of the trait/character state
208             ambiguities.
209              
210             Fitch, W.M., 1971. Toward defining the course of evolution: minimal
211             change for a specific tree topology. Syst. Zool. 20, 406-416.
212              
213             You can access calculated parsimony values using:
214              
215             $score = $node->->get_tag_values('ps_score');
216              
217             and the trait value with:
218              
219             $traitvalue = $node->->get_tag_values('ps_trait'); # only the first
220             @traitvalues = $node->->get_tag_values('ps_trait');
221              
222             Note that there can be more that one trait value, especially for the
223             root node.
224              
225             =cut
226              
227             sub fitch {
228 0     0 1 0 my $self = shift;
229 0         0 my $tree = shift;
230 0   0     0 my $key = shift || $self->throw("Trait name is needed");
231 0   0     0 my $node = shift || $tree->get_root_node;
232              
233 0         0 $self->fitch_up($tree, $key, $node);
234 0         0 $self->fitch_down($tree, $node);
235             }
236              
237              
238             =head2 ps
239              
240             Example : ps($tree, $key, $node);
241             Description: Calculates Parsimony Score (PS) from Fitch 1971
242             parsimony algorithm for the subtree as defined
243             by the (internal) node.
244             Node defaults to the root.
245             Returns : integer, 1 < PS < n, where n is number of branches
246             Exceptions : leaf nodes have to have the trait defined
247             Args : 1. Bio::Tree::TreeI object
248             2. trait name string
249             3. Bio::Tree::NodeI object within the tree, optional
250              
251             This is the first half of the Fitch algorithm that is enough for
252             calculating the resolved parsimony values. The trait/chararacter
253             states are commonly left in ambiguous state. To resolve them, run
254             L.
255              
256             =cut
257              
258 2     2 1 438 sub ps { shift->fitch_up(@_) }
259              
260              
261             =head2 fitch_up
262              
263             Example : fitch_up($tree, $key, $node);
264             Description: Calculates Parsimony Score (PS) from the Fitch 1971
265             parsimony algorithm for the subtree as defined
266             by the (internal) node.
267             Node defaults to the root.
268             Returns : integer, 1< PS < n, where n is number of branches
269             Exceptions : leaf nodes have to have the trait defined
270             Args : 1. Bio::Tree::TreeI object
271             2. trait name string
272             3. Bio::Tree::NodeI object within the tree, optional
273              
274             This is a more generic name for L and indicates that it performs
275             the first bottom-up tree traversal that calculates the parsimony score
276             but usually leaves trait/character states ambiguous. If you are
277             interested in internal trait states, running L should
278             resolve most of the ambiguities.
279              
280             =cut
281              
282             sub fitch_up {
283 46     46 1 30 my $self = shift;
284 46         32 my $tree = shift;
285 46   33     54 my $key = shift || $self->throw("Trait name is needed");
286 46   66     53 my $node = shift || $tree->get_root_node;
287              
288 46 100       48 if ($node->is_Leaf) {
289 24 50       29 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
290             unless $node->has_tag($key);
291 24         35 $node->set_tag_value('ps_trait', $node->get_tag_values($key) );
292 24         35 $node->set_tag_value('ps_score', 0 );
293 24         29 return; # end of recursion
294             }
295              
296 22         31 foreach my $child ($node->each_Descendent) {
297 44         55 $self->fitch_up($tree, $key, $child);
298             }
299              
300 22         24 my %intersection;
301             my %union;
302 0         0 my $score;
303              
304 22         28 foreach my $child ($node->each_Descendent) {
305 44         51 foreach my $trait ($child->get_tag_values('ps_trait') ) {
306 48 100       63 $intersection{$trait}++ if $union{$trait};
307 48         54 $union{$trait}++;
308             }
309 44         54 $score += $child->get_tag_values('ps_score');
310             }
311              
312 22 100       39 if (keys %intersection) {
313 17         22 $node->set_tag_value('ps_trait', keys %intersection);
314 17         24 $node->set_tag_value('ps_score', $score);
315             } else {
316 5         11 $node->set_tag_value('ps_trait', keys %union);
317 5         8 $node->set_tag_value('ps_score', $score+1);
318             }
319              
320 22 50       38 if ($self->verbose) {
321 0         0 print "-- node --------------------------\n";
322 0         0 print "iID: ", $node->internal_id, " (", $node->id, ")\n";
323 0         0 print "Trait: ", join (', ', $node->get_tag_values('ps_trait') ), "\n";
324 0         0 print "length :", scalar($node->get_tag_values('ps_score')) , "\n";
325             }
326 22         34 return scalar $node->get_tag_values('ps_score');
327             }
328              
329              
330             =head2 fitch_down
331              
332             Example : fitch_down($tree, $node);
333             Description: Runs the second pass from Fitch 1971
334             parsimony algorithm to resolve ambiguous
335             trait states left by first pass.
336             by the (internal) node.
337             Node defaults to the root.
338             Returns : true
339             Exceptions : dies unless the trait is defined in all nodes
340             Args : 1. Bio::Tree::TreeI object
341             2. Bio::Tree::NodeI object within the tree, optional
342              
343             Before running this method you should have ran L (alias to
344             L ). Note that it is not guaranteed that all states are completely
345             resolved.
346              
347             =cut
348              
349             sub fitch_down {
350 15     15 1 475 my $self = shift;
351 15         12 my $tree = shift;
352 15   66     20 my $node = shift || $tree->get_root_node;
353              
354 15         13 my $key = 'ps_trait';
355 15 50       18 $self->throw ("ERROR: ". $node->internal_id. " needs a value for $key")
356             unless $node->has_tag($key);
357              
358 15         11 my $nodev;
359 15         17 foreach my $trait ($node->get_tag_values($key) ) {
360 19         19 $nodev->{$trait}++;
361             }
362              
363 15         22 foreach my $child ($node->each_Descendent) {
364 30 100       32 next if $child->is_Leaf; # end of recursion
365              
366 14         11 my $intersection;
367 14         16 foreach my $trait ($child->get_tag_values($key) ) {
368 17 100       33 $intersection->{$trait}++ if $nodev->{$trait};
369             }
370              
371 14         21 $self->fitch_down($tree, $child);
372 14         24 $child->set_tag_value($key, keys %$intersection);
373             }
374 15         20 return 1; # success
375             }
376              
377              
378             =head2 persistence
379              
380             Example : persistence($tree, $node);
381             Description: Calculates the persistence
382             for node in the subtree defined by the (internal)
383             node. Node defaults to the root.
384             Returns : int, number of generations trait value has to remain same
385             Exceptions : all the nodes need to have the trait defined
386             Args : 1. Bio::Tree::TreeI object
387             2. Bio::Tree::NodeI object within the tree, optional
388              
389             Persistence measures the stability that the trait value has in a
390             tree. It expresses the number of generations the trait value remains
391             the same. All the decendants of the root in the same generation have
392             to share the same value.
393              
394             Depends on Fitch's parsimony score (PS).
395              
396             =cut
397              
398             sub _persistence {
399 18     18   14 my $self = shift;
400 18         10 my $tree = shift;
401 18         11 my $node = shift;
402 18   33     23 my $value = shift || $self->throw("Value is needed");
403              
404              
405 18         12 my $key = 'ps_trait';
406              
407 18 50       30 $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
408              
409 18 100       19 return 0 unless $node->get_tag_values($key) eq $value; # wrong value
410 16 100       21 return 1 if $node->is_Leaf; # end of recursion
411              
412 7         8 my $persistence = 10000000; # an arbitrarily large number
413 7         8 foreach my $child ($node->each_Descendent) {
414 14         19 my $pers = $self->_persistence($tree, $child, $value);
415 14 100       33 $persistence = $pers if $pers < $persistence;
416             }
417 7         9 return $persistence + 1;
418             }
419              
420             sub persistence {
421 4     4 1 17 my $self = shift;
422 4         2 my $tree = shift;
423 4   33     9 my $node = shift || $tree->get_root_node;
424 4 50       10 $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
425              
426 4         4 my $key = 'ps_trait';
427 4         8 my $value = $node->get_tag_values($key);
428              
429             #calculate
430 4         8 my $persistence = $self->_persistence($tree, $node, $value);
431 4         7 $node->set_tag_value('persistance', $persistence);
432 4         13 return $persistence;
433             }
434              
435              
436             =head2 count_subclusters
437              
438             Example : count_clusters($tree, $node);
439             Description: Calculates the number of sub-clusters
440             in the subtree defined by the (internal)
441             node. Node defaults to the root.
442             Returns : int, count
443             Exceptions : all the nodes need to have the trait defined
444             Args : 1. Bio::Tree::TreeI object
445             2. Bio::Tree::NodeI object within the tree, optional
446              
447             Depends on Fitch's parsimony score (PS).
448              
449             =cut
450              
451             sub _count_subclusters {
452 22     22   18 my $self = shift;
453 22         12 my $tree = shift;
454 22         14 my $node = shift;
455 22   33     25 my $value = shift || $self->throw("Value is needed");
456              
457 22         15 my $key = 'ps_trait';
458              
459 22 50       24 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
460             unless $node->has_tag($key);
461              
462 22 100       41 if ($node->get_tag_values($key) eq $value) {
463 18 100       18 if ($node->get_tag_values('ps_score') == 0) {
464 9         16 return 0;
465             } else {
466 9         6 my $count = 0;
467 9         12 foreach my $child ($node->each_Descendent) {
468 18         26 $count += $self->_count_subclusters($tree, $child, $value);
469             }
470 9         18 return $count;
471             }
472             }
473 4         7 return 1;
474             }
475              
476             sub count_subclusters {
477 4     4 1 17 my $self = shift;
478 4         4 my $tree = shift;
479 4   33     7 my $node = shift || $tree->get_root_node;
480 4 50       10 $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
481              
482 4         3 my $key = 'ps_trait';
483 4         6 my $value = $node->get_tag_values($key);
484              
485 4         7 return $self->_count_subclusters($tree, $node, $value);
486             }
487              
488              
489             =head2 count_leaves
490              
491             Example : count_leaves($tree, $node);
492             Description: Calculates the number of leaves with same trait
493             value as root in the subtree defined by the (internal)
494             node. Requires an unbroken line of identical trait values.
495             Node defaults to the root.
496             Returns : int, number of leaves with this trait value
497             Exceptions : all the nodes need to have the trait defined
498             Args : 1. Bio::Tree::TreeI object
499             2. Bio::Tree::NodeI object within the tree, optional
500              
501             Depends on Fitch's parsimony score (PS).
502              
503             =cut
504              
505             sub _count_leaves {
506 204     204   136 my $self = shift;
507 204         108 my $tree = shift;
508 204   33     230 my $node = shift || $tree->get_root_node;
509 204         128 my $value = shift;
510              
511 204         118 my $key = 'ps_trait';
512              
513 204 50       197 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
514             unless $node->has_tag($key);
515              
516 204 100       221 if ($node->get_tag_values($key) eq $value) {
517             #print $node->id, ": ", $node->get_tag_values($key), "\n";
518 184 100       188 return 1 if $node->is_Leaf; # end of recursion
519              
520 67         48 my $count = 0;
521 67         72 foreach my $child ($node->each_Descendent) {
522 134         141 $count += $self->_count_leaves($tree, $child, $value);
523             }
524 67         118 return $count;
525             }
526 20         27 return 0;
527             }
528              
529             sub count_leaves {
530 6     6 1 15 my $self = shift;
531 6         5 my $tree = shift;
532 6   33     11 my $node = shift || $tree->get_root_node;
533 6 50       16 $self->throw("Node is needed") unless $node->isa('Bio::Tree::NodeI');
534              
535 6         7 my $key = 'ps_trait';
536 6         7 my $value = $node->get_tag_values($key);
537              
538 6         10 return $self->_count_leaves($tree, $node, $value);
539             }
540              
541              
542             =head2 phylotype_length
543              
544             Example : phylotype_length($tree, $node);
545             Description: Sums up the branch lengths within phylotype
546             exluding the subclusters where the trait values
547             are different
548             Returns : float, length
549             Exceptions : all the nodes need to have the trait defined
550             Args : 1. Bio::Tree::TreeI object
551             2. Bio::Tree::NodeI object within the tree, optional
552              
553             Depends on Fitch's parsimony score (PS).
554              
555             =cut
556              
557             sub _phylotype_length {
558 45     45   45 my $self = shift;
559 45         26 my $tree = shift;
560 45         29 my $node = shift;
561 45         23 my $value = shift;
562              
563 45         37 my $key = 'ps_trait';
564              
565 45 50       48 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
566             unless $node->has_tag($key);
567              
568 45 100       52 return 0 if $node->get_tag_values($key) ne $value;
569 40 100       48 return $node->branch_length if $node->is_Leaf; # end of recursion
570              
571 20         15 my $length = 0;
572 20         22 foreach my $child ($node->each_Descendent) {
573 40         47 my $sub_len = $self->_phylotype_length($tree, $child, $value);
574 40         36 $length += $sub_len;
575 40 100 100     38 $length += $child->branch_length if not $child->is_Leaf and $sub_len;
576             }
577 20         36 return $length;
578             }
579              
580             sub phylotype_length {
581 5     5 1 15 my $self = shift;
582 5         5 my $tree = shift;
583 5   33     11 my $node = shift || $tree->get_root_node;
584              
585 5         1 my $key = 'ps_trait';
586 5         11 my $value = $node->get_tag_values($key);
587              
588 5         11 return $self->_phylotype_length($tree, $node, $value);
589             }
590              
591              
592             =head2 sum_of_leaf_distances
593              
594             Example : sum_of_leaf_distances($tree, $node);
595             Description: Sums up the branch lengths from root to leaf
596             exluding the subclusters where the trait values
597             are different
598             Returns : float, length
599             Exceptions : all the nodes need to have the trait defined
600             Args : 1. Bio::Tree::TreeI object
601             2. Bio::Tree::NodeI object within the tree, optional
602              
603             Depends on Fitch's parsimony score (PS).
604              
605             =cut
606              
607             sub _sum_of_leaf_distances {
608 71     71   45 my $self = shift;
609 71         47 my $tree = shift;
610 71         42 my $node = shift;
611 71         51 my $value = shift;
612              
613 71         40 my $key = 'ps_trait';
614              
615 71 50       74 $self->throw ("ERROR: ". $node->internal_id. " needs a value for trait $key")
616             unless $node->has_tag($key);
617 71 100       82 return 0 if $node->get_tag_values($key) ne $value;
618             #return $node->branch_length if $node->is_Leaf; # end of recursion
619 64 100       75 return 0 if $node->is_Leaf; # end of recursion
620              
621 32         25 my $length = 0;
622 32         39 foreach my $child ($node->each_Descendent) {
623 64         68 $length += $self->_count_leaves($tree, $child, $value) * $child->branch_length +
624             $self->_sum_of_leaf_distances($tree, $child, $value);
625             }
626 32         58 return $length;
627             }
628              
629             sub sum_of_leaf_distances {
630 7     7 1 18 my $self = shift;
631 7         6 my $tree = shift;
632 7   33     12 my $node = shift || $tree->get_root_node;
633              
634 7         4 my $key = 'ps_trait';
635 7         13 my $value = $node->get_tag_values($key);
636              
637 7         11 return $self->_sum_of_leaf_distances($tree, $node, $value);
638             }
639              
640              
641             =head2 genetic_diversity
642              
643             Example : genetic_diversity($tree, $node);
644             Description: Diversity is the sum of root to leaf distances
645             within the phylotype normalised by number of leaf
646             nodes
647             Returns : float, value of genetic diversity
648             Exceptions : all the nodes need to have the trait defined
649             Args : 1. Bio::Tree::TreeI object
650             2. Bio::Tree::NodeI object within the tree, optional
651              
652             Depends on Fitch's parsimony score (PS).
653              
654             =cut
655              
656             sub genetic_diversity {
657 2     2 1 2 my $self = shift;
658 2         3 my $tree = shift;
659 2   33     4 my $node = shift || $tree->get_root_node;
660              
661 2         4 return $self->sum_of_leaf_distances($tree, $node) /
662             $self->count_leaves($tree, $node);
663             }
664              
665              
666             =head2 statratio
667              
668             Example : statratio($tree, $node);
669             Description: Ratio of the stem length and the genetic diversity of the
670             phylotype L
671             Returns : float, separation score
672             Exceptions : all the nodes need to have the trait defined
673             Args : 1. Bio::Tree::TreeI object
674             2. Bio::Tree::NodeI object within the tree, optional
675              
676             Statratio gives a measure of separation and variability within the phylotype.
677             Larger values identify more rapidly evolving and recent phylotypes.
678              
679             Depends on Fitch's parsimony score (PS).
680              
681             =cut
682              
683             sub statratio {
684 1     1 1 2 my $self = shift;
685 1         1 my $tree = shift;
686 1   33     2 my $node = shift || $tree->get_root_node;
687              
688 1         3 my $div = $self->genetic_diversity($tree, $node);
689 1 50       4 return 0 if $div == 0;
690 1         2 return $node->branch_length / $div;
691              
692             }
693              
694              
695             =head2 ai
696              
697             Example : ai($tree, $key, $node);
698             Description: Calculates the Association Index (AI) of Whang et
699             al. 2001 for the subtree defined by the (internal)
700             node. Node defaults to the root.
701             Returns : real
702             Exceptions : leaf nodes have to have the trait defined
703             Args : 1. Bio::Tree::TreeI object
704             2. trait name string
705             3. Bio::Tree::NodeI object within the tree, optional
706              
707             Association index (AI) gives a more fine grained results than PS since
708             the result is a real number. ~0 E= AI.
709              
710             Wang, T.H., Donaldson, Y.K., Brettle, R.P., Bell, J.E., Simmonds, P.,
711             2001. Identification of shared populations of human immunodeficiency
712             Virus Type 1 infecting microglia and tissue macrophages outside the
713             central nervous system. J. Virol. 75 (23), 11686-11699.
714              
715             =cut
716              
717             sub _node_ai {
718 20     20   14 my $self = shift;
719 20         15 my $node = shift;
720 20         14 my $key = shift;
721              
722 20         12 my $traits;
723 20         15 my $leaf_count = 0;
724 20         25 for my $desc ( $node->get_all_Descendents ) {
725 88 100       90 next unless $desc->is_Leaf;
726 64         45 $leaf_count++;
727 64 50       72 $self->throw ("Node ". $desc->id. " needs a value for trait [$key]")
728             unless $desc->has_tag($key);
729 64         77 my $trait = $desc->get_tag_values($key);
730 64         78 $traits->{$trait}++;
731             }
732 20         17 my $most_common = 0;
733 20         31 foreach ( keys %$traits) {
734 28 100       44 $most_common = $traits->{$_} if $traits->{$_} > $most_common;
735             }
736 20         129 return sprintf "%1.6f", (1 - ($most_common/$leaf_count) ) / (2**($leaf_count-1) );
737             }
738              
739             sub ai {
740 2     2 1 4 my $self = shift;
741 2         2 my $tree = shift;
742 2   33     5 my $key = shift || $self->throw("Trait name is needed");
743 2   66     6 my $start_node = shift || $tree->get_root_node;
744 2 50       3 return unless $start_node;
745              
746 2         3 my $sum = 0;
747 2         5 for my $node ( $start_node->get_all_Descendents ) {
748 44 100       52 next if $node->is_Leaf;
749 20         29 $sum += $self->_node_ai($node, $key);
750             }
751 2         8 return $sum;
752             }
753              
754              
755             =head2 mc
756              
757             Example : mc($tree, $key, $node);
758             Description: Calculates the Monophyletic Clade (MC) size statistics
759             for the subtree a defined by the (internal) node.
760             Node defaults to the root;
761             Returns : hashref with trait values as keys
762             Exceptions : leaf nodes have to have the trait defined
763             Args : 1. Bio::Tree::TreeI object
764             2. trait name string
765             3. Bio::Tree::NodeI object within the tree, optional
766              
767             Monophyletic Clade (MC) size statistics by Salemi at al 2005. It is
768             calculated for each trait value. 1 E= MC E= nx, where nx is the
769             number of tips with value x:
770              
771             pick the internal node with maximim value for
772             number of of tips with only trait x
773              
774             MC was defined by Parker et al 2008.
775              
776             Salemi, M., Lamers, S.L., Yu, S., de Oliveira, T., Fitch, W.M., McGrath, M.S.,
777             2005. Phylodynamic analysis of Human Immunodeficiency Virus Type 1 in
778             distinct brain compartments provides a model for the neuropathogenesis of
779             AIDS. J. Virol. 79 (17), 11343-11352.
780              
781             Parker, J., Rambaut A., Pybus O., 2008. Correlating viral phenotypes
782             with phylogeny: Accounting for phylogenetic uncertainty Infection,
783             Genetics and Evolution 8 (2008), 239-246.
784              
785             =cut
786              
787             sub _node_mc {
788 16     16   9 my $self = shift;
789 16         13 my $node = shift;
790 16         12 my $key = shift;
791              
792 16         9 my $traits;
793 16         14 my $leaf_count = 0;
794 16         18 for my $node2 ( $node->get_all_Descendents ) {
795 72 100       80 next unless $node2->is_Leaf;
796 52         36 $leaf_count++;
797 52         60 my $trait = $node2->get_tag_values($key);
798 52         58 $traits->{$trait}++;
799             }
800 16         16 return $traits;
801             }
802              
803             sub mc {
804 2     2 1 7 my $self = shift;
805 2         2 my $tree = shift;
806 2   50     5 my $key = shift || die "Trait name is needed";
807 2   66     7 my $start_node = shift || $tree->get_root_node;
808 2 50       4 return unless $start_node;
809              
810 2         3 my $sum; # hashref, keys are trait values
811             my $keys; # hashref, keys are trait values
812 2         3 foreach my $node ( $start_node->get_all_Descendents ) {
813 36 100       41 next if $node->is_Leaf;
814 16         20 my $traits = $self->_node_mc($node, $key);
815 16 100       23 if (scalar keys %$traits == 1) {
816 9         10 my ($value) = keys %$traits;
817 2     2   13 no warnings;
  2         3  
  2         193  
818             $sum->{$value} = $traits->{$value}
819 9 100       28 if $sum->{$value} < $traits->{$value};
820             } else {
821 7         9 map { $keys->{$_} = 1 } keys %$traits;
  14         18  
822             }
823             }
824             # check for cases where there are no clusters
825 2         6 foreach my $value (keys %$keys) {
826 2 50       4 $sum->{$value} = 1 unless defined $sum->{$value};
827             }
828 2         5 return $sum;
829             }
830              
831              
832             1;