File Coverage

Bio/Tree/RandomFactory.pm
Criterion Covered Total %
statement 113 162 69.7
branch 24 42 57.1
condition 11 31 35.4
subroutine 14 17 82.3
pod 8 12 66.6
total 170 264 64.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tree::RandomFactory
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tree::RandomFactory - TreeFactory for generating Random Trees
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tree::RandomFactory
21             my @taxonnames;
22             my $factory = Bio::Tree::RandomFactory->new( -taxa => \@taxonnames,
23             -maxcount => 10);
24              
25             # or for anonymous samples
26              
27             my $factory = Bio::Tree::RandomFactory->new( -num_taxa => 6,
28             -maxcount => 50);
29              
30              
31             my $tree = $factory->next_tree;
32              
33             =head1 DESCRIPTION
34              
35             Builds a random tree every time next_tree is called or up to -maxcount times.
36              
37             This module was originally written for Coalescent simulations see
38             L. I've left the next_tree
39             method intact although it is not generating random trees in the
40             phylogenetic sense. I would be happy for someone to provide
41             alternative implementations which can be used here. As written it
42             will generate random topologies but the branch lengths are built from
43             assumptions in the coalescent and are not appropriate for phylogenetic
44             analyses.
45              
46             This algorithm is based on the make_tree algorithm from Richard Hudson 1990.
47              
48             Hudson, R. R. 1990. Gene genealogies and the coalescent
49             process. Pp. 1-44 in D. Futuyma and J. Antonovics, eds. Oxford
50             surveys in evolutionary biology. Vol. 7. Oxford University
51             Press, New York
52              
53             Sanderson, M ...
54              
55             =head1 FEEDBACK
56              
57             =head2 Mailing Lists
58              
59             User feedback is an integral part of the evolution of this and other
60             Bioperl modules. Send your comments and suggestions preferably to
61             the Bioperl mailing list. Your participation is much appreciated.
62              
63             bioperl-l@bioperl.org - General discussion
64             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
65              
66             =head2 Support
67              
68             Please direct usage questions or support issues to the mailing list:
69              
70             I
71              
72             rather than to the module maintainer directly. Many experienced and
73             reponsive experts will be able look at the problem and quickly
74             address it. Please include a thorough description of the problem
75             with code and data examples if at all possible.
76              
77             =head2 Reporting Bugs
78              
79             Report bugs to the Bioperl bug tracking system to help us keep track
80             of the bugs and their resolution. Bug reports can be submitted via
81             the web:
82              
83             https://github.com/bioperl/bioperl-live/issues
84              
85             =head1 AUTHOR - Jason Stajich
86              
87             Email jason-AT-bioperl.org
88              
89             =head1 CONTRIBUTORS
90              
91             Matthew Hahn, Ematthew.hahn@duke.eduE
92             Mike Sanderson
93              
94             =head1 APPENDIX
95              
96             The rest of the documentation details each of the object methods.
97             Internal methods are usually preceded with a _
98              
99             =cut
100              
101              
102             # Let the code begin...
103              
104              
105             package Bio::Tree::RandomFactory;
106 2     2   590 use vars qw($PRECISION_DIGITS $DefaultNodeType %Defaults);
  2         5  
  2         140  
107 2     2   13 use strict;
  2         4  
  2         111  
108              
109             $PRECISION_DIGITS = 3; # Precision for the branchlength
110             $DefaultNodeType = 'Bio::Tree::Node';
111             %Defaults = ('YuleRate' => 1.0, # as set by Sanderson in Rates
112             'Speciation' => 1.0, #
113             'DefaultTreeMethod' => 'yule',
114             );
115              
116 2     2   521 use Bio::Tools::RandomDistFunctions;
  2         4  
  2         72  
117 2     2   328 use Bio::Tree::Tree;
  2         6  
  2         89  
118              
119 2     2   26 use base qw(Bio::Root::Root Bio::Factory::TreeFactoryI);
  2         6  
  2         2016  
120              
121             =head2 new
122              
123             Title : new
124             Usage : my $factory = Bio::Tree::RandomFactory->new(-samples => \@samples,
125             -maxcount=> $N);
126             Function: Initializes a Bio::Tree::RandomFactory object
127             Returns : Bio::Tree::RandomFactory
128             Args : -nodetype => Type of Nodes to create [default Bio::Tree::Node]
129             -maxcount => [optional] Maximum num trees to create
130             -randtype => Type of random trees so far support
131             - yule/backward_yule/BY [default]
132             - forward_yule/FY
133             - birthdeath_forward/BDF
134             - birthdeath_backwards/BDB
135              
136              
137             ONE of the following must be specified
138             -taxa => $arrayref of taxa names
139             -num_taxa => integer indicating number of taxa in the tree
140              
141             =cut
142              
143             sub new{
144 2     2 1 23 my ($class,@args) = @_;
145 2         17 my $self = $class->SUPER::new(@args);
146            
147 2         5 $self->{'_treecounter'} = 0;
148 2         5 $self->{'_maxcount'} = 0;
149 2         15 my ($nodetype,$randtype,
150             $maxcount, $samps,$samplesize,
151             $taxa, $num_taxa) = $self->_rearrange([qw(NODETYPE
152             RANDTYPE
153             MAXCOUNT
154             SAMPLES
155             SAMPLE_SIZE
156             TAXA
157             NUM_TAXA)],
158             @args);
159 2         4 my @taxa;
160 2   33     9 $nodetype ||= $DefaultNodeType;
161 2         9 $self->nodetype($nodetype);
162 2 50 33     8 $taxa = $samps if defined $samps && ! defined $taxa;
163 2 100 66     12 $num_taxa = $samplesize if $samplesize && ! $num_taxa;
164 2 100       8 if( ! defined $taxa ) {
165 1 50 33     6 if( ! defined $num_taxa || $num_taxa <= 0 ) {
166 0         0 $self->throw("Must specify a valid num_taxa if parameter -TAXA is not specified");
167             }
168 1         4 foreach ( 1..$num_taxa ) { push @taxa, "Taxon$_"; }
  5         11  
169             } else {
170 1 50       4 if( ref($taxa) !~ /ARRAY/i ) {
171 0         0 $self->throw("Must specify a valid ARRAY reference to the parameter -TAXA, did you forget a leading '\\'? for $taxa");
172             }
173 1         3 @taxa = @$taxa;
174             }
175            
176 2         8 $self->taxa(\@taxa);
177 2 50       5 defined $maxcount && $self->maxcount($maxcount);
178 2         4 $self->{'_count'} = 0;
179 2         7 return $self;
180             }
181              
182             =head2 next_tree
183              
184             Title : next_tree
185             Usage : my $tree = $factory->next_tree
186             Function: Returns a random tree based on the initialized number of nodes
187             NOTE: if maxcount is not specified on initialization or
188             set to a valid integer, subsequent calls to next_tree will
189             continue to return random trees and never return undef
190              
191             Returns : Bio::Tree::TreeI object
192             Args : none
193              
194             =cut
195              
196              
197             sub next_tree{
198 480     480 1 1448 my ($self,%options) = @_;
199             return if $self->maxcount &&
200 480 50 33     2139 $self->{'_count'}++ >= $self->maxcount;
201 480   33     3335 my $rand_type = $options{'randtype'} || $self->random_tree_method;
202 480         1833 my $nodetype = $self->nodetype;
203 480         1320 my $treearray;
204              
205 480 50 33     11774 if( $rand_type =~ /(birthdeath_forward|birth|BDF)/i ) {
    50          
    50          
206              
207             } elsif ( $rand_type =~ /(birthdeath_backward|BDB)/i ) {
208 0         0 $treearray = $self->rand_birthdeath_backwards_tree;
209             } elsif( $rand_type =~ /(BY|backwards_yule)/i ||
210             $rand_type =~ /^yule/i ) {
211 480         1417 my $speciation = $options{'speciation'}; # can be undef
212 480         1951 $treearray = $self->rand_yule_c_tree($speciation);
213             } else {
214 0         0 $self->warn("unrecognized random type $rand_type");
215             }
216            
217 480         1162 my @nodes = ();
218 480         1414 foreach my $n ( @$treearray ) {
219 2404         4295 for my $k ( qw(desc1 desc2) ) {
220 4808 100       10150 next unless defined $n->{$k};
221 1924         2191 push @{$n->{'descendents'}}, $nodes[$n->{$k}];
  1924         4637  
222             }
223             push @nodes,
224             $nodetype->new(-id => $n->{'nodenum'},
225             -branch_length => $n->{'time'},
226 2404         14705 -descendents => $n->{'descendents'},
227             );
228             }
229 480         5253 my $T = Bio::Tree::Tree->new(-root => pop @nodes );
230 480         4518 return $T;
231             }
232              
233              
234             =head2 maxcount
235              
236             Title : maxcount
237             Usage : $obj->maxcount($newval)
238             Function:
239             Returns : Maxcount value
240             Args : newvalue (optional)
241              
242              
243             =cut
244              
245             sub maxcount{
246 480     480 1 1373 my ($self,$value) = @_;
247 480 50       1789 if( defined $value) {
248 0 0       0 if( $value =~ /^(\d+)/ ) {
249 0         0 $self->{'_maxcount'} = $1;
250             } else {
251 0         0 $self->warn("Must specify a valid Positive integer to maxcount");
252 0         0 $self->{'_maxcount'} = 0;
253             }
254             }
255 480         2656 return $self->{'_maxcount'};
256             }
257              
258              
259             =head2 reset_tree_count
260              
261             Title : reset_tree_count
262             Usage : $factory->reset_tree_count;
263             Function: Reset the tree counter
264             Returns : none
265             Args : none
266              
267              
268             =cut
269              
270             sub reset_count{
271 0     0 0 0 shift->{'_count'} = 0;
272             }
273              
274             =head2 taxa
275              
276             Title : taxa
277             Usage : $obj->taxa($newval)
278             Function: Set the leaf node names
279             Returns : value of taxa
280             Args : Arrayref of Taxon names
281              
282              
283             =cut
284              
285             sub taxa {
286 482     482 1 1327 my ($self,$value) = @_;
287 482 100       1601 if( defined $value) {
288 2 50       11 if( ref($value) !~ /ARRAY/i ) {
289 0         0 $self->warn("Must specify a valid array ref to the method 'taxa'");
290 0         0 $value = [];
291             }
292 2         3 $self->{'_taxa'} = $value;
293 2         5 $self->{'_num_taxa'} = scalar @$value;
294             }
295 482         1850 return $self->{'_taxa'};
296              
297             }
298              
299             =head2 num_taxa
300              
301             Title : num_taxa
302             Usage : $obj->num_taxa($newval)
303             Function: Get the number of Taxa
304             Returns : value of num_taxa
305             Args : none
306              
307              
308             =cut
309              
310             sub num_taxa {
311 480     480 1 992 my ($self) = @_;
312 480         1870 return $self->{'_num_taxa'};
313             }
314              
315             # alias old methods
316             *num_samples = \&num_taxa;
317             *samples = \&taxa;
318              
319             =head2 random
320              
321             Title : random
322             Usage : my $rfloat = $node->random($size)
323             Function: Generates a random number between 0 and $size
324             This is abstracted so that someone can override and provide their
325             own special RNG. This is expected to be a uniform RNG.
326             Returns : Floating point random
327             Args : $maximum size for random number (defaults to 1)
328              
329              
330             =cut
331              
332             sub random{
333 962     962 1 1810 my ($self,$max) = @_;
334 962         2170 return rand($max);
335             }
336              
337              
338             =head2 random_tree_method
339              
340             Title : random_tree_method
341             Usage : $obj->random_tree_method($newval)
342             Function:
343             Example :
344             Returns : value of random_tree_method (a scalar)
345             Args : on set, new value (a scalar or undef, optional)
346              
347              
348             =cut
349              
350             sub random_tree_method{
351 480     480 1 1189 my $self = shift;
352              
353 480 50       1462 return $self->{'random_tree_method'} = shift if @_;
354 480   33     4050 return $self->{'random_tree_method'} || $Defaults{'DefaultTreeMethod'};
355             }
356              
357             =head2 nodetype
358              
359             Title : nodetype
360             Usage : $obj->nodetype($newval)
361             Function:
362             Example :
363             Returns : value of nodetype (a scalar)
364             Args : on set, new value (a scalar or undef, optional)
365              
366              
367             =cut
368              
369             sub nodetype{
370 962     962 1 2207 my ($self,$value) = @_;
371 962 100       2139 if( defined $value) {
372 2         82 eval "require $value";
373 2 50       13 if( $@ ) { $self->throw("$@: Unrecognized Node type for ".ref($self).
  0         0  
374             "'$value'");}
375            
376 2         6 my $a = bless {},$value;
377 2 50       21 unless( $a->isa('Bio::Tree::NodeI') ) {
378 0         0 $self->throw("Must provide a valid Bio::Tree::NodeI or child class to SeqFactory Not $value");
379             }
380 2         11 $self->{'nodetype'} = $value;
381             }
382 962         2296 return $self->{'nodetype'};
383             }
384              
385              
386             # The assignment of times are based on Mike Sanderson's r8s code
387             # The topology assignment code is based on Richard Hudson's
388             # make_trees
389              
390              
391             sub rand_yule_c_tree {
392 480     480 0 1386 my ($self,$speciation) = @_;
393 480   33     3046 $speciation ||= $Defaults{'Speciation'};
394 480         2182 my $n_taxa = $self->num_taxa;
395 480   50     1742 my $taxa = $self->taxa || [];
396 480         1188 my $nodetype = $self->nodetype;
397            
398 480         4538 my $randfuncs = Bio::Tools::RandomDistFunctions->new();
399 480         1590 my $rate = $Defaults{'YuleRate'};
400 480         1013 my (@tree,@list,@times,$i,$in);
401 480         1645 my $max = 2 * $n_taxa - 1;
402 480         1638 for($in=0;$in < $max; $in++ ) {
403 2404         8194 push @tree, { 'nodenum' => "Node$in" };
404             }
405             # setup leaf nodes
406 480         1925 for($in=0;$in < $n_taxa;$in++) {
407 1442         2591 $tree[$in]->{'time'} = 0;
408 1442         2151 $tree[$in]->{'desc1'} = undef;
409 1442         2134 $tree[$in]->{'desc2'} = undef;
410 1442 50       3215 if( my $r = $taxa->[$in] ) {
411 1442         1969 $tree[$in]->{'nodenum'} = $r;
412             }
413 1442         3258 push @list, $in;
414             }
415            
416 480         1763 for( $i = 0; $i < $n_taxa - 1; $i++ ) {
417             # draw random interval times
418 962         3384 push @times, $randfuncs->rand_birth_distribution($speciation);
419             }
420             # sort smallest to largest
421 480         2323 @times = sort {$a <=> $b} @times;
  483         1682  
422             # topology generation
423 480         1915 for ($in = $n_taxa; $in > 1; $in-- ) {
424 962         1498 my $time = shift @times;
425            
426 962         2382 my $pick = int $self->random($in);
427 962         1463 my $nodeindex = $list[$pick];
428 962         1626 $tree[$list[$pick]]->{'time'} = $time;
429 962         1708 my $swap = 2 * $n_taxa - $in;
430 962         1664 $tree[$swap]->{'desc1'} = $nodeindex;
431 962         1636 $list[$pick] = $list[$in-1];
432            
433 962         1765 $pick = int rand($in - 1);
434 962         1442 $nodeindex = $list[$pick];
435 962         1933 $tree[$list[$pick]]->{'time'} = $time;
436 962         1499 $tree[$swap]->{'desc2'} = $nodeindex;
437 962         2256 $list[$pick] = $swap;
438             }
439 480         1218 $tree[-1]->{'time'} = shift @times;
440 480         2392 return \@tree;
441             }
442              
443              
444              
445             sub rand_birthdeath_backwards_tree {
446 0     0 0   my ($self) = @_;
447 0           my $n_taxa = $self->num_taxa;
448 0   0       my $taxa = $self->taxa || [];
449            
450 0           my $randfuncs = Bio::Tools::RandomDistFunctions->new();
451 0           my $rate = $Defaults{'YuleRate'};
452 0           my (@tree,@list,@times,$i,$in);
453 0           my $max = 2 * $n_taxa - 1;
454 0           for($in=0;$in < $max; $in++ ) {
455 0           push @tree, { 'nodenum' => "Node$in" };
456             }
457             # setup leaf nodes
458 0           for($in=0;$in < $n_taxa;$in++) {
459 0           $tree[$in]->{'time'} = 0;
460 0           $tree[$in]->{'desc1'} = undef;
461 0           $tree[$in]->{'desc2'} = undef;
462 0 0         if( my $r = $taxa->[$in] ) {
463             # deal with pre-labeled nodes
464 0           $tree[$in]->{'nodenum'} = $r;
465             }
466 0           push @list, $in;
467             }
468 0           my ($time) = (0);
469              
470             # topology generation
471 0           for ($in = $n_taxa; $in > 1; $in-- ) {
472 0           my $pick = int $self->random($in);
473 0           my $nodeindex = $list[$pick];
474 0           my $swap = 2 * $n_taxa - $in;
475 0           $time += $randfuncs->rand_geometric_distribution($n_taxa * $rate);;
476 0           $tree[$list[$pick]]->{'time'} = $time;
477 0           $tree[$swap]->{'desc1'} = $nodeindex;
478 0           $list[$pick] = $list[$in-1];
479            
480 0           $pick = int rand($in - 1);
481 0           $nodeindex = $list[$pick];
482 0           $tree[$list[$pick]]->{'time'} = $time;
483 0           $tree[$swap]->{'desc2'} = $nodeindex;
484 0           $list[$pick] = $swap;
485             }
486 0           my $root = $tree[-1];
487 0           $time += $randfuncs->rand_geometric_distribution($n_taxa * $rate);;
488 0           $root->{'time'} = $time;
489              
490             # Normalize times by the root node...
491 0           for my $node ( @tree ) {
492 0           $node->{'time'} /= $root->{'time'};
493             }
494 0           return \@tree;
495             }
496              
497              
498             # The assignment of times are based on Mike Sanderson's r8s code
499             # The topology assignment code is based on Richard Hudson's
500             # make_trees
501              
502       0 0   sub rand_birth_death_tree {
503             # Still need to finish
504             # my ($self,$spec_rate,$extinct_rate,$char_rate) = @_;
505             # my $n_taxa = $self->num_taxa;
506             # my $dt = 0.1 / $n_taxa;
507             # my @tree;
508             # my $max = 3 * $n_taxa - 1;
509             # # setup leaf nodes
510            
511             # for($in=0;$in < $size;$in++) {
512             # push @tree, { 'nodenum' => $taxa->[$in] || "Node$in",
513             # 'time' => 0,
514             # 'desc1' => undef,
515             # 'desc2' => undef,
516             # };
517             # }
518             # my $time = $dt;
519             # my $idx = 0;
520             # while( $n_taxa > 1 ) {
521             # if ( event($dt * $spec_rate, $n_taxa) ) {
522             # my $pick = int $self->random($n_taxa);
523             # my $pick2 = int $self->random($n_taxa);
524             # while( $pick2 == $pick ) {
525             # $pick2 = int $self->random($n_taxa);
526             # }
527             # to finish....
528            
529             # $tree[$swap]->{'desc1'} = $nodeindex;
530             # }
531             # }
532              
533            
534              
535             # $list[$pick] = $list[$in-1];
536            
537             # $pick = int rand($in - 1);
538             # $nodeindex = $list[$pick];
539             # $tree[$swap]->{'desc2'} = $nodeindex;
540             # $list[$pick] = $swap;
541             # $tree[$swap]->{'time'} = $times[$ix++];
542             # }
543             }
544              
545              
546             1;