File Coverage

Bio/Tree/RandomFactory.pm
Criterion Covered Total %
statement 113 162 69.7
branch 24 42 57.1
condition 11 31 35.4
subroutine 14 17 82.3
pod 8 12 66.6
total 170 264 64.3


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Tree::RandomFactory
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Jason Stajich
7             #
8             # Copyright Jason Stajich
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tree::RandomFactory - TreeFactory for generating Random Trees
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tree::RandomFactory
21             my @taxonnames;
22             my $factory = Bio::Tree::RandomFactory->new( -taxa => \@taxonnames,
23             -maxcount => 10);
24              
25             # or for anonymous samples
26              
27             my $factory = Bio::Tree::RandomFactory->new( -num_taxa => 6,
28             -maxcount => 50);
29              
30              
31             my $tree = $factory->next_tree;
32              
33             =head1 DESCRIPTION
34              
35             Builds a random tree every time next_tree is called or up to -maxcount times.
36              
37             This module was originally written for Coalescent simulations see
38             L. I've left the next_tree
39             method intact although it is not generating random trees in the
40             phylogenetic sense. I would be happy for someone to provide
41             alternative implementations which can be used here. As written it
42             will generate random topologies but the branch lengths are built from
43             assumptions in the coalescent and are not appropriate for phylogenetic
44             analyses.
45              
46             This algorithm is based on the make_tree algorithm from Richard Hudson 1990.
47              
48             Hudson, R. R. 1990. Gene genealogies and the coalescent
49             process. Pp. 1-44 in D. Futuyma and J. Antonovics, eds. Oxford
50             surveys in evolutionary biology. Vol. 7. Oxford University
51             Press, New York
52              
53             Sanderson, M ...
54              
55             =head1 FEEDBACK
56              
57             =head2 Mailing Lists
58              
59             User feedback is an integral part of the evolution of this and other
60             Bioperl modules. Send your comments and suggestions preferably to
61             the Bioperl mailing list. Your participation is much appreciated.
62              
63             bioperl-l@bioperl.org - General discussion
64             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
65              
66             =head2 Support
67              
68             Please direct usage questions or support issues to the mailing list:
69              
70             I
71              
72             rather than to the module maintainer directly. Many experienced and
73             reponsive experts will be able look at the problem and quickly
74             address it. Please include a thorough description of the problem
75             with code and data examples if at all possible.
76              
77             =head2 Reporting Bugs
78              
79             Report bugs to the Bioperl bug tracking system to help us keep track
80             of the bugs and their resolution. Bug reports can be submitted via
81             the web:
82              
83             https://github.com/bioperl/bioperl-live/issues
84              
85             =head1 AUTHOR - Jason Stajich
86              
87             Email jason-AT-bioperl.org
88              
89             =head1 CONTRIBUTORS
90              
91             Matthew Hahn, Ematthew.hahn@duke.eduE
92             Mike Sanderson
93              
94             =head1 APPENDIX
95              
96             The rest of the documentation details each of the object methods.
97             Internal methods are usually preceded with a _
98              
99             =cut
100              
101              
102             # Let the code begin...
103              
104              
105             package Bio::Tree::RandomFactory;
106 2     2   629 use vars qw($PRECISION_DIGITS $DefaultNodeType %Defaults);
  2         3  
  2         146  
107 2     2   10 use strict;
  2         3  
  2         97  
108              
109             $PRECISION_DIGITS = 3; # Precision for the branchlength
110             $DefaultNodeType = 'Bio::Tree::Node';
111             %Defaults = ('YuleRate' => 1.0, # as set by Sanderson in Rates
112             'Speciation' => 1.0, #
113             'DefaultTreeMethod' => 'yule',
114             );
115              
116 2     2   696 use Bio::Tools::RandomDistFunctions;
  2         5  
  2         69  
117 2     2   445 use Bio::Tree::Tree;
  2         4  
  2         67  
118              
119 2     2   16 use base qw(Bio::Root::Root Bio::Factory::TreeFactoryI);
  2         4  
  2         1836  
120              
121             =head2 new
122              
123             Title : new
124             Usage : my $factory = Bio::Tree::RandomFactory->new(-samples => \@samples,
125             -maxcount=> $N);
126             Function: Initializes a Bio::Tree::RandomFactory object
127             Returns : Bio::Tree::RandomFactory
128             Args : -nodetype => Type of Nodes to create [default Bio::Tree::Node]
129             -maxcount => [optional] Maximum num trees to create
130             -randtype => Type of random trees so far support
131             - yule/backward_yule/BY [default]
132             - forward_yule/FY
133             - birthdeath_forward/BDF
134             - birthdeath_backwards/BDB
135              
136              
137             ONE of the following must be specified
138             -taxa => $arrayref of taxa names
139             -num_taxa => integer indicating number of taxa in the tree
140              
141             =cut
142              
143             sub new{
144 2     2 1 19 my ($class,@args) = @_;
145 2         17 my $self = $class->SUPER::new(@args);
146            
147 2         4 $self->{'_treecounter'} = 0;
148 2         3 $self->{'_maxcount'} = 0;
149 2         16 my ($nodetype,$randtype,
150             $maxcount, $samps,$samplesize,
151             $taxa, $num_taxa) = $self->_rearrange([qw(NODETYPE
152             RANDTYPE
153             MAXCOUNT
154             SAMPLES
155             SAMPLE_SIZE
156             TAXA
157             NUM_TAXA)],
158             @args);
159 2         6 my @taxa;
160 2   33     13 $nodetype ||= $DefaultNodeType;
161 2         7 $self->nodetype($nodetype);
162 2 50 33     11 $taxa = $samps if defined $samps && ! defined $taxa;
163 2 100 66     9 $num_taxa = $samplesize if $samplesize && ! $num_taxa;
164 2 100       8 if( ! defined $taxa ) {
165 1 50 33     5 if( ! defined $num_taxa || $num_taxa <= 0 ) {
166 0         0 $self->throw("Must specify a valid num_taxa if parameter -TAXA is not specified");
167             }
168 1         3 foreach ( 1..$num_taxa ) { push @taxa, "Taxon$_"; }
  5         8  
169             } else {
170 1 50       7 if( ref($taxa) !~ /ARRAY/i ) {
171 0         0 $self->throw("Must specify a valid ARRAY reference to the parameter -TAXA, did you forget a leading '\\'? for $taxa");
172             }
173 1         2 @taxa = @$taxa;
174             }
175            
176 2         10 $self->taxa(\@taxa);
177 2 50       4 defined $maxcount && $self->maxcount($maxcount);
178 2         5 $self->{'_count'} = 0;
179 2         5 return $self;
180             }
181              
182             =head2 next_tree
183              
184             Title : next_tree
185             Usage : my $tree = $factory->next_tree
186             Function: Returns a random tree based on the initialized number of nodes
187             NOTE: if maxcount is not specified on initialization or
188             set to a valid integer, subsequent calls to next_tree will
189             continue to return random trees and never return undef
190              
191             Returns : Bio::Tree::TreeI object
192             Args : none
193              
194             =cut
195              
196              
197             sub next_tree{
198 1029     1029 1 1965 my ($self,%options) = @_;
199             return if $self->maxcount &&
200 1029 50 33     3307 $self->{'_count'}++ >= $self->maxcount;
201 1029   33     5228 my $rand_type = $options{'randtype'} || $self->random_tree_method;
202 1029         3500 my $nodetype = $self->nodetype;
203 1029         1299 my $treearray;
204              
205 1029 50 33     19266 if( $rand_type =~ /(birthdeath_forward|birth|BDF)/i ) {
    50          
    50          
206              
207             } elsif ( $rand_type =~ /(birthdeath_backward|BDB)/i ) {
208 0         0 $treearray = $self->rand_birthdeath_backwards_tree;
209             } elsif( $rand_type =~ /(BY|backwards_yule)/i ||
210             $rand_type =~ /^yule/i ) {
211 1029         1836 my $speciation = $options{'speciation'}; # can be undef
212 1029         2624 $treearray = $self->rand_yule_c_tree($speciation);
213             } else {
214 0         0 $self->warn("unrecognized random type $rand_type");
215             }
216            
217 1029         1591 my @nodes = ();
218 1029         1886 foreach my $n ( @$treearray ) {
219 5149         5779 for my $k ( qw(desc1 desc2) ) {
220 10298 100       17025 next unless defined $n->{$k};
221 4120         3177 push @{$n->{'descendents'}}, $nodes[$n->{$k}];
  4120         8445  
222             }
223             push @nodes,
224             $nodetype->new(-id => $n->{'nodenum'},
225             -branch_length => $n->{'time'},
226 5149         28403 -descendents => $n->{'descendents'},
227             );
228             }
229 1029         8763 my $T = Bio::Tree::Tree->new(-root => pop @nodes );
230 1029         6681 return $T;
231             }
232              
233              
234             =head2 maxcount
235              
236             Title : maxcount
237             Usage : $obj->maxcount($newval)
238             Function:
239             Returns : Maxcount value
240             Args : newvalue (optional)
241              
242              
243             =cut
244              
245             sub maxcount{
246 1029     1029 1 1523 my ($self,$value) = @_;
247 1029 50       2258 if( defined $value) {
248 0 0       0 if( $value =~ /^(\d+)/ ) {
249 0         0 $self->{'_maxcount'} = $1;
250             } else {
251 0         0 $self->warn("Must specify a valid Positive integer to maxcount");
252 0         0 $self->{'_maxcount'} = 0;
253             }
254             }
255 1029         4300 return $self->{'_maxcount'};
256             }
257              
258              
259             =head2 reset_tree_count
260              
261             Title : reset_tree_count
262             Usage : $factory->reset_tree_count;
263             Function: Reset the tree counter
264             Returns : none
265             Args : none
266              
267              
268             =cut
269              
270             sub reset_count{
271 0     0 0 0 shift->{'_count'} = 0;
272             }
273              
274             =head2 taxa
275              
276             Title : taxa
277             Usage : $obj->taxa($newval)
278             Function: Set the leaf node names
279             Returns : value of taxa
280             Args : Arrayref of Taxon names
281              
282              
283             =cut
284              
285             sub taxa {
286 1031     1031 1 1560 my ($self,$value) = @_;
287 1031 100       2645 if( defined $value) {
288 2 50       10 if( ref($value) !~ /ARRAY/i ) {
289 0         0 $self->warn("Must specify a valid array ref to the method 'taxa'");
290 0         0 $value = [];
291             }
292 2         3 $self->{'_taxa'} = $value;
293 2         4 $self->{'_num_taxa'} = scalar @$value;
294             }
295 1031         3346 return $self->{'_taxa'};
296              
297             }
298              
299             =head2 num_taxa
300              
301             Title : num_taxa
302             Usage : $obj->num_taxa($newval)
303             Function: Get the number of Taxa
304             Returns : value of num_taxa
305             Args : none
306              
307              
308             =cut
309              
310             sub num_taxa {
311 1029     1029 1 1354 my ($self) = @_;
312 1029         2146 return $self->{'_num_taxa'};
313             }
314              
315             # alias old methods
316             *num_samples = \&num_taxa;
317             *samples = \&taxa;
318              
319             =head2 random
320              
321             Title : random
322             Usage : my $rfloat = $node->random($size)
323             Function: Generates a random number between 0 and $size
324             This is abstracted so that someone can override and provide their
325             own special RNG. This is expected to be a uniform RNG.
326             Returns : Floating point random
327             Args : $maximum size for random number (defaults to 1)
328              
329              
330             =cut
331              
332             sub random{
333 2060     2060 1 2469 my ($self,$max) = @_;
334 2060         3624 return rand($max);
335             }
336              
337              
338             =head2 random_tree_method
339              
340             Title : random_tree_method
341             Usage : $obj->random_tree_method($newval)
342             Function:
343             Example :
344             Returns : value of random_tree_method (a scalar)
345             Args : on set, new value (a scalar or undef, optional)
346              
347              
348             =cut
349              
350             sub random_tree_method{
351 1029     1029 1 1505 my $self = shift;
352              
353 1029 50       2565 return $self->{'random_tree_method'} = shift if @_;
354 1029   33     6769 return $self->{'random_tree_method'} || $Defaults{'DefaultTreeMethod'};
355             }
356              
357             =head2 nodetype
358              
359             Title : nodetype
360             Usage : $obj->nodetype($newval)
361             Function:
362             Example :
363             Returns : value of nodetype (a scalar)
364             Args : on set, new value (a scalar or undef, optional)
365              
366              
367             =cut
368              
369             sub nodetype{
370 2060     2060 1 2116 my ($self,$value) = @_;
371 2060 100       3877 if( defined $value) {
372 2         80 eval "require $value";
373 2 50       7 if( $@ ) { $self->throw("$@: Unrecognized Node type for ".ref($self).
  0         0  
374             "'$value'");}
375            
376 2         6 my $a = bless {},$value;
377 2 50       21 unless( $a->isa('Bio::Tree::NodeI') ) {
378 0         0 $self->throw("Must provide a valid Bio::Tree::NodeI or child class to SeqFactory Not $value");
379             }
380 2         13 $self->{'nodetype'} = $value;
381             }
382 2060         3935 return $self->{'nodetype'};
383             }
384              
385              
386             # The assignment of times are based on Mike Sanderson's r8s code
387             # The topology assignment code is based on Richard Hudson's
388             # make_trees
389              
390              
391             sub rand_yule_c_tree {
392 1029     1029 0 1566 my ($self,$speciation) = @_;
393 1029   33     4390 $speciation ||= $Defaults{'Speciation'};
394 1029         2488 my $n_taxa = $self->num_taxa;
395 1029   50     3519 my $taxa = $self->taxa || [];
396 1029         1781 my $nodetype = $self->nodetype;
397            
398 1029         8001 my $randfuncs = Bio::Tools::RandomDistFunctions->new();
399 1029         2182 my $rate = $Defaults{'YuleRate'};
400 1029         1254 my (@tree,@list,@times,$i,$in);
401 1029         2854 my $max = 2 * $n_taxa - 1;
402 1029         2910 for($in=0;$in < $max; $in++ ) {
403 5149         16382 push @tree, { 'nodenum' => "Node$in" };
404             }
405             # setup leaf nodes
406 1029         3174 for($in=0;$in < $n_taxa;$in++) {
407 3089         3491 $tree[$in]->{'time'} = 0;
408 3089         3322 $tree[$in]->{'desc1'} = undef;
409 3089         2693 $tree[$in]->{'desc2'} = undef;
410 3089 50       5731 if( my $r = $taxa->[$in] ) {
411 3089         3040 $tree[$in]->{'nodenum'} = $r;
412             }
413 3089         5771 push @list, $in;
414             }
415            
416 1029         2881 for( $i = 0; $i < $n_taxa - 1; $i++ ) {
417             # draw random interval times
418 2060         5581 push @times, $randfuncs->rand_birth_distribution($speciation);
419             }
420             # sort smallest to largest
421 1029         2887 @times = sort {$a <=> $b} @times;
  1032         2534  
422             # topology generation
423 1029         3203 for ($in = $n_taxa; $in > 1; $in-- ) {
424 2060         2132 my $time = shift @times;
425            
426 2060         4197 my $pick = int $self->random($in);
427 2060         2560 my $nodeindex = $list[$pick];
428 2060         2959 $tree[$list[$pick]]->{'time'} = $time;
429 2060         2329 my $swap = 2 * $n_taxa - $in;
430 2060         2337 $tree[$swap]->{'desc1'} = $nodeindex;
431 2060         2500 $list[$pick] = $list[$in-1];
432            
433 2060         2000 $pick = int rand($in - 1);
434 2060         1818 $nodeindex = $list[$pick];
435 2060         2147 $tree[$list[$pick]]->{'time'} = $time;
436 2060         2134 $tree[$swap]->{'desc2'} = $nodeindex;
437 2060         4377 $list[$pick] = $swap;
438             }
439 1029         1729 $tree[-1]->{'time'} = shift @times;
440 1029         3437 return \@tree;
441             }
442              
443              
444              
445             sub rand_birthdeath_backwards_tree {
446 0     0 0   my ($self) = @_;
447 0           my $n_taxa = $self->num_taxa;
448 0   0       my $taxa = $self->taxa || [];
449            
450 0           my $randfuncs = Bio::Tools::RandomDistFunctions->new();
451 0           my $rate = $Defaults{'YuleRate'};
452 0           my (@tree,@list,@times,$i,$in);
453 0           my $max = 2 * $n_taxa - 1;
454 0           for($in=0;$in < $max; $in++ ) {
455 0           push @tree, { 'nodenum' => "Node$in" };
456             }
457             # setup leaf nodes
458 0           for($in=0;$in < $n_taxa;$in++) {
459 0           $tree[$in]->{'time'} = 0;
460 0           $tree[$in]->{'desc1'} = undef;
461 0           $tree[$in]->{'desc2'} = undef;
462 0 0         if( my $r = $taxa->[$in] ) {
463             # deal with pre-labeled nodes
464 0           $tree[$in]->{'nodenum'} = $r;
465             }
466 0           push @list, $in;
467             }
468 0           my ($time) = (0);
469              
470             # topology generation
471 0           for ($in = $n_taxa; $in > 1; $in-- ) {
472 0           my $pick = int $self->random($in);
473 0           my $nodeindex = $list[$pick];
474 0           my $swap = 2 * $n_taxa - $in;
475 0           $time += $randfuncs->rand_geometric_distribution($n_taxa * $rate);;
476 0           $tree[$list[$pick]]->{'time'} = $time;
477 0           $tree[$swap]->{'desc1'} = $nodeindex;
478 0           $list[$pick] = $list[$in-1];
479            
480 0           $pick = int rand($in - 1);
481 0           $nodeindex = $list[$pick];
482 0           $tree[$list[$pick]]->{'time'} = $time;
483 0           $tree[$swap]->{'desc2'} = $nodeindex;
484 0           $list[$pick] = $swap;
485             }
486 0           my $root = $tree[-1];
487 0           $time += $randfuncs->rand_geometric_distribution($n_taxa * $rate);;
488 0           $root->{'time'} = $time;
489              
490             # Normalize times by the root node...
491 0           for my $node ( @tree ) {
492 0           $node->{'time'} /= $root->{'time'};
493             }
494 0           return \@tree;
495             }
496              
497              
498             # The assignment of times are based on Mike Sanderson's r8s code
499             # The topology assignment code is based on Richard Hudson's
500             # make_trees
501              
502       0 0   sub rand_birth_death_tree {
503             # Still need to finish
504             # my ($self,$spec_rate,$extinct_rate,$char_rate) = @_;
505             # my $n_taxa = $self->num_taxa;
506             # my $dt = 0.1 / $n_taxa;
507             # my @tree;
508             # my $max = 3 * $n_taxa - 1;
509             # # setup leaf nodes
510            
511             # for($in=0;$in < $size;$in++) {
512             # push @tree, { 'nodenum' => $taxa->[$in] || "Node$in",
513             # 'time' => 0,
514             # 'desc1' => undef,
515             # 'desc2' => undef,
516             # };
517             # }
518             # my $time = $dt;
519             # my $idx = 0;
520             # while( $n_taxa > 1 ) {
521             # if ( event($dt * $spec_rate, $n_taxa) ) {
522             # my $pick = int $self->random($n_taxa);
523             # my $pick2 = int $self->random($n_taxa);
524             # while( $pick2 == $pick ) {
525             # $pick2 = int $self->random($n_taxa);
526             # }
527             # to finish....
528            
529             # $tree[$swap]->{'desc1'} = $nodeindex;
530             # }
531             # }
532              
533            
534              
535             # $list[$pick] = $list[$in-1];
536            
537             # $pick = int rand($in - 1);
538             # $nodeindex = $list[$pick];
539             # $tree[$swap]->{'desc2'} = $nodeindex;
540             # $list[$pick] = $swap;
541             # $tree[$swap]->{'time'} = $times[$ix++];
542             # }
543             }
544              
545              
546             1;