File Coverage

blib/lib/Bio/Phylo/Generator.pm
Criterion Covered Total %
statement 159 159 100.0
branch 14 14 100.0
condition 13 27 48.1
subroutine 24 24 100.0
pod 8 8 100.0
total 218 232 93.9


line stmt bran cond sub pod time code
1             package Bio::Phylo::Generator;
2 1     1   487 use strict;
  1         2  
  1         26  
3 1     1   5 use Bio::Phylo::Util::CONSTANT 'looks_like_hash';
  1         2  
  1         44  
4 1     1   6 use Bio::Phylo::Util::Exceptions 'throw';
  1         2  
  1         33  
5 1     1   4 use Bio::Phylo::Util::Logger;
  1         2  
  1         28  
6 1     1   269 use Bio::Phylo::Util::Dependency 'Math::Random';
  1         2  
  1         4  
7 1     1   281 use Bio::Phylo::Factory;
  1         2  
  1         5  
8             Math::Random->import(qw'random_exponential random_uniform');
9             {
10             my $logger = Bio::Phylo::Util::Logger->new;
11             my $factory = Bio::Phylo::Factory->new;
12              
13             =head1 NAME
14              
15             Bio::Phylo::Generator - Generator of tree topologies
16              
17             =head1 SYNOPSIS
18              
19             use Bio::Phylo::Factory;
20             my $fac = Bio::Phylo::Factory->new;
21             my $gen = $fac->create_generator;
22             my $trees = $gen->gen_rand_pure_birth(
23             '-tips' => 10,
24             '-model' => 'yule',
25             '-trees' => 10,
26             );
27              
28             # prints 'Bio::Phylo::Forest'
29             print ref $trees;
30              
31             =head1 DESCRIPTION
32              
33             The generator module is used to simulate trees under various models.
34              
35             =head1 METHODS
36              
37             =head2 CONSTRUCTOR
38              
39             =over
40              
41             =item new()
42              
43             Generator constructor.
44              
45             Type : Constructor
46             Title : new
47             Usage : my $gen = Bio::Phylo::Generator->new;
48             Function: Initializes a Bio::Phylo::Generator object.
49             Returns : A Bio::Phylo::Generator object.
50             Args : NONE
51              
52             =cut
53              
54             sub new {
55              
56             # could be child class
57 1     1 1 6 my $class = shift;
58              
59             # notify user
60 1         5 $logger->info("constructor called for '$class'");
61              
62             # the object turns out to be stateless
63 1         2 my $self = bless \$class, $class;
64 1         5 return $self;
65             }
66              
67             =back
68              
69             =head2 GENERATOR
70              
71             =over
72              
73             =item gen_rand_pure_birth()
74              
75             This method generates a Bio::Phylo::Forest
76             object populated with Yule/Hey trees.
77              
78             Type : Generator
79             Title : gen_rand_pure_birth
80             Usage : my $trees = $gen->gen_rand_pure_birth(
81             '-tips' => 10,
82             '-model' => 'yule',
83             '-trees' => 10,
84             );
85             Function: Generates markov tree shapes,
86             with branch lengths sampled
87             from a user defined model of
88             clade growth, for a user defined
89             number of tips.
90             Returns : A Bio::Phylo::Forest object.
91             Args : -tips => number of terminal nodes (default: 10),
92             -model => either 'yule' or 'hey',
93             -trees => number of trees to generate (default: 10)
94             Optional: -factory => a Bio::Phylo::Factory object
95              
96             =cut
97              
98             sub _yule_rand_bl {
99 41     41   65 my $i = shift;
100 41         112 return random_exponential( 1, 1 / ( $i + 1 ) );
101             }
102              
103             sub _hey_rand_bl {
104 19     19   21 my $i = shift;
105 19         57 random_exponential( 1, ( 1 / ( $i * ( $i + 1 ) ) ) );
106             }
107              
108             sub _make_split {
109 98     98   226 my ( $parent, $length, $fac, $nodes ) = @_;
110 98         115 my @tips;
111 98         168 for ( 1 .. 2 ) {
112 196         748 my $node = $fac->create_node;
113 196         484 $node->set_branch_length($length);
114 196         496 $node->set_parent($parent);
115 196         351 $nodes->{ $node->get_id } = $node;
116 196         435 push @tips, $node;
117             }
118 98         185 return @tips;
119             }
120              
121             sub gen_rand_pure_birth {
122 3     3 1 17 my $random = shift;
123 3         13 my %options = looks_like_hash @_;
124 3         7 my $model = $options{'-model'};
125 3 100       19 if ( $model =~ m/yule/i ) {
    100          
126 1         3 return $random->_gen_pure_birth(
127             '-blgen' => \&_yule_rand_bl,
128             @_,
129             );
130             }
131             elsif ( $model =~ m/hey/i ) {
132 1         7 return $random->_gen_pure_birth(
133             '-blgen' => \&_hey_rand_bl,
134             @_,
135             );
136             }
137             else {
138 1         6 throw 'BadFormat' => "model '$model' not implemented";
139             }
140             }
141              
142             sub _gen_pure_birth {
143 5     5   10 my $random = shift;
144 5         15 my %options = looks_like_hash @_;
145 5   33     30 my $factory = $options{'-factory'} || $factory;
146 5         10 my $blgen = $options{'-blgen'};
147 5   100     23 my $killrate = $options{'-killrate'} || 0;
148 5   50     12 my $ntips = $options{'-tips'} || 10;
149 5   50     34 my $ntrees = $options{'-trees'} || 10;
150 5         39 my $forest = $factory->create_forest;
151 5         20 for ( 0 .. ( $ntrees - 1 ) ) {
152              
153             # instantiate root node
154 5         24 my $root = $factory->create_node;
155 5         21 $root->set_branch_length(0);
156 5         12 my %nodes = ( $root->get_id => $root );
157              
158             # make the first split, insert new tips in @tips, from
159             # which we will draw (without replacement) a new tip
160             # to split until we've reached target number
161 5         21 push my @tips, _make_split( $root, $blgen->(1), $factory, \%nodes );
162              
163             # start growing the tree
164 5         10 my $i = 2;
165 5         6 my @extinct;
166 5         7 while (1) {
167 93 100       267 if ( rand(1) < $killrate ) {
168 3         8 my $extinct_index = int rand scalar @tips;
169 3         7 my $extinct = splice @tips, $extinct_index, 1;
170 3         4 push @extinct, $extinct;
171 3         8 delete $nodes{ $extinct->get_id };
172             }
173              
174             # obtain candidate parent of current split
175 93         126 my $parent;
176 93         165 ( $parent, @tips ) = _fetch_equiprobable(@tips);
177              
178             # generate branch length
179 93         173 my $bl = $blgen->( $i++ );
180              
181             # stretch all remaining tips to the present
182 93         645 for my $tip (@tips) {
183 869         1445 my $oldbl = $tip->get_branch_length;
184 869         1641 $tip->set_branch_length( $oldbl + $bl );
185             }
186              
187             # add new nodes to tips array
188 93         179 push @tips, _make_split( $parent, $bl, $factory, \%nodes );
189 93 100       200 last if scalar @tips >= $ntips;
190             }
191 5         27 my $tree = $factory->create_tree;
192             $tree->insert(
193 198         230 map { $_->[0] }
194 831         897 sort { $a->[1] <=> $b->[1] }
195 5         26 map { [ $_, $_->get_id ] } values %nodes
  198         297  
196             );
197 5         55 $tree->prune_tips( \@extinct );
198 5         31 $tree->_analyze;
199 5         27 $forest->insert($tree);
200             }
201 5         38 return $forest;
202             }
203              
204             =item gen_rand_birth_death()
205              
206             This method generates a Bio::Phylo::Forest
207             object populated under a birth/death model
208              
209             Type : Generator
210             Title : gen_rand_birth_death
211             Usage : my $trees = $gen->gen_rand_birth_death(
212             '-tips' => 10,
213             '-killrate' => 0.2,
214             '-trees' => 10,
215             );
216             Function: Generates trees where any growing lineage is equally
217             likely to split at any one time, and is equally likely
218             to go extinct at '-killrate'
219             Returns : A Bio::Phylo::Forest object.
220             Args : -tips => number of terminal nodes (default: 10),
221             -killrate => extinction over speciation rate (default: 0.2)
222             -trees => number of trees to generate (default: 10)
223             Optional: -factory => a Bio::Phylo::Factory object
224             Comments: Past extinction events are retained as unbranched internal
225             nodes in the produced trees.
226              
227             =cut
228              
229             sub gen_rand_birth_death {
230 1     1 1 10 my $random = shift;
231 1         5 my %options = looks_like_hash @_;
232             return $random->_gen_pure_birth(
233             '-blgen' => \&_yule_rand_bl,
234 1   50     10 '-killrate' => $options{'-killrate'} || 0.2,
235             @_,
236             );
237             }
238              
239             =item gen_exp_pure_birth()
240              
241             This method generates a Bio::Phylo::Forest object
242             populated with Yule/Hey trees whose branch lengths
243             are proportional to the expected waiting times (i.e.
244             not sampled from a distribution).
245              
246             Type : Generator
247             Title : gen_exp_pure_birth
248             Usage : my $trees = $gen->gen_exp_pure_birth(
249             '-tips' => 10,
250             '-model' => 'yule',
251             '-trees' => 10,
252             );
253             Function: Generates markov tree shapes,
254             with branch lengths following
255             the expectation under a user
256             defined model of clade growth,
257             for a user defined number of tips.
258             Returns : A Bio::Phylo::Forest object.
259             Args : -tips => number of terminal nodes (default: 10),
260             -model => either 'yule' or 'hey'
261             -trees => number of trees to generate (default: 10)
262             Optional: -factory => a Bio::Phylo::Factory object
263              
264             =cut
265              
266             sub _yule_exp_bl {
267 19     19   24 my $i = shift;
268 19         38 return 1 / ( $i + 1 );
269             }
270              
271             sub _hey_exp_bl {
272 19     19   27 my $i = shift;
273 19         41 return 1 / ( $i * ( $i + 1 ) );
274             }
275              
276             sub gen_exp_pure_birth {
277 3     3 1 24 my $random = shift;
278 3         14 my %options = looks_like_hash @_;
279 3         9 my $model = $options{'-model'};
280 3 100       22 if ( $model =~ m/yule/i ) {
    100          
281 1         5 return $random->_gen_pure_birth(
282             '-blgen' => \&_yule_exp_bl,
283             @_,
284             );
285             }
286             elsif ( $model =~ m/hey/i ) {
287 1         6 return $random->_gen_pure_birth(
288             '-blgen' => \&_hey_exp_bl,
289             @_,
290             );
291             }
292             else {
293 1         7 throw 'BadFormat' => "model '$model' not implemented";
294             }
295             }
296              
297             =item gen_coalescent()
298              
299             This method generates coalescent trees for a given effective population size
300             (popsize) and number of alleles (tips) such that the probability of coalescence
301             in the previous generation for any pair of alleles is 1 / ( 2 * popsize ).
302              
303             Type : Generator
304             Title : gen_coalescent
305             Usage : my $trees = $gen->gen_coalescent(
306             '-tips' => 10,
307             '-popsize' => 100,
308             '-trees' => 10,
309             );
310             Function: Generates coalescent trees.
311             Returns : A Bio::Phylo::Forest object.
312             Args : -tips => number of terminal nodes (default: 10)
313             -popsize => effective population size (default: 100)
314             -trees => number of trees to generate (default: 10)
315             Optional: -factory => a Bio::Phylo::Factory object
316              
317             =cut
318              
319             sub gen_coalescent {
320 1     1 1 8 my $self = shift;
321 1         6 my %args = looks_like_hash @_;
322 1   50     6 my $popsize = $args{'-popsize'} || 100;
323 1   50     3 my $ntips = $args{'-tips'} || 10;
324 1   50     4 my $ntrees = $args{'-trees'} || 10;
325 1   33     6 my $factory = $args{'-factory'} || $factory;
326 1         8 my $forest = $factory->create_forest;
327 1         3 my $cutoff = 1 / ( 2 * $popsize );
328 1         4 for my $i ( 1 .. $ntrees ) {
329 1         1 my $ngen = 1;
330 1         2 my ( @tips, @nodes );
331 1         6 push @tips, $factory->create_node() for 1 .. $ntips;
332              
333             # starting from a pool of all tips, we iterate over all
334             # possible pairs, and for each pair we test to see if
335             # the coalesce at generation $ngen, at probability
336             # 1/2N. When they do, we create a parent for the pair,
337             # take the pair out of the pool and put the parent in it
338 1         4 while ( scalar @tips > 1 ) {
339 527         579 my $poolsize = $#tips;
340 527         554 my $j = 0;
341 527         666 while ( $j < $poolsize ) {
342 1133         1193 my $k = $j + 1;
343 1133         1475 while ( $k <= $poolsize ) {
344 3530         4646 my $rand = random_uniform();
345 3530 100       11175 if ( $rand <= $cutoff ) {
346 19         32 my $tip2 = splice @tips, $k, 1;
347 19         31 my $tip1 = splice @tips, $j, 1;
348 19         100 my $parent = $factory->create_node(
349             '-generic' => { 'age' => $ngen } );
350 19         64 unshift @nodes,
351             $tip1->set_parent($parent),
352             $tip2->set_parent($parent);
353 19         30 push @tips, $parent;
354 19         29 $poolsize--;
355             }
356 3530         4766 $k++;
357             }
358 1133         1528 $j++;
359             }
360 527         753 $ngen++;
361             }
362 1         2 push @nodes, shift @tips;
363 1         7 my $tree = $factory->create_tree()->insert(@nodes);
364 1         13 $tree->agetobl;
365 1         7 $forest->insert($tree);
366             }
367 1         5 return $forest;
368             }
369              
370             =item gen_equiprobable()
371              
372             This method draws tree shapes at random,
373             such that all shapes are equally probable.
374              
375             Type : Generator
376             Title : gen_equiprobable
377             Usage : my $trees = $gen->gen_equiprobable( '-tips' => 10 );
378             Function: Generates an equiprobable tree
379             shape, with branch lengths = 1;
380             Returns : A Bio::Phylo::Forest object.
381             Args : Optional: -tips => number of terminal nodes (default: 10),
382             Optional: -trees => number of trees to generate (default: 1),
383             Optional: -factory => a Bio::Phylo::Factory object
384              
385             =cut
386              
387             sub _fetch_equiprobable {
388 131     131   240 my @tips = @_;
389 131         260 my $tip_index = int rand scalar @tips;
390 131         228 my $tip = splice @tips, $tip_index, 1;
391 131         341 return $tip, @tips;
392             }
393              
394             sub _fetch_balanced {
395 30     30   79 return @_;
396             }
397              
398             sub _fetch_ladder {
399 30     30   36 my $tip = pop;
400 30         64 return $tip, @_;
401             }
402              
403             sub _gen_simple {
404 3     3   6 my $random = shift;
405 3         11 my %options = looks_like_hash @_;
406 3         7 my $fetcher = $options{'-fetcher'};
407 3   33     16 my $factory = $options{'-factory'} || $factory;
408 3   50     9 my $ntrees = $options{'-trees'} || 1;
409 3   50     8 my $ntips = $options{'-tips'} || 10;
410 3         17 my $forest = $factory->create_forest;
411 3         10 for my $i ( 1 .. $ntrees ) {
412 3         12 my $tree = $factory->create_tree;
413 3         7 my ( @tips, @nodes );
414              
415             # each iteration, we will remove two "tips" from this
416             # and add their newly created parent to it
417             push @tips, $factory->create_node( '-branch_length' => 1, )
418 3         19 for ( 1 .. $ntips );
419              
420             # this stays above 0 because the root ends up in it
421 3         10 while ( @tips > 1 ) {
422 49         208 my $parent = $factory->create_node( '-branch_length' => 1, );
423 49         136 $tree->insert($parent);
424 49         85 for ( 1 .. 2 ) {
425 98         115 my $tip;
426 98         162 ( $tip, @tips ) = $fetcher->(@tips);
427 98         221 $tree->insert( $tip->set_parent($parent) );
428             }
429              
430             # the parent becomes a new candidate tip
431 49         101 push @tips, $parent;
432             }
433 3         15 $forest->insert($tree);
434             }
435 3         15 return $forest;
436             }
437              
438             sub gen_equiprobable {
439 1     1 1 11 return _gen_simple( @_, '-fetcher' => \&_fetch_equiprobable );
440             }
441              
442             =item gen_balanced()
443              
444             This method creates the most balanced topology possible given the number of tips
445              
446             Type : Generator
447             Title : gen_balanced
448             Usage : my $trees = $gen->gen_balanced( '-tips' => 10 );
449             Function: Generates the most balanced topology
450             possible, with branch lengths = 1;
451             Returns : A Bio::Phylo::Forest object.
452             Args : Optional: -tips => number of terminal nodes (default: 10),
453             Optional: -trees => number of trees to generate (default: 1),
454             Optional: -factory => a Bio::Phylo::Factory object
455              
456             =cut
457              
458             sub gen_balanced {
459 1     1 1 11 return _gen_simple( @_, '-fetcher' => \&_fetch_balanced );
460             }
461              
462             =item gen_ladder()
463              
464             This method creates a ladder tree for the number of tips
465              
466             Type : Generator
467             Title : gen_ladder
468             Usage : my $trees = $gen->gen_ladder( '-tips' => 10 );
469             Function: Generates the least balanced topology
470             (a ladder), with branch lengths = 1;
471             Returns : A Bio::Phylo::Forest object.
472             Args : Optional: -tips => number of terminal nodes (default: 10),
473             Optional: -trees => number of trees to generate (default: 1),
474             Optional: -factory => a Bio::Phylo::Factory object
475              
476             =cut
477              
478             sub gen_ladder {
479 1     1 1 8 return _gen_simple( @_, '-fetcher' => \&_fetch_ladder );
480             }
481              
482             =back
483              
484             =head1 SEE ALSO
485              
486             There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo>
487             for any user or developer questions and discussions.
488              
489             =over
490              
491             =item L<Bio::Phylo::Manual>
492              
493             Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>.
494              
495             =back
496              
497             =head1 CITATION
498              
499             If you use Bio::Phylo in published research, please cite it:
500              
501             B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
502             and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
503             I<BMC Bioinformatics> B<12>:63.
504             L<http://dx.doi.org/10.1186/1471-2105-12-63>
505              
506             =cut
507              
508             }
509             1;