File Coverage

blib/lib/Bio/Phylo/EvolutionaryModels.pm
Criterion Covered Total %
statement 399 822 48.5
branch 94 258 36.4
condition 37 143 25.8
subroutine 35 47 74.4
pod 13 21 61.9
total 578 1291 44.7


line stmt bran cond sub pod time code
1             package Bio::Phylo::EvolutionaryModels;
2 1     1   753 use strict;
  1         2  
  1         31  
3 1     1   8 use warnings;
  1         3  
  1         40  
4 1     1   6 use base 'Exporter';
  1         1  
  1         82  
5 1     1   331 use Bio::Phylo::Forest::Tree;
  1         3  
  1         8  
6 1     1   390 use Bio::Phylo::Forest;
  1         2  
  1         9  
7 1     1   7 use Math::CDF qw'qnorm qbeta';
  1         2  
  1         64  
8 1     1   6 use List::Util qw'min max';
  1         3  
  1         51  
9 1     1   5 use POSIX qw'ceil floor';
  1         2  
  1         6  
10 1     1   1347 use Config; #Use to check whether multi-threading is available
  1         2  
  1         45  
11              
12             BEGIN {
13              
14 1     1   5 use Bio::Phylo;
  1         1  
  1         4  
15 1     1   1056 our @EXPORT_OK =
16             qw(&sample &constant_rate_birth &constant_rate_birth_death &clade_shifts);
17             }
18              
19             =head1 NAME
20              
21             Bio::Phylo::EvolutionaryModels - Evolutionary models for phylogenetic trees and methods to sample these
22             Klaas Hartmann, September 2007
23              
24             =head1 SYNOPSIS
25              
26             #For convenience we import the sample routine (so we can write sample(...) instead of
27             #Bio::Phylo::EvolutionaryModels::sample(...).
28             use Bio::Phylo::EvolutionaryModels qw (sample);
29            
30             #Example#A######################################################################
31             #Simulate a single tree with ten species from the constant rate birth model with parameter 0.5
32             my $tree = Bio::Phylo::EvolutionaryModels::constant_rate_birth(birth_rate => .5, tree_size => 10);
33            
34             #Example#B######################################################################
35             #Sample 5 trees with ten species from the constant rate birth model using the b algorithm
36             my ($sample,$stats) = sample(sample_size =>5,
37             tree_size => 10,
38             algorithm => 'b',
39             algorithm_options => {rate => 1},
40             model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth,
41             model_options => {birth_rate=>.5});
42              
43            
44             #Print a newick string for the 4th sampled tree
45             print $sample->[3]->to_newick."\n";
46            
47             #Example#C######################################################################
48             #Sample 5 trees with ten species from the constant rate birth and death model using
49             #the bd algorithm and two threads (useful for dual core processors)
50             #NB: we must specify an nstar here, an appropriate choice will depend on the birth_rate
51             # and death_rate we are giving the model
52            
53             my ($sample,$stats) = sample(sample_size =>5,
54             tree_size => 10,
55             threads => 2,
56             algorithm => 'bd',
57             algorithm_options => {rate => 1, nstar => 30},
58             model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth_death,
59             model_options => {birth_rate=>1,death_rate=>.8});
60            
61             #Example#D######################################################################
62             #Sample 5 trees with ten species from the constant rate birth and death model using
63             #incomplete taxon sampling
64             #
65             #sampling_probability is set so that the true tree has 10 species with 50% probability,
66             #11 species with 30% probability and 12 species with 20% probability
67             #
68             #NB: we must specify an mstar here this will depend on the model parameters and the
69             # incomplete taxon sampling parameters
70              
71             my $algorithm_options = {rate => 1,
72             nstar => 30,
73             mstar => 12,
74             sampling_probability => [.5, .3, .2]};
75            
76             my ($sample,$stats) = sample(sample_size =>5,
77             tree_size => 10,
78             algorithm => 'incomplete_sampling_bd',
79             algorithm_options => $algorithm_options,
80             model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth_death,
81             model_options => {birth_rate=>1,death_rate=>.8});
82              
83             #Example#E######################################################################
84             #Sample 5 trees with ten species from a Yule model using the memoryless_b algorithm
85            
86             #First we define the random function for the shortest pendant edge for a Yule model
87             my $random_pendant_function = sub {
88             %options = @_;
89             return -log(rand)/$options{birth_rate}/$options{tree_size};
90             };
91            
92             #Then we produce our sample
93             my ($sample,$stats) = sample(sample_size =>5,
94             tree_size => 10,
95             algorithm => 'memoryless_b',
96             algorithm_options => {pendant_dist => $random_pendant_function},
97             model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth,
98             model_options => {birth_rate=>1});
99              
100             #Example#F#######################################################################
101             #Sample 5 trees with ten species from a constant birth death rate model using the
102             #constant_rate_bd algorithm
103             my ($sample) = sample(sample_size => 5,
104             tree_size => 10,
105             algorithm => 'constant_rate_bd',
106             model_options => {birth_rate=>1,death_rate=>.8});
107              
108             =head1 DESCRIPTION
109              
110             This package contains evolutionary models for phylogenetic trees and
111             algorithms for sampling from these models. It is a non-OO module that
112             optionally exports the 'sample', 'constant_rate_birth' and
113             'constant_rate_birth_death' subroutines into the caller's namespace,
114             using the C<< use Bio::Phylo::EvolutionaryModels qw(sample constant_rate_birth constant_rate_birth_death); >>
115             directive. Alternatively, you can call the subroutines as class methods,
116             as in the synopsis.
117              
118             The initial set of algorithms available in this package corresponds to those in:
119              
120             Sampling trees from evolutionary models
121             Klaas Hartmann, Dennis Wong, Tanja Gernhard
122             Systematic Biology, in press
123              
124             Some comments and code refers back to this paper.
125             Further algorithms and evolutionary are encouraged
126             and welcome.
127              
128             To make this code as straightforward as possible to read some of the
129             algorithms have been implemented in a less than optimal manner. The code
130             also follows the structure of an earlier version of the manuscript so
131             there is some redundancy (eg. the birth algorithm is just a specific
132             instance of the birth_death algorithm)
133              
134             =head1 SAMPLING
135              
136             All sampling algorithms should be accessed through the generic sample
137             interface.
138              
139             =head2 Generic sampling interface: sample()
140              
141             Type : Interface
142             Title : sample
143             Usage : see SYNOPSIS
144             Function: Samples phylogenetic trees from an evolutionary model
145             Returns : A sample of phylogenetic trees and statistics from the
146             sampling algorithm
147             Args : Sampling parameters in a hash
148              
149             This method acts as a gateway to the various sampling algorithms. The
150             argument is a single hash containing the options for the sampling run.
151              
152              
153             Sampling parameters (* denotes optional parameters):
154            
155             sample_size The number of trees to return (more trees may be returned)
156             tree_size The size that returned trees should be
157             model The evolutionary model (should be a function reference)
158             model_options A hash pointer for model options (see individual models)
159             algorithm The algorithm to use (omit the preceding sample_)
160             algorithm_options A hash pointer for options for the algorithm (see individual algorithms for details)
161             threads* The number of threads to use (default is 1)
162             output_format* Set to newick for newick trees (default is Bio::Phylo::Forest::Tree)
163             remove_extinct Set to true to remove extinct species
164              
165              
166              
167             Available algorithms (algorithm names in the paper are given in brackets):
168              
169             b For all pure birth models (simplified GSA)
170             bd For all birth and death models (GSA)
171             incomplete_sampling_bd As above, with incomplete taxon sampling (extended GSA)
172             memoryless_b For memoryless pure birth models (PBMSA)
173             constant_rate_bd For birth and death models with constant rates (BDSA)
174              
175              
176             Model
177              
178             If you create your own model it must accept an options hash as its input.
179             This options hash can contain any parameters you desire. Your model should
180             simulate a tree until it becomes extinct or the size/age limit as specified
181             in the options has been reached. Respectively these options are tree_size
182             and tree_age.
183              
184              
185             Multi-threading
186              
187             Multi-thread support is very simplistic. The number of threads you specify
188             are created and each is assigned the task of finding sample_size/threads
189             samples. I had problems with using Bio::Phylo::Forest::Tree in a multi-
190             threaded setting. Hence the sampled trees are returned as newick strings to
191             the main routine where (if required) Tree objects are recreated from the
192             strings. For most applications this overhead seems negligible in contrast
193             to the sampling times.
194              
195              
196             From a code perspective this function (sample):
197              
198             Checks input arguments
199             Handles multi-threading
200             Calls the individual algorithms to perform sampling
201             Reformats data
202              
203             =cut
204              
205             my @threads;
206              
207             sub sample {
208 5     5 1 110 my %options = @_;
209 5         51 my %methods_require = (
210             b => ['rate'],
211             bd => [ 'rate', 'nstar' ],
212             incomplete_sampling_bd =>
213             [ 'rate', 'nstar', 'mstar', 'sampling_probability' ],
214             memoryless_b => ['pendant_dist'],
215             constant_rate_bd => [],
216             );
217              
218             #Default is to sample a single tree
219 5 50       22 $options{sample_size} = 1 unless defined $options{sample_size};
220              
221             #Default is to use a single thread
222 5 100       20 $options{threads} = 1 unless defined $options{threads};
223              
224             #Check that multiple threads are actually supported
225 5 50 33     25 if ( $options{threads} > 1 && !$Config{useithreads} ) {
226 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
227             "your perl installation does not support multiple threads" );
228             }
229              
230             #Check an algorithm was specified
231 5 50       20 unless ( defined $options{algorithm} ) {
232 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw(
233             'error' => "an algorithm type must be specified" );
234             }
235              
236             #Check the algorithm type is valid
237 5 50       22 unless ( defined $methods_require{ $options{algorithm} } ) {
238 0         0 Bio::Phylo::Util::Exceptions::BadFormat->throw(
239             'error' => "'$options{algorithm}' is not a valid algorithm" );
240             }
241              
242             #Check the algorithm options
243 5         13 foreach ( @{ $methods_require{ $options{algorithm} } } ) {
  5         16  
244 8 50       23 unless ( defined $options{algorithm_options}->{$_} ) {
245 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
246             "'$_' must be specified for the '$options{algorithm}' algorithm"
247             );
248             }
249             }
250              
251             #If we are doing incomplete taxon sampling the sampling probability must be specified
252 5 0 33     17 if ( defined $options{incomplete_sampling}
      0        
253             && $options{incomplete_sampling}
254             && !( defined $options{algorithm_options}->{sampling_probability} ) )
255             {
256 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
257             "'sampling_probability' must be specified for post hoc incomplete sampling to be applied"
258             );
259             }
260              
261             #Check that a model has been specified
262 5 50 66     25 unless ( defined $options{model}
263             || $options{algorithm} eq 'constant_rate_bd' )
264             {
265 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw(
266             'error' => "a model must be specified" );
267             }
268              
269             #Get a function pointer for the algorithm
270 5         16 my $algorithm = 'sample_' . $options{algorithm};
271 5         23 $algorithm = \&$algorithm;
272 5         12 my @output;
273              
274             #Run the algorithm, different method for multiple threads
275 5 50       17 if ( $options{threads} > 1 ) {
276 0         0 require threads;
277 0         0 require threads::shared;
278 0         0 @output = ( [], [] );
279             $SIG{'KILL'} = sub {
280 0     0   0 foreach (@threads) { $_->kill('KILL')->detach(); }
  0         0  
281 0         0 threads->exit();
282 0         0 };
283              
284             #Start the threads
285 0         0 for ( ( 1 .. $options{threads} ) ) {
286 0         0 @_ = ();
287              
288             #Note the list context of the return argument here determines
289             #the data type returned from the thread.
290             ( $threads[ $_ - 1 ] ) = threads->new( \&_sample_newick, %options,
291             sample_size => ceil( $options{sample_size} / $options{threads} )
292 0         0 );
293             }
294              
295             #Wait for them to finish and combine the data
296 0         0 for ( ( 1 .. $options{threads} ) ) {
297 0         0 until ( $threads[ $_ - 1 ]->is_joinable() ) { sleep(0.1); }
  0         0  
298 0         0 my @thread_data = $threads[ $_ - 1 ]->join;
299 0         0 for ( my $index = 0 ; $index < scalar @thread_data ; $index++ ) {
300 0 0       0 $output[$index] = [] if scalar @output < $index;
301             $output[$index] =
302 0         0 [ @{ $output[$index] }, @{ $thread_data[$index] } ];
  0         0  
  0         0  
303             }
304             }
305              
306             #Turn newick strings back into tree objects
307 0 0 0     0 unless ( defined $options{output_format}
308             && $options{output_format} eq 'newick' )
309             {
310              
311             #Convert to newick trees
312 0         0 for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
  0         0  
313 0         0 $output[0]->[$index] = Bio::Phylo::IO->parse(
314             -format => 'newick',
315             -string => $output[0]->[$index]
316             )->first;
317             }
318             }
319             }
320             else {
321              
322             #Get the samples
323 5         30 @output = &$algorithm(%options);
324              
325             #Turn into newick trees if requested
326 5 50 66     261 if ( defined $options{output_format}
    100 66        
327             && $options{output_format} eq 'newick' )
328             {
329              
330             #Convert to newick trees
331 0         0 for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
  0         0  
332 0         0 $output[0]->[$index] =
333             $output[0]->[$index]->to_newick( '-nodelabels' => 1 );
334             }
335             }
336             elsif ( defined $options{output_format}
337             && $options{output_format} eq 'forest' )
338             {
339              
340             # save as a forest
341 4         25 my $forest = Bio::Phylo::Forest->new;
342 4         16 for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
  24         55  
343 20         53 $forest->insert( $output[0]->[$index] );
344             }
345 4         15 $output[0] = $forest;
346             }
347             }
348 5         71 return @output;
349             }
350              
351             =begin comment
352              
353             Type : Internal method
354             Title : _sample_newick
355             Usage : ($thread) = threads->new(\&_sample_newick, %options);
356             @thread_output = $thread->join;
357             Function: Wrapper for sampling routines used for multi-threading
358             Returns : Output from sampling algorithms with trees replaced by newick strings
359             Args : %options to pass to sampling algorithm
360              
361             =end comment
362              
363             =cut
364              
365             sub _sample_newick {
366 0     0   0 my %options = @_;
367 0         0 my $algorithm = 'sample_' . $options{algorithm};
368              
369             # Thread 'cancellation' signal handler
370 0     0   0 $SIG{'KILL'} = sub { threads->exit(); };
  0         0  
371 0         0 $algorithm = \&$algorithm;
372              
373             #Perform the sampling
374 0         0 my @output = ( &$algorithm(%options) );
375              
376             #Convert to newick trees
377 0         0 for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
  0         0  
378 0         0 $output[0]->[$index] =
379             $output[0]->[$index]->to_newick( '-nodelabels' => 1 );
380             }
381 0         0 return @output;
382             }
383              
384             =head2 Sampling algorithms
385              
386             These algorithms should be accessed through the sampling interface (sample()).
387             Additional parameters need to be passed to these algorithms as described for
388             each algorithm.
389              
390             =over
391              
392             =item sample_b()
393              
394             Sample from any birth model
395              
396             Type : Sampling algorithm
397             Title : sample_b
398             Usage : see sample
399             Function: Samples trees from a pure birth model
400             Returns : see sample
401             Args : %algorithm_options requires the field:
402             rate => sampling rate
403              
404             =cut
405              
406             sub sample_b {
407 1     1 1 4 my %options = @_;
408              
409             #The sample of trees
410 1         1 my @sample;
411              
412             #A list of the expected number of samples
413             my @expected_summary;
414 1         2 my $model = $options{model};
415 1         2 my $rate = $options{algorithm_options}->{rate};
416              
417             #While we have insufficient samples
418 1         3 while ( scalar @sample < $options{sample_size} ) {
419              
420             #Generate a candidate model run
421 12         90 my $candidate = &$model( %{ $options{model_options} },
422 12         23 tree_size => $options{tree_size} );
423              
424             #Check that the tree has no extinctions
425 12 50       54 unless ( $candidate->is_ultrametric(1e-6) ) {
426 0         0 Bio::Phylo::Util::Exceptions::BadFormat->throw(
427             'error' => "the model must be a pure birth process" );
428             }
429              
430             #Get the lineage through time data
431 12         40 my ( $time, $count ) = lineage_through_time($candidate);
432              
433             #The expected number of samples we want
434 12         34 my $expected_samples = $rate * ( $time->[-1] - $time->[-2] );
435 12         24 push( @expected_summary, $expected_samples );
436              
437             #Get the random number of samples from this candidate tree
438 12         28 while ( $expected_samples > 0 ) {
439              
440             #If the number of samples remaining is greater than one
441             #we take a sample, otherwise we take a sample with probability
442             #according to the remaining number of samples
443 12 100 66     58 if ( $expected_samples > 1 || rand(1) < $expected_samples ) {
444 5         17 my $tree = copy_tree($candidate);
445              
446             #Truncate the tree at a random point during which it had tree_size species
447 5         21 truncate_tree_time( $tree,
448             ( $time->[-1] - $time->[-2] ) * rand(1) + $time->[-2] );
449              
450             #Add the tree to our sample
451 5         14 push( @sample, $tree );
452              
453             #Update the sample counter (for GUI or other applications that want to know how
454             #many samples have been obtained)
455 5 50       17 if ( defined $options{counter} ) {
456 0         0 my $counter = $options{counter};
457 0         0 &$counter(1);
458             }
459             }
460 12         78 $expected_samples--;
461             }
462             }
463 1         7 return ( \@sample, \@expected_summary );
464             }
465              
466             =item sample_bd()
467              
468             Sample from any birth and death model for which nstar exists
469              
470             Type : Sampling algorithm
471             Title : sample_bd
472             Usage : see sample
473             Function: Samples trees from a birth and death model
474             Returns : see sample
475             Args : %algorithm_options requires the fields:
476             nstar => once a tree has nstar species there should be
477             a negligible chance of returning to tree_size species
478             rate => sampling rate
479              
480             =cut
481              
482             sub sample_bd {
483 1     1 1 6 my %options = @_;
484              
485             #The sample of trees
486 1         2 my @sample;
487              
488             #A list of the expected number of samples
489             my @expected_summary;
490              
491             #Convenience variables
492 1         3 my $model = $options{model};
493 1         3 my $nstar = $options{algorithm_options}->{nstar};
494 1         3 my $rate = $options{algorithm_options}->{rate};
495              
496             #While we have insufficient samples
497 1         5 while ( scalar @sample < $options{sample_size} ) {
498              
499             #Generate a candidate model run
500             my $candidate =
501 31         65 &$model( %{ $options{model_options} }, tree_size => $nstar );
  31         198  
502              
503             #Get the lineage through time data
504 31         191 my ( $time, $count ) = lineage_through_time($candidate);
505              
506             #Reorganise the lineage through time data
507             #@duration contains the length of the intervals with the right number of species
508             #@start contains the starting times of these intervals
509             #@prob contains the cumulative probability of each interval being selected
510             #$total_duration contains the sum of the interval lengths
511 31         69 my ( @duration, @start, @prob, $total_duration );
512 31         86 for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) {
  2046         3024  
513 2015 100       3311 if ( $count->[$index] == $options{tree_size} ) {
514 79         133 push( @duration, $time->[ $index + 1 ] - $time->[$index] );
515 79         102 push( @start, $time->[$index] );
516 79         96 $total_duration += $duration[-1];
517 79         119 push( @prob, $total_duration );
518             }
519             }
520 1     1   10 no warnings 'uninitialized'; # FIXME
  1         4  
  1         47  
521 31 100       261 next if $total_duration == 0;
522 1     1   6 use warnings;
  1         2  
  1         350  
523 10         34 for ( my $index = 0 ; $index < scalar @prob ; $index++ ) {
524 79         133 $prob[$index] /= $total_duration;
525             }
526              
527             #The expected number of samples we want
528 10         21 my $expected_samples = $rate * $total_duration;
529 10         31 push( @expected_summary, $expected_samples );
530              
531             #Get the random number of samples from this candidate tree
532 10         31 while ( $expected_samples > 0 ) {
533              
534             #If the number of samples remaining is greater than one
535             #we take a sample, otherwise we take a sample with probability
536             #according to the remaining number of samples
537 12 100 100     69 if ( $expected_samples > 1 || rand(1) < $expected_samples ) {
538 5         26 my $tree = copy_tree($candidate);
539              
540             #Get a random interval
541 5         29 my $interval_choice = rand(1);
542 5         12 my $interval;
543 5         52 for (
544             $interval = 0 ;
545             $interval_choice > $prob[$interval] ;
546             $interval++
547             )
548             {
549             }
550              
551             #Truncate the tree at a random point during this interval
552 5         32 truncate_tree_time( $tree,
553             $duration[$interval] * rand(1) + $start[$interval] );
554              
555             #Add the tree to our sample
556 5         22 push( @sample, $tree );
557              
558             #Update the sample counter (for GUI or other applications that want to know how
559             #many samples have been obtained)
560 5 50       26 if ( defined $options{counter} ) {
561 0         0 my $counter = $options{counter};
562 0         0 &$counter(1);
563             }
564             }
565 12         213 $expected_samples--;
566             }
567             }
568 1         9 return ( \@sample, \@expected_summary );
569             }
570              
571             =item sample_incomplete_sampling_bd()
572              
573             Sample from any birth and death model with incomplete taxon sampling
574              
575             Type : Sampling algorithm
576             Title : sample_incomplete_sampling_bd
577             Usage : see sample
578             Function: Samples trees from a birth and death model with incomplete taxon sampling
579             Returns : see sample
580             Args : %algorithm_options requires the fields:
581             rate => sampling rate
582             nstar => once a tree has nstar species there should be
583             a negligible chance of returning to mstar species
584             mstar => trees with more than mstar species form a negligible
585             contribution to the final sample.
586             sampling_probability => see below.
587            
588             sampling_probability
589              
590             vector: must have length (mstar-tree_size+1) The ith element gives the probability
591             of not sampling i species.
592             scalar: the probability of sampling any individual species. Is used to calculate
593             a vector as discussed in the paper.
594              
595             =cut
596              
597             sub sample_incomplete_sampling_bd {
598 1     1 1 6 my %options = @_;
599              
600             # %options = (%options, %{$options{algorithm_options}});
601             #The sample of trees
602 1         3 my @sample;
603              
604             #A list of the expected number of samples
605             my @expected_summary;
606              
607             # #Convenience variables
608 1         3 my $model = $options{model};
609 1         2 my $nstar = $options{algorithm_options}->{nstar};
610 1         2 my $mstar = $options{algorithm_options}->{mstar};
611 1         3 my $rate = $options{algorithm_options}->{rate};
612             my $sampling_probability =
613 1         2 $options{algorithm_options}->{sampling_probability};
614              
615             #If sampling_probability is a list check it's length
616 1 50 33     8 if ( ref $sampling_probability
617 1         7 && scalar @{$sampling_probability} !=
618             ( $mstar - $options{tree_size} + 1 ) )
619             {
620 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
621             "'sampling_probability' must be a scalar or a list with m_star-tree_size+1 items"
622             );
623             }
624              
625             #If a single sampling probability was given we calculate the
626             #probability of sub-sampling given numbers of species.
627 1 50       5 unless ( ref $sampling_probability ) {
628 0         0 my $p = $sampling_probability;
629 0         0 my @vec;
630 0         0 my $total = 0;
631 0         0 foreach ( ( $options{tree_size} .. $mstar ) ) {
632              
633             #The probability of sampling tree_size species from a tree containing $_ species
634             push( @vec,
635             nchoosek( $_, $options{tree_size} ) *
636             ( $p**$options{tree_size} ) *
637 0         0 ( ( 1 - $p )**( $_ - $options{tree_size} ) ) );
638 0         0 $total += $vec[-1];
639             }
640 0         0 for ( my $ii = 0 ; $ii < scalar @vec ; $ii++ ) { $vec[$ii] /= $total; }
  0         0  
641 0         0 $sampling_probability = \@vec;
642             }
643              
644             #We now normalise the sampling_probability list so that it sums to unity
645             #this allows comparable sampling rates to be used here and in sample_bd
646 1         2 my $total_sp = 0;
647 1         2 foreach ( @{$sampling_probability} ) { $total_sp += $_; }
  1         3  
  3         9  
648 1     1   6 no warnings; # FIXME
  1         2  
  1         59  
649 1         9 foreach ( my $index = 1 .. scalar @{$sampling_probability} ) {
  0         0  
  0         0  
650 1     1   5 no warnings; # FIXME
  1         2  
  1         29  
651 1         7 $sampling_probability->[ $index - 1 ] /= $total_sp;
652 1     1   4 use warnings;
  1         2  
  1         27  
653             }
654 1     1   4 use warnings;
  1         2  
  1         91  
655              
656             #While we have insufficient samples
657 1         6 while ( scalar @sample < $options{sample_size} ) {
658              
659             #Generate a candidate model run
660             my $candidate =
661 17         40 &$model( %{ $options{model_options} }, tree_size => $nstar );
  17         118  
662              
663             #Get the lineage through time data
664 17         101 my ( $time, $count ) = lineage_through_time($candidate);
665 17         51 my @size_stats;
666 17         55 for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) {
  1748         2704  
667 1     1   9 no warnings 'uninitialized'; # FIXME
  1         4  
  1         127  
668 1731 100       2687 if ( $count->[$index] >= $options{tree_size} ) {
669             $size_stats[ $count->[$index] - $options{tree_size} ] +=
670             $time->[$index] *
671             $sampling_probability->[ $count->[$index] -
672 1115         1835 $options{tree_size} ];
673             }
674             }
675              
676             #Reorganise the lineage through time data
677             #@duration contains the length of intervals with more than tree_size species
678             #@start contains the starting times of these intervals
679             #@prob contains the cumulative probability of each interval being selected
680             #$total_prob contains the sum of @prob before normalisation
681 17         44 my ( @duration, @start, @prob, $total_prob );
682 17         36 $total_prob = 0;
683 17         45 for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) {
  1748         2590  
684 1731 100       2596 if ( $count->[$index] >= $options{tree_size} ) {
685 1115         1612 push( @duration, $time->[ $index + 1 ] - $time->[$index] );
686 1115         1405 push( @start, $time->[$index] );
687 1     1   6 no warnings 'uninitialized'; # FIXME
  1         1  
  1         4372  
688             $total_prob +=
689             $duration[-1] *
690             $sampling_probability->[ $count->[$index] -
691 1115         1484 $options{tree_size} ];
692 1115         1635 push( @prob, $total_prob );
693             }
694             }
695 17 100       111 next if $total_prob == 0;
696 9         44 for ( my $index = 0 ; $index < scalar @prob ; $index++ ) {
697 1115         1846 $prob[$index] /= $total_prob;
698             }
699              
700             #The expected number of samples we want
701 9         24 my $expected_samples = $rate * $total_prob;
702 9         34 push( @expected_summary, $expected_samples );
703             $expected_samples = $options{sample_size} - scalar @sample
704 9 100       49 if $expected_samples > $options{sample_size} - scalar @sample;
705              
706             #Get the random number of samples from this candidate tree
707 9         39 while ( $expected_samples > 0 ) {
708              
709             #If the number of samples remaining is greater than one
710             #we take a sample, otherwise we take a sample with probability
711             #according to the remaining number of samples
712 9 100 66     85 if ( $expected_samples > 1 || rand(1) < $expected_samples ) {
713 5         29 my $tree = copy_tree($candidate);
714              
715             #Get a random interval
716 5         25 my $interval_choice = rand(1);
717 5         12 my $interval;
718 5         78 for (
719             $interval = 0 ;
720             $interval_choice > $prob[$interval] ;
721             $interval++
722             )
723             {
724             }
725              
726             #Truncate the tree at a random point during this interval
727 5         38 truncate_tree_time( $tree,
728             $duration[$interval] * rand(1) + $start[$interval] );
729              
730             #Remove random species so it has the right size
731 5         31 truncate_tree_size( $tree, $options{tree_size} );
732              
733             #Add the tree to our sample
734 5         18 push( @sample, $tree );
735              
736             #Update the sample counter (for GUI or other applications that want to know how
737             #many samples have been obtained)
738 5 50       26 if ( defined $options{counter} ) {
739 0         0 my $counter = $options{counter};
740 0         0 &$counter(1);
741             }
742             }
743 9         564 $expected_samples--;
744             }
745             }
746 1         10 return ( \@sample, \@expected_summary );
747             }
748              
749             =item sample_memoryless_b()
750              
751             Sample from a memoryless birth model
752              
753             Type : Sampling algorithm
754             Title : sample_memoryless_b
755             Usage : see sample
756             Function: Samples trees from a memoryless birth model
757             Returns : see sample
758             Args : %algorithm_options with fields:
759             pendant_dist => function reference for generating random
760             shortest pendant edges
761              
762             NB: The function pointed to by pendant_dist is given model_options
763             as it's input argument with an added field tree_size. It must return
764             a random value from the probability density for the shortest pendant
765             edges.
766              
767             =cut
768              
769             sub sample_memoryless_b {
770 1     1 1 6 my %options = @_;
771              
772             #The sample of trees
773 1         2 my @sample;
774              
775             #A list of the expected number of samples
776             my @expected_summary;
777              
778             #The user specified functions
779 1         3 my $model = $options{model};
780 1         3 my $pendant_dist = $options{algorithm_options}->{pendant_dist};
781              
782             #While we have insufficient samples
783 1         5 while ( scalar @sample < $options{sample_size} ) {
784              
785             #Generate a tree ending just after the last speciation event
786 5         30 my $tree = &$model( %{ $options{model_options} },
787 5         11 tree_size => $options{tree_size} );
788              
789             #Check that the tree has no extinctions
790 5 50       31 unless ( $tree->is_ultrametric(1e-6) ) {
791 0         0 Bio::Phylo::Util::Exceptions::BadFormat->throw(
792             'error' => "the model must be a pure birth process" );
793             }
794              
795             #Get the random length to add after the last speciation event
796 5         48 my $pendant_add = &$pendant_dist( %{ $options{model_options} },
797 5         12 tree_size => $options{tree_size} );
798              
799             #Add the final length
800 5         72 foreach ( @{ $tree->get_terminals } ) {
  5         27  
801 50         109 $_->set_branch_length( $_->get_branch_length + $pendant_add );
802             }
803              
804             #Add to the sample
805 5         18 push( @sample, $tree );
806 5         12 push( @expected_summary, 1 );
807              
808             #Update the sample counter (for GUI or other applications that want to know how
809             #many samples have been obtained)
810 5 50       33 if ( defined $options{counter} ) {
811 0         0 my $counter = $options{counter};
812 0         0 &$counter(1);
813             }
814             }
815 1         7 return ( \@sample, \@expected_summary );
816             }
817              
818             =item sample_constant_rate_bd()
819              
820             Sample from a constant rate birth and death model
821              
822             Type : Sampling algorithm
823             Title : sample_constant_rate_bd
824             Usage : see sample
825             Function: Samples trees from a memoryless birth model
826             Returns : see sample
827             Args : no specific algorithm options but see below
828              
829             NB: This algorithm only applies to constant rate birth and death
830             processes. Consequently a model does not need to be specified (and
831             will be ignored if it is). But birth_rate and death_rate model
832             options must be given.
833              
834             =cut
835              
836             sub sample_constant_rate_bd {
837 1     1 1 5 my %options = @_;
838              
839             #Store parameters in shorter variables (for clarity)
840             my ( $br, $dr, $n ) = (
841             $options{model_options}->{birth_rate},
842             $options{model_options}->{death_rate},
843             $options{tree_size}
844 1         4 );
845 1         3 my @sample;
846              
847             #Loop for sampling each tree
848 1         5 while ( scalar @sample < $options{sample_size} ) {
849 5         12 my @nodes;
850              
851             #Compute the random tree age from the inverse CDF (different formulas for
852             #birth rate == death rate and otherwise)
853             my $tree_age;
854              
855             #The uniform random variable
856 5         21 my $r = rand;
857 5 50       20 if ( $br == $dr ) {
858 0         0 $tree_age = 1 / ( $br * ( $r**( -1 / $n ) - 1 ) );
859             }
860             else {
861 5         55 $tree_age =
862             1 /
863             ( $br - $dr ) *
864             log(
865             ( 1 - $dr / $br * $r**( 1 / $n ) ) / ( 1 - $r**( 1 / $n ) ) );
866             }
867              
868             #Find the random speciation times
869 5         12 my @speciation;
870 5         29 foreach ( 0 .. ( $n - 2 ) ) {
871 45 50       65 if ( $br == $dr ) {
872 0         0 my $r = rand;
873 0         0 $speciation[$_] =
874             $r * $tree_age / ( 1 + $br * $tree_age * ( 1 - $r ) );
875             }
876             else {
877              
878             #Two repeated parts of the inverse CDF for clarity
879 45         84 my $a = $br - $dr * exp( ( $dr - $br ) * $tree_age );
880 45         66 my $b = ( 1 - exp( ( $dr - $br ) * $tree_age ) ) * rand;
881              
882             #The random speciation time from the inverse CDF
883 45         99 $speciation[$_] =
884             1 /
885             ( $br - $dr ) *
886             log( ( $a - $dr * $b ) / ( $a - $br * $b ) );
887             }
888             }
889              
890             #Create the initial terminals and a vector for their ages
891 5         12 my @terminals;
892             my @ages;
893 5         13 foreach ( 0 .. ( $n - 1 ) ) {
894              
895             #Add a new terminal
896 50         138 $terminals[$_] = Bio::Phylo::Forest::Node->new();
897 50         198 $terminals[$_]->set_name( 'ID' . $_ );
898 50         115 $ages[$_] = 0;
899             }
900 5         20 @nodes = @terminals;
901              
902             #Sort the speciation times
903 5         32 my @sorted_speciation = sort { $a <=> $b } @speciation;
  94         129  
904              
905             #Make a hash for easily finding the index of a given speciation event
906 5         9 my %speciation_hash;
907 5         18 foreach ( 0 .. ( $n - 2 ) ) {
908 45         170 $speciation_hash{ $speciation[$_] } = $_;
909             }
910              
911             #Construct the tree
912 5         16 foreach my $index ( 0 .. ( $n - 2 ) ) {
913              
914             #Create the parent node
915 45         125 my $parent = Bio::Phylo::Forest::Node->new();
916 45         183 $parent->set_name( 'ID' . ( $n + $index ) );
917 45         99 push( @nodes, $parent );
918              
919             #An index for this speciation event back into the unsorted vectors
920 45         215 my $spec_index = $speciation_hash{ $sorted_speciation[$index] };
921              
922             #Add the children to the parent node
923 45         149 $parent->set_child( $terminals[$spec_index] );
924 45         132 $terminals[$spec_index]->set_parent($parent);
925 45         127 $parent->set_child( $terminals[ $spec_index + 1 ] );
926 45         130 $terminals[ $spec_index + 1 ]->set_parent($parent);
927              
928             #Set the children's branch lengths
929 45         169 $terminals[$spec_index]->set_branch_length(
930             $sorted_speciation[$index] - $ages[$spec_index] );
931 45         154 $terminals[ $spec_index + 1 ]->set_branch_length(
932             $sorted_speciation[$index] - $ages[ $spec_index + 1 ] );
933              
934             #Replace the two terminals with the new one
935 45         98 splice( @terminals, $spec_index, 2, $parent );
936 45         94 splice( @ages, $spec_index, 2, $sorted_speciation[$index] );
937              
938             #Update the mapping for the sorted speciation times
939 45         166 foreach ( keys %speciation_hash ) {
940 405 100       699 $speciation_hash{$_}-- if $speciation_hash{$_} > $spec_index;
941             }
942             }
943              
944             #Add the nodes to a tree
945 5         36 my $tree = Bio::Phylo::Forest::Tree->new();
946 5         19 foreach ( reverse(@nodes) ) { $tree->insert($_); }
  95         203  
947 5         12 push( @sample, $tree );
948              
949             #Update the sample counter (for GUI or other applications that want to know how
950             #many samples have been obtained)
951 5 50       90 if ( defined $options{counter} ) {
952 0         0 my $counter = $options{counter};
953 0         0 &$counter(1);
954             }
955             }
956 1         8 return ( \@sample, [] );
957             }
958              
959             =back
960              
961             =head1 EVOLUTIONARY MODELS
962              
963             All evolutionary models take a options hash as their input argument
964             and return a Bio::Phylo::Forest::Tree. This tree may contain extinct
965             lineages (lineages that end prior to the end of the tree).
966              
967             The options hash contains any model specific parameters (see the
968             individual model descriptions) and one or both terminating conditions:
969             tree_size => the number of extant species at which to terminate the tree
970             tree_age => the age of the tree at which to terminate the process
971              
972             Note that if the model stops due to the tree_size condition then the
973             tree ends immediately after the speciation event that created the last
974             species.
975              
976             =over
977              
978             =item constant_rate_birth()
979              
980             A constant rate birth model (Yule/ERM)
981              
982             Type : Evolutionary model
983             Title : constant_rate_birth
984             Usage : $tree = constant_rate_birth(%options)
985             Function: Produces a tree from the model terminating at a given size/time
986             Returns : Bio::Phylo::Forest::Tree
987             Args : %options with fields:
988             birth_rate The birth rate parameter (default 1)
989             tree_size The size of the tree at which to terminate
990             tree_age The age of the tree at which to terminate
991              
992             NB: At least one of tree_size and tree_age must be specified
993              
994             =cut
995              
996             sub constant_rate_birth {
997 18     18 1 546 my %options = @_;
998 18         48 $options{death_rate} = 0;
999 18         71 return constant_rate_birth_death(%options);
1000             }
1001              
1002             =item external_model()
1003              
1004             A dummy model that takes as input a set of newick_trees and randomly samples
1005             these.
1006              
1007             Type : Evolutionary model
1008             Title : external_model
1009             Usage : $tree = $external_model(%options)
1010             Function: Returns a random tree that was given as input
1011             Returns : Bio::Phylo::Forest::Tree
1012             Args : %options with fields:
1013             trees An array of newick strings. One of these is returned at random.
1014              
1015             NB: The usual parameters tree_size and tree_age will be ignored. When sampling
1016             using this model the trees array must contain trees adhering to the requirements
1017             of the sampling algorithm. This is NOT checked automatically.
1018              
1019             =cut
1020              
1021             sub external_model {
1022 0     0 1 0 my %options = @_;
1023 0         0 my $choice = int( rand( scalar @{ $options{trees} } ) );
  0         0  
1024              
1025             #Pick a newick string and turn it in to a Bio::Phylo::Forest::Tree object
1026             my $tree = Bio::Phylo::IO->parse(
1027             -format => 'newick',
1028 0         0 -string => $options{trees}->[$choice]
1029             )->first;
1030 0         0 return $tree;
1031             }
1032              
1033             =item constant_rate_birth_death()
1034              
1035             A constant rate birth and death model
1036              
1037             Type : Evolutionary model
1038             Title : constant_rate_birth_death
1039             Usage : $tree = constant_rate_birth_death(%options)
1040             Function: Produces a tree from the model terminating at a given size/time
1041             Returns : Bio::Phylo::Forest::Tree
1042             Args : %options with fields:
1043             birth_rate The birth rate parameter (default 1)
1044             death_rate The death rate parameter (default no extinction)
1045             tree_size The size of the tree at which to terminate
1046             tree_age The age of the tree at which to terminate
1047              
1048             NB: At least one of tree_size and tree_age must be specified
1049              
1050             =cut
1051              
1052             sub constant_rate_birth_death {
1053 66     66 1 303 my %options = @_;
1054              
1055             #Check that we have a termination condition
1056 66 50 33     259 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1057              
1058             #Error here.
1059 0         0 return undef;
1060             }
1061              
1062             #Set the undefined condition to infinity
1063 66 50       190 $options{tree_size} = 1e6 unless defined $options{tree_size};
1064 66 50       222 $options{tree_age} = 1e6 unless defined $options{tree_age};
1065              
1066             #Set default rates
1067 66 50       162 $options{birth_rate} = 1 unless defined( $options{birth_rate} );
1068             delete $options{death_rate}
1069 66 100 66     410 if defined( $options{death_rate} ) && $options{death_rate} == 0;
1070              
1071             #Each node gets an ID number this tracks these
1072 66         127 my $node_id = 0;
1073              
1074             #Create a new tree with a root, start the list of terminal species
1075 66         331 my $tree = Bio::Phylo::Forest::Tree->new();
1076 66         274 my $root = Bio::Phylo::Forest::Node->new();
1077 66         292 $root->set_branch_length(0);
1078 66         683 $root->set_name( 'ID' . $node_id++ );
1079 66         299 $tree->insert($root);
1080 66         444 my @terminals = ($root);
1081 66         133 my ( $next_extinction, $next_speciation );
1082 66         108 my $time = 0;
1083 66         92 my $tree_size = 1;
1084              
1085             #Check whether we have a non-zero root edge
1086 66 50 33     263 if ( defined $options{root_edge} && $options{root_edge} ) {
1087              
1088             #Non-zero root. We set the time to the first speciation event
1089 0         0 $next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
1090             }
1091             else {
1092              
1093             #Zero root, we want a speciation event straight away
1094 66         132 $next_speciation = 0;
1095             }
1096              
1097             #Time of the first extinction event. If no extinction we always
1098             #set the extinction event after the current speciation event
1099 66 100       163 if ( defined $options{death_rate} ) {
1100 48         330 $next_extinction = -log(rand) / $options{death_rate} / $tree_size;
1101             }
1102             else {
1103 18         37 $next_extinction = $next_speciation + 1;
1104             }
1105              
1106             #While the tree has not become extinct and the termination criterion
1107             #has not been achieved we create new speciation and extinction events
1108 66   66     460 while ($tree_size > 0
      66        
1109             && $tree_size < $options{tree_size}
1110             && $time < $options{tree_age} )
1111             {
1112              
1113             #Add the time since the last event to all terminal species
1114 3938         6886 foreach (@terminals) {
1115             $_->set_branch_length(
1116             $_->get_branch_length + min(
1117             $next_extinction, $next_speciation,
1118 50484         93620 $options{tree_age} - $time
1119             )
1120             );
1121             }
1122              
1123             #Update the time
1124 3938         7854 $time += min( $next_extinction, $next_speciation );
1125              
1126             #If the tree exceeds the time limit we are done
1127 3938 50       8204 return $tree if ( $time > $options{tree_age} );
1128              
1129             #Get the species effected by this event and remove it from the terminals list
1130 3938         10134 my $effected =
1131             splice( @terminals, int( rand( scalar @terminals ) ), 1 );
1132              
1133             #If we have a speciation event we add two new species
1134 3938 100 66     11735 if ( $next_speciation < $next_extinction || !defined $next_extinction )
1135             {
1136 2296         4752 foreach ( 1, 2 ) {
1137              
1138             #Create a new species
1139 4592         13528 my $child = Bio::Phylo::Forest::Node->new();
1140 4592         19646 $child->set_name( 'ID' . $node_id++ );
1141              
1142             #Give it a zero edge length
1143 4592         14974 $child->set_branch_length(0);
1144              
1145             #Add it as a child to the speciating species
1146 4592         12351 $effected->set_child($child);
1147              
1148             #Add it to the tree
1149 4592         13419 $tree->insert($child);
1150              
1151             #Add it to the terminals list
1152 4592         10206 push( @terminals, $child );
1153             }
1154             }
1155              
1156             #We calculate the time that the next extinction and speciation
1157             #events will occur (only the earliest of these will actually
1158             #happen). NB: this approach is only appropriate for models where
1159             #speciation and extinction times are exponentially distributed.
1160             #Windows sometimes returns 0 values for rand...
1161 3938         7672 my ( $r1, $r2 ) = ( 0, 0 );
1162 3938         10836 $r1 = rand until $r1;
1163 3938         8013 $r2 = rand until $r2;
1164 3938         5366 $tree_size = scalar @terminals;
1165 3938 100       7036 return $tree unless $tree_size;
1166 3908         12883 $next_speciation = -log($r1) / $options{birth_rate} / $tree_size;
1167 3908 100       7597 if ( defined $options{death_rate} ) {
1168 3746         20923 $next_extinction = -log($r2) / $options{death_rate} / $tree_size;
1169             }
1170             else {
1171 162         758 $next_extinction = $next_speciation + 1;
1172             }
1173             }
1174 36         241 return $tree;
1175             }
1176              
1177              
1178             =item diversity_dependent_speciation()
1179              
1180             A birth and death model with speciation rate dependent on diversity as per
1181             Etienne et. al. 2012
1182              
1183             Type : Evolutionary model
1184             Title : diversity_dependent_speciation
1185             Usage : $tree = diversity_dependent_speciation(%options)
1186             Function: Produces a tree from the model terminating at a given size/time
1187             Returns : Bio::Phylo::Forest::Tree
1188             Args : %options with fields:
1189             maximal_birth_rate The maximal birth rate parameter (default 1)
1190             death_rate The death rate parameter (default no extinction)
1191             K_dash The modified carrying capacity (no default)
1192             tree_size The size of the tree at which to terminate
1193             tree_age The age of the tree at which to terminate
1194              
1195             NB: At least one of tree_size and tree_age must be specified
1196              
1197             Reference:
1198             Rampal S. Etienne, Bart Haegeman, Tanja Stadler, Tracy Aze, Paul N. Pearson,
1199             Andy Purvis and Albert B. Phillimore. "Diversity-dependence brings molecular
1200             phylogenies closer to agreement with the fossil record"
1201             doi: 10.1098/rspb.2011.1439
1202              
1203             =cut
1204              
1205             sub diversity_dependent_speciation {
1206 0     0 1 0 my %options = @_;
1207              
1208             #Check that we have a termination condition
1209 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1210             #Error here.
1211 0         0 return undef;
1212             }
1213              
1214             #Check that we have a carrying capacity
1215 0 0       0 unless ( defined $options{K_dash} ) {
1216             #Error here.
1217 0         0 return undef;
1218             }
1219              
1220             #Set the undefined condition to infinity
1221 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1222 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1223              
1224             #Set default rates
1225 0 0       0 $options{maximal_birth_rate} = 1 unless defined( $options{maximal_birth_rate} );
1226             delete $options{death_rate}
1227 0 0 0     0 if defined( $options{death_rate} ) && $options{death_rate} == 0;
1228              
1229             #Each node gets an ID number this tracks these
1230 0         0 my $node_id = 0;
1231              
1232             #Create a new tree with a root, start the list of terminal species
1233 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1234 0         0 my $root = Bio::Phylo::Forest::Node->new();
1235 0         0 $root->set_branch_length(0);
1236 0         0 $root->set_name( 'ID' . $node_id++ );
1237 0         0 $tree->insert($root);
1238 0         0 my @terminals = ($root);
1239 0         0 my ( $next_extinction, $next_speciation );
1240 0         0 my $time = 0;
1241 0         0 my $tree_size = 1;
1242            
1243            
1244 0         0 $options{birth_rate} = max(0,$options{max_birth_rate}*(1-1/$options{K_dash}));
1245            
1246              
1247             #Check whether we have a non-zero root edge
1248 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1249              
1250             #Non-zero root. We set the time to the first speciation event
1251 0         0 $next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
1252             }
1253             else {
1254              
1255             #Zero root, we want a speciation event straight away
1256 0         0 $next_speciation = 0;
1257             }
1258              
1259             #Time of the first extinction event. If no extinction we always
1260             #set the extinction event after the current speciation event
1261 0 0       0 if ( defined $options{death_rate} ) {
1262 0         0 $next_extinction = -log(rand) / $options{death_rate} / $tree_size;
1263             }
1264             else {
1265 0         0 $next_extinction = $next_speciation + 1;
1266             }
1267              
1268             #While the tree has not become extinct and the termination criterion
1269             #has not been achieved we create new speciation and extinction events
1270 0   0     0 while ($tree_size > 0
      0        
1271             && $tree_size < $options{tree_size}
1272             && $time < $options{tree_age} )
1273             {
1274              
1275             #Add the time since the last event to all terminal species
1276 0         0 foreach (@terminals) {
1277             $_->set_branch_length(
1278             $_->get_branch_length + min(
1279             $next_extinction, $next_speciation,
1280 0         0 $options{tree_age} - $time
1281             )
1282             );
1283             }
1284              
1285             #Update the time
1286 0         0 $time += min( $next_extinction, $next_speciation );
1287              
1288             #If the tree exceeds the time limit we are done
1289 0 0       0 return $tree if ( $time > $options{tree_age} );
1290              
1291             #Get the species effected by this event and remove it from the terminals list
1292 0         0 my $effected =
1293             splice( @terminals, int( rand( scalar @terminals ) ), 1 );
1294              
1295             #If we have a speciation event we add two new species
1296 0 0 0     0 if ( $next_speciation < $next_extinction || !defined $next_extinction )
1297             {
1298 0         0 foreach ( 1, 2 ) {
1299              
1300             #Create a new species
1301 0         0 my $child = Bio::Phylo::Forest::Node->new();
1302 0         0 $child->set_name( 'ID' . $node_id++ );
1303              
1304             #Give it a zero edge length
1305 0         0 $child->set_branch_length(0);
1306              
1307             #Add it as a child to the speciating species
1308 0         0 $effected->set_child($child);
1309              
1310             #Add it to the tree
1311 0         0 $tree->insert($child);
1312              
1313             #Add it to the terminals list
1314 0         0 push( @terminals, $child );
1315             }
1316             }
1317              
1318             #We calculate the time that the next extinction and speciation
1319             #events will occur (only the earliest of these will actually
1320             #happen). NB: this approach is only appropriate for models where
1321             #speciation and extinction times are exponentially distributed.
1322             #Windows sometimes returns 0 values for rand...
1323 0         0 my ( $r1, $r2 ) = ( 0, 0 );
1324 0         0 $r1 = rand until $r1;
1325 0         0 $r2 = rand until $r2;
1326 0         0 $tree_size = scalar @terminals;
1327 0 0       0 return $tree unless $tree_size;
1328            
1329 0         0 $options{birth_rate} = max(0,$options{max_birth_rate}*(1-$tree_size/$options{K_dash}));
1330 0 0       0 if ($options{birth_rate}==0)
1331             {
1332 0         0 $next_speciation = $options{tree_age};
1333             } else
1334             {
1335 0         0 $next_speciation = -log($r1) / $options{birth_rate} / $tree_size;
1336             }
1337 0 0       0 if ( defined $options{death_rate} ) {
1338 0         0 $next_extinction = -log($r2) / $options{death_rate} / $tree_size;
1339             }
1340             else {
1341 0         0 $next_extinction = $next_speciation + 1;
1342             }
1343             }
1344 0         0 return $tree;
1345             }
1346              
1347              
1348             =item constant_rate_birth_death()
1349              
1350             A temporal shift birth and death model
1351              
1352             Type : Evolutionary model
1353             Title : temporal_shift_birth_death
1354             Usage : $tree = constant_rate_birth_death(%options)
1355             Function: Produces a tree from the model terminating at a given size/time
1356             Returns : Bio::Phylo::Forest::Tree
1357             Args : %options with fields:
1358             birth_rates The birth rates
1359             death_rates The death rates
1360             rate_times The times after which the rates apply (first element must be 0)
1361             tree_size The size of the tree at which to terminate
1362             tree_age The age of the tree at which to terminate
1363              
1364             NB: At least one of tree_size and tree_age must be specified
1365              
1366             =cut
1367              
1368             sub temporal_shift_birth_death {
1369 0     0 0 0 my %options = @_;
1370              
1371             #Check that we have a termination condition
1372 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1373              
1374             #Error here.
1375 0         0 return undef;
1376             }
1377              
1378             #Set the undefined condition to infinity
1379 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1380 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1381              
1382             #Each node gets an ID number this tracks these
1383 0         0 my $node_id = 0;
1384              
1385             #Create a new tree with a root, start the list of terminal species
1386 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1387 0         0 my $root = Bio::Phylo::Forest::Node->new();
1388 0         0 $root->set_branch_length(0);
1389 0         0 $root->set_name( 'ID' . $node_id++ );
1390 0         0 $tree->insert($root);
1391 0         0 my @terminals = ($root);
1392 0         0 my ( $next_extinction, $next_speciation );
1393 0         0 my $time = 0;
1394 0         0 my $tree_size = 1;
1395              
1396             #Load current rates
1397 0         0 my $birth_rate = $options{birth_rates}[0];
1398 0         0 my $death_rate = $options{death_rates}[0];
1399 0         0 my $current_rates = 0;
1400 0         0 my $next_rate_change = $options{rate_times}[$current_rates+1];
1401            
1402             #Add an additional time to the end of the rate change times to simplify checking
1403 0         0 push(@{$options{rate_times}},$options{tree_size}*2);
  0         0  
1404              
1405             #Check whether we have a non-zero root edge
1406 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1407              
1408             #Non-zero root. We set the time to the first speciation event
1409 0         0 $next_speciation = -log(rand) / $birth_rate / $tree_size;
1410             }
1411             else {
1412              
1413             #Zero root, we want a speciation event straight away
1414 0         0 $next_speciation = 0;
1415             }
1416              
1417             # print "RATES:".$time."|".$birth_rate."|".$death_rate."\n";
1418             #Time of the first extinction event. If no extinction we always
1419             #set the extinction event after the current speciation event
1420 0         0 $next_extinction = -log(rand) / $death_rate / $tree_size;
1421              
1422             #While the tree has not become extinct and the termination criterion
1423             #has not been achieved we create new speciation and extinction events
1424 0   0     0 while ($tree_size > 0
      0        
1425             && $tree_size < $options{tree_size}
1426             && $time < $options{tree_age} )
1427             {
1428              
1429             # print "TIMES:".$next_extinction."|".$next_speciation."|".$next_rate_change."|\n";
1430             # print "RATES:".$time."|".$birth_rate."|".$death_rate."\n";
1431             #Add the time since the last event to all terminal species
1432 0         0 foreach (@terminals) {
1433             $_->set_branch_length(
1434             $_->get_branch_length + min(
1435             $next_extinction, $next_speciation,
1436 0         0 $options{tree_age} - $time
1437             )
1438             );
1439             }
1440              
1441             #Update the time
1442 0         0 my $time_last = $time;
1443 0         0 $time += min( $next_extinction, $next_speciation, $next_rate_change-$time_last);
1444              
1445             #If the tree exceeds the time limit we are done
1446 0 0       0 return $tree if ( $time > $options{tree_age} );
1447              
1448            
1449 0 0       0 if ($next_rate_change-$time_last < min( $next_extinction, $next_speciation) )
1450             {
1451 0         0 $current_rates += 1;
1452 0         0 $birth_rate = $options{birth_rates}[$current_rates];
1453 0         0 $death_rate = $options{death_rates}[$current_rates];
1454 0         0 $next_rate_change = $options{rate_times}[$current_rates+1];
1455            
1456             } else
1457             {
1458            
1459              
1460             #Get the species effected by this event and remove it from the terminals list
1461 0         0 my $effected = splice( @terminals, int( rand( scalar @terminals ) ), 1 );
1462              
1463             #If we have a speciation event we add two new species
1464 0 0 0     0 if ( $next_speciation < $next_extinction || !defined $next_extinction )
1465             {
1466 0         0 foreach ( 1, 2 ) {
1467              
1468             #Create a new species
1469 0         0 my $child = Bio::Phylo::Forest::Node->new();
1470 0         0 $child->set_name( 'ID' . $node_id++ );
1471              
1472             #Give it a zero edge length
1473 0         0 $child->set_branch_length(0);
1474              
1475             #Add it as a child to the speciating species
1476 0         0 $effected->set_child($child);
1477              
1478             #Add it to the tree
1479 0         0 $tree->insert($child);
1480              
1481             #Add it to the terminals list
1482 0         0 push( @terminals, $child );
1483             }
1484             }
1485             }
1486              
1487             #We calculate the time that the next extinction and speciation
1488             #events will occur (only the earliest of these will actually
1489             #happen). NB: this approach is only appropriate for models where
1490             #speciation and extinction times are exponentially distributed.
1491             #Windows sometimes returns 0 values for rand...
1492 0         0 my ( $r1, $r2 ) = ( 0, 0 );
1493 0         0 $r1 = rand until $r1;
1494 0         0 $r2 = rand until $r2;
1495 0         0 $tree_size = scalar @terminals;
1496 0 0       0 return $tree unless $tree_size;
1497 0         0 $next_speciation = -log($r1) / $birth_rate / $tree_size;
1498 0         0 $next_extinction = -log($r2) / $death_rate / $tree_size;
1499            
1500 0 0       0 if ((scalar @terminals)%100==0)
1501             {
1502 0         0 print $time."|".@terminals."|\n";
1503             }
1504              
1505             }
1506 0         0 return $tree;
1507             }
1508              
1509             =item evolving_speciation_rate()
1510              
1511             An evolutionary model featuring evolving speciation rates. Each daughter
1512             species is assigned its parent's speciation rate multiplied by a normally
1513             distributed noise factor.
1514              
1515             Type : Evolutionary model
1516             Title : evolving_speciation_rate
1517             Usage : $tree = evolving_speciation_rate(%options)
1518             Function: Produces a tree from the model terminating at a given size/time
1519             Returns : Bio::Phylo::Forest::Tree
1520             Args : %options with fields:
1521             birth_rate The initial speciation rate (default 1)
1522             evolving_std The standard deviation of the normal distribution
1523             from which the rate multiplier is drawn.
1524             tree_size The size of the tree at which to terminate
1525             tree_age The age of the tree at which to terminate
1526              
1527             NB: At least one of tree_size and tree_age must be specified
1528              
1529             =cut
1530              
1531             sub evolving_speciation_rate {
1532 0     0 1 0 my %options = @_;
1533              
1534             #Check that we have a termination condition
1535 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1536              
1537             #Error here.
1538 0         0 return undef;
1539             }
1540              
1541             #Set the undefined condition to infinity
1542 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1543 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1544              
1545             #Set default rates
1546 0 0       0 $options{birth_rate} = 1 unless defined( $options{birth_rate} );
1547 0 0       0 $options{evolving_std} = 1 unless defined( $options{evolving_std} );
1548              
1549             #Each node gets an ID number this tracks these
1550 0         0 my $node_id = 0;
1551              
1552             #Create a new tree with a root, start the list of terminal species
1553 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1554 0         0 my $root = Bio::Phylo::Forest::Node->new();
1555 0         0 $root->set_branch_length(0);
1556 0         0 $root->set_name( 'ID' . $node_id++ );
1557 0         0 $tree->insert($root);
1558 0         0 my @terminals = ($root);
1559 0         0 my @birth_rates = ( $options{birth_rate} );
1560 0         0 my $net_rate = $options{birth_rate};
1561 0         0 my $next_speciation;
1562 0         0 my $time = 0;
1563 0         0 my $tree_size = 1;
1564              
1565             #Check whether we have a non-zero root edge
1566 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1567              
1568             #Non-zero root. We set the time to the first speciation event
1569 0         0 $next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
1570             }
1571             else {
1572              
1573             #Zero root, we want a speciation event straight away
1574 0         0 $next_speciation = 0;
1575             }
1576              
1577             #While we haven't reached termination
1578 0   0     0 while ( $tree_size < $options{tree_size} && $time < $options{tree_age} ) {
1579              
1580             #Add the time since the last event to all terminal species
1581 0         0 foreach (@terminals) {
1582             $_->set_branch_length( $_->get_branch_length +
1583 0         0 min( $next_speciation, $options{tree_age} - $time ) );
1584             }
1585              
1586             #Update the time
1587 0         0 $time += $next_speciation;
1588              
1589             #If the tree exceeds the time limit we are done
1590 0 0       0 return $tree if ( $time > $options{tree_age} );
1591              
1592             #Get the species effected by this event
1593 0         0 my $rand_select = rand($net_rate);
1594 0         0 my $selected = 0;
1595 0   0     0 for (
1596             ;
1597             $selected < scalar @terminals
1598             && $rand_select > $birth_rates[$selected] ;
1599             $selected++
1600             )
1601             {
1602 0         0 $rand_select -= $birth_rates[$selected];
1603             }
1604              
1605             #Remove it from the terminals list
1606 0         0 my $effected = splice( @terminals, $selected, 1 );
1607 0         0 my $effected_rate = splice( @birth_rates, $selected, 1 );
1608              
1609             #Update the net speciation rate
1610 0         0 $net_rate -= $effected_rate;
1611              
1612             #If we have a speciation event we add two new species
1613 0         0 foreach ( 1, 2 ) {
1614              
1615             #Create a new species
1616 0         0 my $child = Bio::Phylo::Forest::Node->new();
1617 0         0 $child->set_name( 'ID' . $node_id++ );
1618              
1619             #Give it a zero edge length
1620 0         0 $child->set_branch_length(0);
1621              
1622             #Add it as a child to the speciating species
1623 0         0 $effected->set_child($child);
1624              
1625             #Add it to the tree
1626 0         0 $tree->insert($child);
1627              
1628             #Add it to the terminals list
1629 0         0 push( @terminals, $child );
1630              
1631             #New speciation rate
1632             my $new_speciation_rate =
1633 0         0 $effected_rate * ( 1 + qnorm(rand) * $options{evolving_std} );
1634 0 0       0 if ( $new_speciation_rate < 0 ) { $new_speciation_rate = 0; }
  0         0  
1635 0         0 push( @birth_rates, $new_speciation_rate );
1636 0         0 $net_rate += $new_speciation_rate;
1637             }
1638              
1639             # $net_rate = 0;
1640             # foreach (@birth_rates) { $net_rate += $_; }
1641             #Windows sometimes returns 0 values for rand...
1642 0         0 my ( $r1, $r2 ) = ( 0, 0 );
1643 0         0 $r1 = rand until $r1;
1644 0         0 $tree_size = scalar @terminals;
1645              
1646             #If all species have stopped speciating (unlikely)
1647 0 0       0 if ( $net_rate == 0 ) {
1648 0         0 return $tree;
1649             }
1650 0         0 $next_speciation = -log($r1) / $net_rate / $tree_size;
1651 0 0       0 return $tree unless $tree_size;
1652             }
1653 0         0 return $tree;
1654             }
1655              
1656              
1657             =item clade_shifts()
1658              
1659             A constant rate birth-death model with punctuated changes in the speciation
1660             and extinction rates. At each change one lineage receives new pre-specified
1661             speciation and extinction rates.
1662              
1663             Type : Evolutionary model
1664             Title : clade_shifts
1665             Usage : $tree = clade_shifts(%options)
1666             Function: Produces a tree from the model terminating at a given size/time
1667             Returns : Bio::Phylo::Forest::Tree
1668             Args : %options with fields:
1669             birth_rates The speciation rates
1670             death_rates The death rates
1671             rate_times The times at which the rates are introduced to a new
1672             clade. The first time should be zero. The remaining must be in
1673             ascending order.
1674             tree_size The size of the tree at which to terminate
1675             tree_age The age of the tree at which to terminate
1676              
1677             NB: At least one of tree_size and tree_age must be specified
1678              
1679             =cut
1680              
1681             sub clade_shifts {
1682 0     0 1 0 my %options = @_;
1683              
1684             #Check that we have a termination condition
1685 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1686              
1687             #Error here.
1688 0         0 return undef;
1689             }
1690              
1691             #Set the undefined condition to infinity
1692 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1693 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1694              
1695             #Each node gets an ID number this tracks these
1696 0         0 my $node_id = 0;
1697              
1698             #Create a new tree with a root, start the list of terminal species
1699 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1700 0         0 my $root = Bio::Phylo::Forest::Node->new();
1701 0         0 $root->set_branch_length(0);
1702 0         0 $root->set_name( 'ID' . $node_id++ );
1703 0         0 $tree->insert($root);
1704            
1705             #rates
1706 0         0 my @birth_rates_in = @{$options{birth_rates}};
  0         0  
1707 0         0 my @death_rates_in = @{$options{death_rates}};
  0         0  
1708 0         0 my @rate_times_in = @{$options{rate_times}};
  0         0  
1709            
1710 0 0       0 if ($rate_times_in[0] != 0)
1711             {
1712 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
1713             "The first rate time must be 0" );
1714             }
1715 0 0       0 if (scalar @birth_rates_in != scalar @death_rates_in)
1716             {
1717 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
1718             "birth and death rates must have the same length" );
1719             }
1720 0 0       0 if (scalar @birth_rates_in != scalar @rate_times_in)
1721             {
1722 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
1723             "birth/death rates must have the same length as rate times" );
1724             }
1725 0         0 my @birth_rates = ($birth_rates_in[0]);
1726 0         0 my @death_rates = ($death_rates_in[0]);
1727            
1728 0         0 my $net_birth_rate = $birth_rates[0];
1729 0         0 my $net_death_rate = $death_rates[0];
1730            
1731 0         0 shift(@birth_rates_in);
1732 0         0 shift(@death_rates_in);
1733            
1734 0         0 my @terminals = ($root);
1735 0         0 my ( $next_extinction, $next_speciation, $next_rate_change );
1736 0         0 my $time = 0;
1737 0         0 my $tree_size = 1;
1738 0         0 my $inf = 9**9**9**9;
1739              
1740             #Check whether we have a non-zero root edge
1741 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1742              
1743             #Non-zero root. We set the time to the first speciation event
1744 0 0       0 if ($birth_rates[0] > 0)
1745             {
1746 0         0 $next_speciation = -log(rand) / $birth_rates[0] ;
1747             } else
1748             {
1749 0         0 $next_speciation = $inf;
1750             }
1751             }
1752             else {
1753             #Zero root, we want a speciation event straight away
1754 0         0 $next_speciation = 0.0;
1755             }
1756              
1757             #Time of the first extinction event. If no extinction we always
1758             #set the extinction event after the current speciation event
1759 0 0       0 if ($death_rates[0] > 0)
1760             {
1761 0         0 $next_extinction = -log(rand) / $death_rates[0];
1762             } else
1763             {
1764 0         0 $next_extinction = $inf;
1765             }
1766            
1767             #Time of next rate change
1768 0         0 shift(@rate_times_in); #pop the initial 0
1769 0         0 $next_rate_change = shift(@rate_times_in);
1770            
1771             #While the tree has not become extinct and the termination criterion
1772             #has not been achieved we create new speciation and extinction events
1773 0   0     0 while ($tree_size > 0
      0        
1774             && $tree_size < $options{tree_size}
1775             && $time < $options{tree_age} )
1776             {
1777             #print $time."|".$tree_size."\n";
1778             #Update rates if a clade shift is happening
1779             #TODO index rates or pop off one at a time.
1780            
1781              
1782             #Add the time since the last event to all terminal species
1783 0         0 foreach (@terminals) {
1784             $_->set_branch_length(
1785             $_->get_branch_length + min(
1786             $next_extinction,
1787             $next_speciation,
1788             $next_rate_change-$time,
1789 0         0 $options{tree_age} - $time
1790             )
1791             );
1792             }
1793              
1794             #Update the time
1795 0         0 my $time_last = $time;
1796 0         0 $time += min( $next_extinction, $next_speciation, $next_rate_change-$time_last );
1797              
1798             #If the tree exceeds the time limit we are done
1799 0 0       0 return $tree if ( $time > $options{tree_age} );
1800            
1801             #We have a rate change
1802 0 0       0 if ($next_rate_change-$time_last < min($next_extinction, $next_speciation))
1803             {
1804             #Find a random species to effect
1805 0         0 my $effected_species = int(rand($tree_size));
1806             #Subtract current rates
1807 0         0 $net_death_rate -= $death_rates[$effected_species];
1808 0         0 $net_birth_rate -= $birth_rates[$effected_species];
1809             #Get new rates
1810 0         0 $death_rates[$effected_species] = shift(@death_rates_in);
1811 0         0 $birth_rates[$effected_species] = shift(@birth_rates_in);
1812             #Add new rates
1813 0         0 $net_death_rate += $death_rates[$effected_species];
1814 0         0 $net_birth_rate += $birth_rates[$effected_species];
1815             #Get next rate change time
1816 0 0       0 if (scalar(@rate_times_in))
1817             {
1818 0         0 $next_rate_change = shift(@rate_times_in);
1819             } else
1820             {
1821 0         0 $next_rate_change = $inf;
1822             }
1823             }
1824             #Choosing a random species to speciate
1825             else
1826             {
1827 0         0 my $selected = 0;
1828 0 0       0 if ( $next_speciation < $next_extinction )
1829             {
1830 0         0 my $rand_select = rand($net_birth_rate);
1831 0   0     0 for (
1832             ;
1833             $selected < scalar @terminals
1834             && $rand_select > $birth_rates[$selected] ;
1835             $selected++
1836             )
1837             {
1838 0         0 $rand_select -= $birth_rates[$selected];
1839             }
1840             } else
1841             {
1842 0         0 my $rand_select = rand($net_death_rate);
1843 0   0     0 for (
1844             ;
1845             $selected < scalar @terminals
1846             && $rand_select > $death_rates[$selected] ;
1847             $selected++
1848             )
1849             {
1850 0         0 $rand_select -= $death_rates[$selected];
1851             }
1852             }
1853 0 0       0 if ($net_birth_rate == 0)
1854             {
1855 0         0 $selected = 0;
1856             }
1857              
1858             #Remove the species effected by this event and remove it from the terminals list
1859 0         0 my $effected = splice( @terminals, $selected, 1 );
1860 0         0 my $effected_birth_rate = splice( @birth_rates, $selected, 1 );
1861 0         0 my $effected_death_rate = splice( @death_rates, $selected, 1 );
1862            
1863 0         0 $net_birth_rate -= $effected_birth_rate;
1864 0         0 $net_death_rate -= $effected_death_rate;
1865              
1866             #If we have a speciation event we add two new species
1867 0 0 0     0 if ( $next_speciation < $next_extinction || !defined $next_extinction )
1868             {
1869 0         0 foreach ( 1, 2 ) {
1870              
1871             #Create a new species
1872 0         0 my $child = Bio::Phylo::Forest::Node->new();
1873 0         0 $child->set_name( 'ID' . $node_id++ );
1874              
1875             #Give it a zero edge length
1876 0         0 $child->set_branch_length(0);
1877              
1878             #Add it as a child to the speciating species
1879 0         0 $effected->set_child($child);
1880              
1881             #Add it to the tree
1882 0         0 $tree->insert($child);
1883              
1884             #Add it to the terminals list
1885 0         0 push( @terminals, $child );
1886            
1887 0         0 push( @birth_rates, $effected_birth_rate );
1888 0         0 push( @death_rates, $effected_death_rate );
1889            
1890 0         0 $net_death_rate += $effected_death_rate;
1891 0         0 $net_birth_rate += $effected_birth_rate;
1892            
1893             }
1894             }
1895             }
1896             #We calculate the time that the next extinction and speciation
1897             #events will occur (only the earliest of these will actually
1898             #happen). NB: this approach is only appropriate for models where
1899             #speciation and extinction times are exponentially distributed.
1900            
1901             #Windows sometimes returns 0 values for rand...
1902 0         0 my ( $r1, $r2 ) = ( 0, 0 );
1903 0         0 $r1 = rand until $r1;
1904 0         0 $r2 = rand until $r2;
1905            
1906             #The current tree size
1907 0         0 $tree_size = scalar @terminals;
1908            
1909 0 0       0 return $tree unless $tree_size;
1910            
1911 0 0       0 if ($net_birth_rate > 0)
1912             {
1913 0         0 $next_speciation = -log($r1) / $net_birth_rate;
1914             } else
1915             {
1916 0         0 $next_speciation = $inf;
1917             }
1918            
1919 0 0       0 if ($net_death_rate > 0)
1920             {
1921 0         0 $next_extinction = -log($r2) / $net_death_rate;
1922             } else
1923             {
1924 0         0 $next_extinction = $inf;
1925             }
1926              
1927             }
1928 0         0 return $tree;
1929             }
1930              
1931             =item beta_binomial()
1932              
1933             An evolutionary model featuring evolving speciation rates. From Blum2007
1934              
1935             Type : Evolutionary model
1936             Title : beta_binomial
1937             Usage : $tree = beta_binomial(%options)
1938             Function: Produces a tree from the model terminating at a given size/time
1939             Returns : Bio::Phylo::Forest::Tree
1940             Args : %options with fields:
1941             birth_rate The initial speciation rate (default 1)
1942             model_param The parameter as defined in Blum2007
1943             tree_size The size of the tree at which to terminate
1944             tree_age The age of the tree at which to terminate
1945              
1946             NB: At least one of tree_size and tree_age must be specified
1947              
1948             =cut
1949              
1950             sub beta_binomial {
1951 0     0 1 0 my %options = @_;
1952              
1953             #Check that we have a termination condition
1954 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1955              
1956             #Error here.
1957 0         0 return undef;
1958             }
1959              
1960             #Set the undefined condition to infinity
1961 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1962 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1963              
1964             #Set default rates
1965 0 0       0 $options{birth_rate} = 1 unless defined( $options{birth_rate} );
1966 0 0       0 $options{model_param} = 0 unless defined( $options{model_param} );
1967              
1968             #Each node gets an ID number this tracks these
1969 0         0 my $node_id = 0;
1970              
1971             #Create a new tree with a root, start the list of terminal species
1972 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1973 0         0 my $root = Bio::Phylo::Forest::Node->new();
1974 0         0 $root->set_branch_length(0);
1975 0         0 $root->set_name( 'ID' . $node_id++ );
1976 0         0 $tree->insert($root);
1977 0         0 my @terminals = ($root);
1978 0         0 my @birth_rates = ( $options{birth_rate} );
1979 0         0 my $net_rate = $options{birth_rate};
1980 0         0 my $next_speciation;
1981 0         0 my $time = 0;
1982 0         0 my $tree_size = 1;
1983              
1984             #Check whether we have a non-zero root edge
1985 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1986              
1987             #Non-zero root. We set the time to the first speciation event
1988 0         0 $next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
1989             }
1990             else {
1991              
1992             #Zero root, we want a speciation event straight away
1993 0         0 $next_speciation = 0;
1994             }
1995              
1996             #While we haven't reached termination
1997 0   0     0 while ( $tree_size < $options{tree_size} && $time < $options{tree_age} ) {
1998              
1999             #Add the time since the last event to all terminal species
2000 0         0 foreach (@terminals) {
2001             $_->set_branch_length( $_->get_branch_length +
2002 0         0 min( $next_speciation, $options{tree_age} - $time ) );
2003             }
2004              
2005             #Update the time
2006 0         0 $time += $next_speciation;
2007              
2008             #If the tree exceeds the time limit we are done
2009 0 0       0 return $tree if ( $time > $options{tree_age} );
2010              
2011             #Get the species effected by this event
2012 0         0 my $rand_select = rand($net_rate);
2013 0         0 my $selected = 0;
2014 0   0     0 for (
2015             ;
2016             $selected < scalar @terminals
2017             && $rand_select > $birth_rates[$selected] ;
2018             $selected++
2019             )
2020             {
2021 0         0 $rand_select -= $birth_rates[$selected];
2022             }
2023              
2024             #Remove it from the terminals list
2025 0         0 my $effected = splice( @terminals, $selected, 1 );
2026 0         0 my $effected_rate = splice( @birth_rates, $selected, 1 );
2027             my $p =
2028 0         0 qbeta( rand, $options{model_param} + 1, $options{model_param} + 1 );
2029              
2030             #If we have a speciation event we add two new species
2031 0         0 foreach ( 1, 2 ) {
2032              
2033             #Create a new species
2034 0         0 my $child = Bio::Phylo::Forest::Node->new();
2035 0         0 $child->set_name( 'ID' . $node_id++ );
2036              
2037             #Give it a zero edge length
2038 0         0 $child->set_branch_length(0);
2039              
2040             #Add it as a child to the speciating species
2041 0         0 $effected->set_child($child);
2042              
2043             #Add it to the tree
2044 0         0 $tree->insert($child);
2045              
2046             #Add it to the terminals list
2047 0         0 push( @terminals, $child );
2048              
2049             #New speciation rate
2050 0         0 my $new_speciation_rate = $effected_rate * $p;
2051 0         0 $p = 1 - $p;
2052 0 0       0 if ( $new_speciation_rate < 0 ) { $new_speciation_rate = 0; }
  0         0  
2053 0         0 push( @birth_rates, $new_speciation_rate );
2054             }
2055              
2056             #Windows sometimes returns 0 values for rand...
2057 0         0 my ( $r1, $r2 ) = ( 0, 0 );
2058 0         0 $r1 = rand until $r1;
2059 0         0 $tree_size = scalar @terminals;
2060              
2061             #If all species have stopped speciating (unlikely)
2062 0 0       0 if ( $net_rate == 0 ) {
2063 0         0 return $tree;
2064             }
2065 0         0 $next_speciation = -log($r1) / $net_rate / $tree_size;
2066 0 0       0 return $tree unless $tree_size;
2067             }
2068 0         0 return $tree;
2069             }
2070              
2071             =back
2072              
2073             =cut
2074              
2075             =begin comment
2076              
2077             ###########################################################
2078             #INTERNAL METHODS
2079             #These are methods that permit additional manipulations of
2080             #a Bio::Phylo::Tree to be easily made. As such some of
2081             #these could easily be moved into Bio::Phylo::Forest::Tree
2082             ###########################################################
2083              
2084             =end comment
2085              
2086             =cut
2087              
2088             =begin comment
2089              
2090             Type : Internal method
2091             Title : copy_tree
2092             Usage : $tree = copy_tree($tree)
2093             Function: Makes a new independent copy of a tree
2094             Returns : the phylogenetic $tree
2095             Args : the phylogenetic $tree
2096              
2097             =end comment
2098              
2099             =cut
2100              
2101             sub copy_tree {
2102 15     15 0 112 return Bio::Phylo::IO->parse(
2103             -format => 'newick',
2104             -string => shift->to_newick( '-nodelabels' => 1 )
2105             )->first;
2106             }
2107              
2108             =begin comment
2109              
2110             Type : Internal method
2111             Title : truncate_tree_time
2112             Usage : truncate_tree_time($tree,$age)
2113             Function: Truncates the tree at the specified age
2114             Returns : N/A
2115             Args : $tree: the phylogenetic tree (which will be modified)
2116             $age: the age at which to cut the $tree
2117              
2118             =end comment
2119              
2120             =cut
2121              
2122             sub truncate_tree_time {
2123              
2124             #$node and $time are used only by this function recursively
2125 1127     1127 0 2237 my ( $tree, $age, $node, $time ) = @_;
2126              
2127             #If node and time weren't specified we are starting from the root
2128 1127 100       2104 $node = $tree->get_root unless defined $node;
2129 1127 100       1743 $time = 0 unless defined $time;
2130              
2131             #If we are truncating this branch
2132 1127 100       1967 if ( $time + $node->get_branch_length >= $age ) {
2133              
2134             #Collapse the node unless it is terminal
2135 257 100       605 $node->collapse unless $node->is_terminal();
2136              
2137             #Set the branch length appropriately
2138 257         941 $node->set_branch_length( $age - $time );
2139 257         888 return;
2140             }
2141              
2142             #If this node has no children we are done
2143 870 100       1994 return if $node->is_terminal();
2144              
2145             #Call the function recursively on the children
2146 504         776 foreach ( @{ $node->get_children } ) {
  504         941  
2147 1112         2480 truncate_tree_time( $tree, $age, $_,
2148             $time + $node->get_branch_length() );
2149             }
2150             }
2151              
2152             =begin comment
2153              
2154             Type : Internal method
2155             Title : truncate_tree_size
2156             Usage : truncate_tree_size($tree,$size)
2157             Function: Truncates the tree to the specified number of species
2158             Returns : N/A
2159             Args : $tree: the phylogenetic tree (which will be modified)
2160             $size: random species are delete so that the tree has this many terminals
2161              
2162             =end comment
2163              
2164             =cut
2165              
2166             sub truncate_tree_size {
2167 5     5 0 14 my ( $tree, $size ) = @_;
2168 5         11 my @terminals = @{ $tree->get_terminals };
  5         28  
2169 5         32 my @names;
2170              
2171             #Calculate the tree height and node distances from the root
2172             #much more efficient to do this in one hit than repeatedly
2173             #calling the analogous functions on the tree
2174 5         31 _calc_node_properties($tree);
2175              
2176             #Only push species that are extant and store the number of those
2177             # my $tree_height = $tree->get_tallest_tip->calc_path_to_root;
2178 5         19 my $tree_height = $tree->get_root->get_generic('tree_height');
2179 5         23 foreach (@terminals) {
2180 631 100       1333 if (
2181             abs(
2182             ( $_->get_generic('root_distance') - $tree_height ) /
2183             $tree_height
2184             ) < 1e-6
2185             )
2186             {
2187 147         335 push( @names, $_->get_name );
2188             }
2189             }
2190 5 50       20 if ( @names < $size ) { print "Internal error\n"; }
  0         0  
2191 5         14 my %deletions;
2192 5         25 while ( scalar( keys %deletions ) < @names - $size ) {
2193 172         422 $deletions{ $names[ int( rand(@names) ) ] } = 1;
2194             }
2195 5         51 $tree = prune_tips( $tree, [ keys %deletions ] );
2196 5         161 return $tree;
2197             }
2198              
2199             sub _get_ultrametric_size {
2200 0     0   0 my ( $tree, $size ) = @_;
2201 0         0 my @terminals = @{ $tree->get_terminals };
  0         0  
2202 0         0 _calc_node_properties($tree);
2203 0         0 my @names;
2204 0         0 my $tree_height = $tree->get_root->get_generic('tree_height');
2205 0         0 foreach (@terminals) {
2206 0 0       0 if (
2207             abs(
2208             ( $_->get_generic('root_distance') - $tree_height ) /
2209             $tree_height
2210             ) < 1e-6
2211             )
2212             {
2213 0         0 push( @names, $_->get_name );
2214             }
2215             }
2216 0         0 return scalar @names;
2217             }
2218              
2219             =begin comment
2220              
2221             Type : Internal method
2222             Title : remove_extinct_species
2223             Usage : remove_extinct_species($tree)
2224             Function: Removes extinct species from the tree. An extinct species
2225             is a terminal that does not extend as far as the furthest
2226             terminal(s).
2227             Returns : N/A
2228             Args : $tree: the phylogenetic tree (which will be modified)
2229             $age: the age at which to cut the $tree
2230              
2231             =end comment
2232              
2233             =cut
2234              
2235             sub remove_extinct_species {
2236 0     0 0 0 my $tree = shift;
2237              
2238             #Calculate the tree height and node distances from the root
2239             #much more efficient to do this in one hit than repeatedly
2240             #calling the analogous functions on the tree
2241 0         0 _calc_node_properties($tree);
2242 0         0 my $height = $tree->get_root->get_generic('tree_height');
2243 0 0       0 return unless $height > 0;
2244 0         0 my $leaves = $tree->get_terminals;
2245 0 0       0 return unless $leaves;
2246 0         0 my @remove;
2247 0         0 foreach ( @{$leaves} ) {
  0         0  
2248              
2249 0 0       0 unless (
2250             abs( ( $_->get_generic('root_distance') - $height ) / $height ) <
2251             1e-6 )
2252             {
2253 0         0 push( @remove, $_->get_name );
2254             }
2255             }
2256 0         0 $tree = prune_tips( $tree, \@remove );
2257 0         0 return $tree;
2258             }
2259              
2260             =begin comment
2261              
2262             Type : Internal method
2263             Title : prune_tips
2264             Usage : prune_tips($tree,$tips)
2265             Function: Removes named terminals from the tree
2266             Returns : N/A
2267             Args : $tree: the phylogenetic tree (which will be modified)
2268             $tips: array ref of terminals to remove from the tree
2269              
2270             NB: Available as $tree->prune_tips($tips), but had some problems with
2271             this.
2272              
2273             =end comment
2274              
2275             =cut
2276              
2277             sub prune_tips {
2278 5     5 0 18 my ( $self, $tips ) = @_;
2279 5         12 my %names_to_delete = map { $_ => 1 } @{$tips};
  97         175  
  5         17  
2280 534         901 my %keep = map { $_->get_name => 1 }
2281 631         1228 grep { not exists $names_to_delete{ $_->get_name } }
2282 5         13 @{ $self->get_terminals };
  5         80  
2283             $self->visit_depth_first(
2284             -post => sub {
2285 1196     1196   1774 my $node = shift;
2286 1196 100       2562 if ( $node->is_terminal ) {
2287 631 100       1617 if ( not $keep{ $node->get_name } ) {
2288 97         368 $node->set_parent();
2289 97         339 $self->delete($node);
2290             }
2291             }
2292             else {
2293 565         1008 my $seen_tip_to_keep = 0;
2294 565         843 for my $tip ( @{ $node->get_terminals } ) {
  565         1538  
2295 11941 100       22647 $seen_tip_to_keep++ if $keep{ $tip->get_name };
2296             }
2297 565 100       3371 if ( not $seen_tip_to_keep ) {
2298 29         118 $node->set_parent();
2299 29         118 $self->delete($node);
2300             }
2301             }
2302             }
2303 5         130 );
2304 5         107 $self->remove_unbranched_internals;
2305 5         248 return $self;
2306             }
2307              
2308             =begin comment
2309              
2310             Type : Internal method
2311             Title : lineage_through_time
2312             Usage : ($time,$count) = lineage_through_time($tree)
2313             Function: Alternative to $tree->ltt that permits extinctions
2314             Returns : $time: array ref of times
2315             $count: array ref of species counts corresponding to the times
2316             Args : the phylogenetic $tree for which to produce the ltt data
2317              
2318             =end comment
2319              
2320             =cut
2321              
2322             sub lineage_through_time {
2323 60     60 0 132 my $tree = shift;
2324 60         231 my ( $speciation, $extinction ) = _recursive_ltt_helper($tree);
2325 60         145 my @speciation = sort { $a <=> $b } @{$speciation};
  9823         10713  
  60         414  
2326 60         127 my @extinction = sort { $a <=> $b } @{$extinction};
  10362         11127  
  60         152  
2327 60         169 my @time = (0);
2328 60         125 my @count = (1);
2329 60         94 my $n_species = 1;
2330 60         257 my $end_time = max( @speciation, @extinction );
2331 60 50       194 return ( [], [] ) if ( $end_time == 0 );
2332              
2333             #We remove any extinction events occurring at the very end of the tree (as they are not real extinctions)
2334 60   100     348 while ( scalar @extinction
2335             && ( $end_time - $extinction[-1] ) / $end_time < 1e-6 )
2336             {
2337 690         1756 pop @extinction;
2338             }
2339 60   100     213 while ( scalar @speciation || scalar @extinction ) {
2340 3854 100 100     10851 if ( scalar @extinction == 0
      100        
2341             || ( scalar @speciation && $speciation[0] < $extinction[0] ) )
2342             {
2343 2242         2881 push( @count, ++$n_species );
2344 2242         4379 push( @time, shift(@speciation) );
2345             }
2346             else {
2347 1612         2173 push( @count, --$n_species );
2348 1612         3108 push( @time, shift(@extinction) );
2349             }
2350             }
2351 60         374 return ( \@time, \@count );
2352             }
2353              
2354             =begin comment
2355              
2356             Type : Internal method
2357             Title : _recursive_ltt_helper
2358             Usage : ($speciation, $extinction) = _recursive_ltt_helper($tree)
2359             Function: Helper for lineage_through_time
2360             Returns : $speciation: array ref of speciation times
2361             $extinction: array ref of extinction times
2362             Args : the phylogenetic $tree for which to produce the ltt data
2363              
2364             =end comment
2365              
2366             =cut
2367              
2368             sub _recursive_ltt_helper {
2369 4544     4544   7039 my ( $tree, $node, $time ) = @_;
2370              
2371             #If we are being invoked at the root level
2372 4544 100       7820 $node = $tree->get_root unless defined $node;
2373 4544 100       6945 $time = 0 unless defined $time;
2374              
2375             #The new time
2376 4544         8861 $time += $node->get_branch_length;
2377 4544 100       9381 return ( [], [$time] ) if ( $node->is_terminal );
2378 2242         3879 my @speciation;
2379             my @extinction;
2380 2242         2703 foreach ( @{ $node->get_children } ) {
  2242         3962  
2381 4484         8686 my ( $spec, $ext ) = _recursive_ltt_helper( $tree, $_, $time );
2382 4484         6998 @speciation = ( @speciation, @{$spec} );
  4484         10506  
2383 4484         6078 @extinction = ( @extinction, @{$ext} );
  4484         16186  
2384             }
2385 2242         4176 push( @speciation, $time );
2386 2242         4304 return ( \@speciation, \@extinction );
2387             }
2388              
2389             =begin comment
2390              
2391             Type : Internal method.
2392             Title : _calc_node_properties
2393             Usage : _calc_node_properties($tree);
2394             Function: Calculates the distance of nodes from the root
2395             Returns : The maximum distance from the root
2396             Args :
2397              
2398             =end comment
2399              
2400             =cut
2401              
2402             sub _calc_node_properties {
2403 1196     1196   2197 my ( $node, $root_distance );
2404 1196         1671 my $tree = shift;
2405 1196         3309 my $root = $tree->get_root;
2406              
2407             #Check whether we were given a node and distance
2408 1196 100       3084 if ( scalar @_ ) {
2409 1191         2141 $node = shift;
2410 1191         1651 $root_distance = shift;
2411              
2412             #Otherwise the root is the default
2413             }
2414             else {
2415 5         14 $node = $root;
2416 5         58 $root->set_generic( tree_height => 0 );
2417 5         13 $root_distance = 0;
2418             }
2419 1196         4080 $node->set_generic( root_distance => $root_distance );
2420 1196 100       3525 if ( $root_distance > $root->get_generic('tree_height') ) {
2421 126         325 $root->set_generic( tree_height => $root_distance );
2422             }
2423 1196         2206 my $terminal_count = 0;
2424 1196         3100 my $children = $node->get_children;
2425 1196 50       3054 if ( defined $children ) {
2426 1196         1672 foreach ( @{$children} ) {
  1196         4085  
2427 1191         3235 _calc_node_properties( $tree, $_,
2428             $root_distance + $_->get_branch_length() );
2429             }
2430             }
2431             }
2432              
2433             =begin comment
2434              
2435             Type : Internal method
2436             Title : nchoosek
2437             Usage : $out = nchoosek($n,$k)
2438             Function: Returns the binomial coefficient for $n and $k
2439             Returns : the binomial coefficient
2440             Args : $n, $k
2441              
2442             =end comment
2443              
2444             =cut
2445              
2446             sub nchoosek {
2447 0     0 0   my ( $n, $k ) = @_;
2448 0           my $r = 1;
2449 0 0 0       return 0 if ( $k > $n || $k < 0 );
2450 0           for ( my $d = 1 ; $d <= $k ; $d++ ) {
2451 0           $r *= $n--;
2452 0           $r /= $d;
2453             }
2454 0           return $r;
2455             }
2456             1;