File Coverage

blib/lib/Bio/Phylo/EvolutionaryModels.pm
Criterion Covered Total %
statement 396 819 48.3
branch 93 258 36.0
condition 38 143 26.5
subroutine 34 46 73.9
pod 13 21 61.9
total 574 1287 44.6


line stmt bran cond sub pod time code
1             package Bio::Phylo::EvolutionaryModels;
2 1     1   807 use strict;
  1         3  
  1         26  
3 1     1   4 use base 'Exporter';
  1         2  
  1         83  
4 1     1   337 use Bio::Phylo::Forest::Tree;
  1         3  
  1         10  
5 1     1   376 use Bio::Phylo::Forest;
  1         3  
  1         9  
6 1     1   7 use Math::CDF qw'qnorm qbeta';
  1         2  
  1         62  
7 1     1   6 use List::Util qw'min max';
  1         1  
  1         56  
8 1     1   6 use POSIX qw'ceil floor';
  1         2  
  1         7  
9 1     1   1286 use Config; #Use to check whether multi-threading is available
  1         2  
  1         40  
10              
11             BEGIN {
12              
13 1     1   5 use Bio::Phylo;
  1         2  
  1         3  
14 1     1   1027 our @EXPORT_OK =
15             qw(&sample &constant_rate_birth &constant_rate_birth_death &clade_shifts);
16             }
17              
18             =head1 NAME
19              
20             Bio::Phylo::EvolutionaryModels - Evolutionary models for phylogenetic trees and methods to sample these
21             Klaas Hartmann, September 2007
22              
23             =head1 SYNOPSIS
24              
25             #For convenience we import the sample routine (so we can write sample(...) instead of
26             #Bio::Phylo::EvolutionaryModels::sample(...).
27             use Bio::Phylo::EvolutionaryModels qw (sample);
28            
29             #Example#A######################################################################
30             #Simulate a single tree with ten species from the constant rate birth model with parameter 0.5
31             my $tree = Bio::Phylo::EvolutionaryModels::constant_rate_birth(birth_rate => .5, tree_size => 10);
32            
33             #Example#B######################################################################
34             #Sample 5 trees with ten species from the constant rate birth model using the b algorithm
35             my ($sample,$stats) = sample(sample_size =>5,
36             tree_size => 10,
37             algorithm => 'b',
38             algorithm_options => {rate => 1},
39             model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth,
40             model_options => {birth_rate=>.5});
41              
42            
43             #Print a newick string for the 4th sampled tree
44             print $sample->[3]->to_newick."\n";
45            
46             #Example#C######################################################################
47             #Sample 5 trees with ten species from the constant rate birth and death model using
48             #the bd algorithm and two threads (useful for dual core processors)
49             #NB: we must specify an nstar here, an appropriate choice will depend on the birth_rate
50             # and death_rate we are giving the model
51            
52             my ($sample,$stats) = sample(sample_size =>5,
53             tree_size => 10,
54             threads => 2,
55             algorithm => 'bd',
56             algorithm_options => {rate => 1, nstar => 30},
57             model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth_death,
58             model_options => {birth_rate=>1,death_rate=>.8});
59            
60             #Example#D######################################################################
61             #Sample 5 trees with ten species from the constant rate birth and death model using
62             #incomplete taxon sampling
63             #
64             #sampling_probability is set so that the true tree has 10 species with 50% probability,
65             #11 species with 30% probability and 12 species with 20% probability
66             #
67             #NB: we must specify an mstar here this will depend on the model parameters and the
68             # incomplete taxon sampling parameters
69              
70             my $algorithm_options = {rate => 1,
71             nstar => 30,
72             mstar => 12,
73             sampling_probability => [.5, .3, .2]};
74            
75             my ($sample,$stats) = sample(sample_size =>5,
76             tree_size => 10,
77             algorithm => 'incomplete_sampling_bd',
78             algorithm_options => $algorithm_options,
79             model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth_death,
80             model_options => {birth_rate=>1,death_rate=>.8});
81              
82             #Example#E######################################################################
83             #Sample 5 trees with ten species from a Yule model using the memoryless_b algorithm
84            
85             #First we define the random function for the shortest pendant edge for a Yule model
86             my $random_pendant_function = sub {
87             %options = @_;
88             return -log(rand)/$options{birth_rate}/$options{tree_size};
89             };
90            
91             #Then we produce our sample
92             my ($sample,$stats) = sample(sample_size =>5,
93             tree_size => 10,
94             algorithm => 'memoryless_b',
95             algorithm_options => {pendant_dist => $random_pendant_function},
96             model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth,
97             model_options => {birth_rate=>1});
98              
99             #Example#F#######################################################################
100             #Sample 5 trees with ten species from a constant birth death rate model using the
101             #constant_rate_bd algorithm
102             my ($sample) = sample(sample_size => 5,
103             tree_size => 10,
104             algorithm => 'constant_rate_bd',
105             model_options => {birth_rate=>1,death_rate=>.8});
106              
107             =head1 DESCRIPTION
108              
109             This package contains evolutionary models for phylogenetic trees and
110             algorithms for sampling from these models. It is a non-OO module that
111             optionally exports the 'sample', 'constant_rate_birth' and
112             'constant_rate_birth_death' subroutines into the caller's namespace,
113             using the C<< use Bio::Phylo::EvolutionaryModels qw(sample constant_rate_birth constant_rate_birth_death); >>
114             directive. Alternatively, you can call the subroutines as class methods,
115             as in the synopsis.
116              
117             The initial set of algorithms available in this package corresponds to those in:
118              
119             Sampling trees from evolutionary models
120             Klaas Hartmann, Dennis Wong, Tanja Gernhard
121             Systematic Biology, in press
122              
123             Some comments and code refers back to this paper.
124             Further algorithms and evolutionary are encouraged
125             and welcome.
126              
127             To make this code as straightforward as possible to read some of the
128             algorithms have been implemented in a less than optimal manner. The code
129             also follows the structure of an earlier version of the manuscript so
130             there is some redundancy (eg. the birth algorithm is just a specific
131             instance of the birth_death algorithm)
132              
133             =head1 SAMPLING
134              
135             All sampling algorithms should be accessed through the generic sample
136             interface.
137              
138             =head2 Generic sampling interface: sample()
139              
140             Type : Interface
141             Title : sample
142             Usage : see SYNOPSIS
143             Function: Samples phylogenetic trees from an evolutionary model
144             Returns : A sample of phylogenetic trees and statistics from the
145             sampling algorithm
146             Args : Sampling parameters in a hash
147              
148             This method acts as a gateway to the various sampling algorithms. The
149             argument is a single hash containing the options for the sampling run.
150              
151              
152             Sampling parameters (* denotes optional parameters):
153            
154             sample_size The number of trees to return (more trees may be returned)
155             tree_size The size that returned trees should be
156             model The evolutionary model (should be a function reference)
157             model_options A hash pointer for model options (see individual models)
158             algorithm The algorithm to use (omit the preceding sample_)
159             algorithm_options A hash pointer for options for the algorithm (see individual algorithms for details)
160             threads* The number of threads to use (default is 1)
161             output_format* Set to newick for newick trees (default is Bio::Phylo::Forest::Tree)
162             remove_extinct Set to true to remove extinct species
163              
164              
165              
166             Available algorithms (algorithm names in the paper are given in brackets):
167              
168             b For all pure birth models (simplified GSA)
169             bd For all birth and death models (GSA)
170             incomplete_sampling_bd As above, with incomplete taxon sampling (extended GSA)
171             memoryless_b For memoryless pure birth models (PBMSA)
172             constant_rate_bd For birth and death models with constant rates (BDSA)
173              
174              
175             Model
176              
177             If you create your own model it must accept an options hash as its input.
178             This options hash can contain any parameters you desire. Your model should
179             simulate a tree until it becomes extinct or the size/age limit as specified
180             in the options has been reached. Respectively these options are tree_size
181             and tree_age.
182              
183              
184             Multi-threading
185              
186             Multi-thread support is very simplistic. The number of threads you specify
187             are created and each is assigned the task of finding sample_size/threads
188             samples. I had problems with using Bio::Phylo::Forest::Tree in a multi-
189             threaded setting. Hence the sampled trees are returned as newick strings to
190             the main routine where (if required) Tree objects are recreated from the
191             strings. For most applications this overhead seems negligible in contrast
192             to the sampling times.
193              
194              
195             From a code perspective this function (sample):
196              
197             Checks input arguments
198             Handles multi-threading
199             Calls the individual algorithms to perform sampling
200             Reformats data
201              
202             =cut
203              
204             my @threads;
205              
206             sub sample {
207 5     5 1 107 my %options = @_;
208 5         66 my %methods_require = (
209             b => ['rate'],
210             bd => [ 'rate', 'nstar' ],
211             incomplete_sampling_bd =>
212             [ 'rate', 'nstar', 'mstar', 'sampling_probability' ],
213             memoryless_b => ['pendant_dist'],
214             constant_rate_bd => [],
215             );
216              
217             #Default is to sample a single tree
218 5 50       20 $options{sample_size} = 1 unless defined $options{sample_size};
219              
220             #Default is to use a single thread
221 5 100       27 $options{threads} = 1 unless defined $options{threads};
222              
223             #Check that multiple threads are actually supported
224 5 50 33     22 if ( $options{threads} > 1 && !$Config{useithreads} ) {
225 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
226             "your perl installation does not support multiple threads" );
227             }
228              
229             #Check an algorithm was specified
230 5 50       19 unless ( defined $options{algorithm} ) {
231 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw(
232             'error' => "an algorithm type must be specified" );
233             }
234              
235             #Check the algorithm type is valid
236 5 50       18 unless ( defined $methods_require{ $options{algorithm} } ) {
237 0         0 Bio::Phylo::Util::Exceptions::BadFormat->throw(
238             'error' => "'$options{algorithm}' is not a valid algorithm" );
239             }
240              
241             #Check the algorithm options
242 5         11 foreach ( @{ $methods_require{ $options{algorithm} } } ) {
  5         18  
243 8 50       23 unless ( defined $options{algorithm_options}->{$_} ) {
244 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
245             "'$_' must be specified for the '$options{algorithm}' algorithm"
246             );
247             }
248             }
249              
250             #If we are doing incomplete taxon sampling the sampling probability must be specified
251 5 0 33     17 if ( defined $options{incomplete_sampling}
      0        
252             && $options{incomplete_sampling}
253             && !( defined $options{algorithm_options}->{sampling_probability} ) )
254             {
255 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
256             "'sampling_probability' must be specified for post hoc incomplete sampling to be applied"
257             );
258             }
259              
260             #Check that a model has been specified
261 5 50 66     47 unless ( defined $options{model}
262             || $options{algorithm} eq 'constant_rate_bd' )
263             {
264 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw(
265             'error' => "a model must be specified" );
266             }
267              
268             #Get a function pointer for the algorithm
269 5         18 my $algorithm = 'sample_' . $options{algorithm};
270 5         23 $algorithm = \&$algorithm;
271 5         11 my @output;
272              
273             #Run the algorithm, different method for multiple threads
274 5 50       15 if ( $options{threads} > 1 ) {
275 0         0 require threads;
276 0         0 require threads::shared;
277 0         0 @output = ( [], [] );
278             $SIG{'KILL'} = sub {
279 0     0   0 foreach (@threads) { $_->kill('KILL')->detach(); }
  0         0  
280 0         0 threads->exit();
281 0         0 };
282              
283             #Start the threads
284 0         0 for ( ( 1 .. $options{threads} ) ) {
285 0         0 @_ = ();
286              
287             #Note the list context of the return argument here determines
288             #the data type returned from the thread.
289             ( $threads[ $_ - 1 ] ) = threads->new( \&_sample_newick, %options,
290             sample_size => ceil( $options{sample_size} / $options{threads} )
291 0         0 );
292             }
293              
294             #Wait for them to finish and combine the data
295 0         0 for ( ( 1 .. $options{threads} ) ) {
296 0         0 until ( $threads[ $_ - 1 ]->is_joinable() ) { sleep(0.1); }
  0         0  
297 0         0 my @thread_data = $threads[ $_ - 1 ]->join;
298 0         0 for ( my $index = 0 ; $index < scalar @thread_data ; $index++ ) {
299 0 0       0 $output[$index] = [] if scalar @output < $index;
300             $output[$index] =
301 0         0 [ @{ $output[$index] }, @{ $thread_data[$index] } ];
  0         0  
  0         0  
302             }
303             }
304              
305             #Turn newick strings back into tree objects
306 0 0 0     0 unless ( defined $options{output_format}
307             && $options{output_format} eq 'newick' )
308             {
309              
310             #Convert to newick trees
311 0         0 for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
  0         0  
312 0         0 $output[0]->[$index] = Bio::Phylo::IO->parse(
313             -format => 'newick',
314             -string => $output[0]->[$index]
315             )->first;
316             }
317             }
318             }
319             else {
320              
321             #Get the samples
322 5         35 @output = &$algorithm(%options);
323              
324             #Turn into newick trees if requested
325 5 50 66     164 if ( defined $options{output_format}
    100 66        
326             && $options{output_format} eq 'newick' )
327             {
328              
329             #Convert to newick trees
330 0         0 for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
  0         0  
331 0         0 $output[0]->[$index] =
332             $output[0]->[$index]->to_newick( '-nodelabels' => 1 );
333             }
334             }
335             elsif ( defined $options{output_format}
336             && $options{output_format} eq 'forest' )
337             {
338              
339             # save as a forest
340 4         27 my $forest = Bio::Phylo::Forest->new;
341 4         11 for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
  24         63  
342 20         61 $forest->insert( $output[0]->[$index] );
343             }
344 4         14 $output[0] = $forest;
345             }
346             }
347 5         75 return @output;
348             }
349              
350             =begin comment
351              
352             Type : Internal method
353             Title : _sample_newick
354             Usage : ($thread) = threads->new(\&_sample_newick, %options);
355             @thread_output = $thread->join;
356             Function: Wrapper for sampling routines used for multi-threading
357             Returns : Output from sampling algorithms with trees replaced by newick strings
358             Args : %options to pass to sampling algorithm
359              
360             =end comment
361              
362             =cut
363              
364             sub _sample_newick {
365 0     0   0 my %options = @_;
366 0         0 my $algorithm = 'sample_' . $options{algorithm};
367              
368             # Thread 'cancellation' signal handler
369 0     0   0 $SIG{'KILL'} = sub { threads->exit(); };
  0         0  
370 0         0 $algorithm = \&$algorithm;
371              
372             #Perform the sampling
373 0         0 my @output = ( &$algorithm(%options) );
374              
375             #Convert to newick trees
376 0         0 for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
  0         0  
377 0         0 $output[0]->[$index] =
378             $output[0]->[$index]->to_newick( '-nodelabels' => 1 );
379             }
380 0         0 return @output;
381             }
382              
383             =head2 Sampling algorithms
384              
385             These algorithms should be accessed through the sampling interface (sample()).
386             Additional parameters need to be passed to these algorithms as described for
387             each algorithm.
388              
389             =over
390              
391             =item sample_b()
392              
393             Sample from any birth model
394              
395             Type : Sampling algorithm
396             Title : sample_b
397             Usage : see sample
398             Function: Samples trees from a pure birth model
399             Returns : see sample
400             Args : %algorithm_options requires the field:
401             rate => sampling rate
402              
403             =cut
404              
405             sub sample_b {
406 1     1 1 4 my %options = @_;
407              
408             #The sample of trees
409 1         3 my @sample;
410              
411             #A list of the expected number of samples
412             my @expected_summary;
413 1         3 my $model = $options{model};
414 1         2 my $rate = $options{algorithm_options}->{rate};
415              
416             #While we have insufficient samples
417 1         4 while ( scalar @sample < $options{sample_size} ) {
418              
419             #Generate a candidate model run
420 14         58 my $candidate = &$model( %{ $options{model_options} },
421 14         24 tree_size => $options{tree_size} );
422              
423             #Check that the tree has no extinctions
424 14 50       56 unless ( $candidate->is_ultrametric(1e-6) ) {
425 0         0 Bio::Phylo::Util::Exceptions::BadFormat->throw(
426             'error' => "the model must be a pure birth process" );
427             }
428              
429             #Get the lineage through time data
430 14         40 my ( $time, $count ) = lineage_through_time($candidate);
431              
432             #The expected number of samples we want
433 14         30 my $expected_samples = $rate * ( $time->[-1] - $time->[-2] );
434 14         25 push( @expected_summary, $expected_samples );
435              
436             #Get the random number of samples from this candidate tree
437 14         31 while ( $expected_samples > 0 ) {
438              
439             #If the number of samples remaining is greater than one
440             #we take a sample, otherwise we take a sample with probability
441             #according to the remaining number of samples
442 16 100 100     63 if ( $expected_samples > 1 || rand(1) < $expected_samples ) {
443 5         16 my $tree = copy_tree($candidate);
444              
445             #Truncate the tree at a random point during which it had tree_size species
446 5         19 truncate_tree_time( $tree,
447             ( $time->[-1] - $time->[-2] ) * rand(1) + $time->[-2] );
448              
449             #Add the tree to our sample
450 5         12 push( @sample, $tree );
451              
452             #Update the sample counter (for GUI or other applications that want to know how
453             #many samples have been obtained)
454 5 50       16 if ( defined $options{counter} ) {
455 0         0 my $counter = $options{counter};
456 0         0 &$counter(1);
457             }
458             }
459 16         89 $expected_samples--;
460             }
461             }
462 1         6 return ( \@sample, \@expected_summary );
463             }
464              
465             =item sample_bd()
466              
467             Sample from any birth and death model for which nstar exists
468              
469             Type : Sampling algorithm
470             Title : sample_bd
471             Usage : see sample
472             Function: Samples trees from a birth and death model
473             Returns : see sample
474             Args : %algorithm_options requires the fields:
475             nstar => once a tree has nstar species there should be
476             a negligible chance of returning to tree_size species
477             rate => sampling rate
478              
479             =cut
480              
481             sub sample_bd {
482 1     1 1 5 my %options = @_;
483              
484             #The sample of trees
485 1         3 my @sample;
486              
487             #A list of the expected number of samples
488             my @expected_summary;
489              
490             #Convenience variables
491 1         3 my $model = $options{model};
492 1         2 my $nstar = $options{algorithm_options}->{nstar};
493 1         3 my $rate = $options{algorithm_options}->{rate};
494              
495             #While we have insufficient samples
496 1         4 while ( scalar @sample < $options{sample_size} ) {
497              
498             #Generate a candidate model run
499             my $candidate =
500 55         104 &$model( %{ $options{model_options} }, tree_size => $nstar );
  55         308  
501              
502             #Get the lineage through time data
503 55         227 my ( $time, $count ) = lineage_through_time($candidate);
504              
505             #Reorganise the lineage through time data
506             #@duration contains the length of the intervals with the right number of species
507             #@start contains the starting times of these intervals
508             #@prob contains the cumulative probability of each interval being selected
509             #$total_duration contains the sum of the interval lengths
510 55         140 my ( @duration, @start, @prob, $total_duration );
511 55         147 for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) {
  3492         4920  
512 3437 100       5144 if ( $count->[$index] == $options{tree_size} ) {
513 95         184 push( @duration, $time->[ $index + 1 ] - $time->[$index] );
514 95         138 push( @start, $time->[$index] );
515 95         134 $total_duration += $duration[-1];
516 95         140 push( @prob, $total_duration );
517             }
518             }
519 1     1   8 no warnings 'uninitialized'; # FIXME
  1         2  
  1         43  
520 55 100       353 next if $total_duration == 0;
521 1     1   5 use warnings;
  1         2  
  1         306  
522 20         74 for ( my $index = 0 ; $index < scalar @prob ; $index++ ) {
523 95         163 $prob[$index] /= $total_duration;
524             }
525              
526             #The expected number of samples we want
527 20         50 my $expected_samples = $rate * $total_duration;
528 20         52 push( @expected_summary, $expected_samples );
529              
530             #Get the random number of samples from this candidate tree
531 20         68 while ( $expected_samples > 0 ) {
532              
533             #If the number of samples remaining is greater than one
534             #we take a sample, otherwise we take a sample with probability
535             #according to the remaining number of samples
536 21 100 100     135 if ( $expected_samples > 1 || rand(1) < $expected_samples ) {
537 5         26 my $tree = copy_tree($candidate);
538              
539             #Get a random interval
540 5         22 my $interval_choice = rand(1);
541 5         11 my $interval;
542 5         38 for (
543             $interval = 0 ;
544             $interval_choice > $prob[$interval] ;
545             $interval++
546             )
547             {
548             }
549              
550             #Truncate the tree at a random point during this interval
551 5         29 truncate_tree_time( $tree,
552             $duration[$interval] * rand(1) + $start[$interval] );
553              
554             #Add the tree to our sample
555 5         20 push( @sample, $tree );
556              
557             #Update the sample counter (for GUI or other applications that want to know how
558             #many samples have been obtained)
559 5 50       23 if ( defined $options{counter} ) {
560 0         0 my $counter = $options{counter};
561 0         0 &$counter(1);
562             }
563             }
564 21         400 $expected_samples--;
565             }
566             }
567 1         8 return ( \@sample, \@expected_summary );
568             }
569              
570             =item sample_incomplete_sampling_bd()
571              
572             Sample from any birth and death model with incomplete taxon sampling
573              
574             Type : Sampling algorithm
575             Title : sample_incomplete_sampling_bd
576             Usage : see sample
577             Function: Samples trees from a birth and death model with incomplete taxon sampling
578             Returns : see sample
579             Args : %algorithm_options requires the fields:
580             rate => sampling rate
581             nstar => once a tree has nstar species there should be
582             a negligible chance of returning to mstar species
583             mstar => trees with more than mstar species form a negligible
584             contribution to the final sample.
585             sampling_probability => see below.
586            
587             sampling_probability
588              
589             vector: must have length (mstar-tree_size+1) The ith element gives the probability
590             of not sampling i species.
591             scalar: the probability of sampling any individual species. Is used to calculate
592             a vector as discussed in the paper.
593              
594             =cut
595              
596             sub sample_incomplete_sampling_bd {
597 1     1 1 5 my %options = @_;
598              
599             # %options = (%options, %{$options{algorithm_options}});
600             #The sample of trees
601 1         3 my @sample;
602              
603             #A list of the expected number of samples
604             my @expected_summary;
605              
606             # #Convenience variables
607 1         2 my $model = $options{model};
608 1         3 my $nstar = $options{algorithm_options}->{nstar};
609 1         2 my $mstar = $options{algorithm_options}->{mstar};
610 1         3 my $rate = $options{algorithm_options}->{rate};
611             my $sampling_probability =
612 1         2 $options{algorithm_options}->{sampling_probability};
613              
614             #If sampling_probability is a list check it's length
615 1 50 33     7 if ( ref $sampling_probability
616 1         7 && scalar @{$sampling_probability} !=
617             ( $mstar - $options{tree_size} + 1 ) )
618             {
619 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
620             "'sampling_probability' must be a scalar or a list with m_star-tree_size+1 items"
621             );
622             }
623              
624             #If a single sampling probability was given we calculate the
625             #probability of sub-sampling given numbers of species.
626 1 50       4 unless ( ref $sampling_probability ) {
627 0         0 my $p = $sampling_probability;
628 0         0 my @vec;
629 0         0 my $total = 0;
630 0         0 foreach ( ( $options{tree_size} .. $mstar ) ) {
631              
632             #The probability of sampling tree_size species from a tree containing $_ species
633             push( @vec,
634             nchoosek( $_, $options{tree_size} ) *
635             ( $p**$options{tree_size} ) *
636 0         0 ( ( 1 - $p )**( $_ - $options{tree_size} ) ) );
637 0         0 $total += $vec[-1];
638             }
639 0         0 for ( my $ii = 0 ; $ii < scalar @vec ; $ii++ ) { $vec[$ii] /= $total; }
  0         0  
640 0         0 $sampling_probability = \@vec;
641             }
642              
643             #We now normalise the sampling_probability list so that it sums to unity
644             #this allows comparable sampling rates to be used here and in sample_bd
645 1         2 my $total_sp = 0;
646 1         3 foreach ( @{$sampling_probability} ) { $total_sp += $_; }
  1         3  
  3         7  
647 1     1   6 no warnings; # FIXME
  1         2  
  1         73  
648 1         8 foreach ( my $index = 1 .. scalar @{$sampling_probability} ) {
  0         0  
  0         0  
649 1     1   6 no warnings; # FIXME
  1         2  
  1         33  
650 1         7 $sampling_probability->[ $index - 1 ] /= $total_sp;
651 1     1   5 use warnings;
  1         1  
  1         22  
652             }
653 1     1   4 use warnings;
  1         2  
  1         61  
654              
655             #While we have insufficient samples
656 1         5 while ( scalar @sample < $options{sample_size} ) {
657              
658             #Generate a candidate model run
659             my $candidate =
660 20         38 &$model( %{ $options{model_options} }, tree_size => $nstar );
  20         124  
661              
662             #Get the lineage through time data
663 20         91 my ( $time, $count ) = lineage_through_time($candidate);
664 20         48 my @size_stats;
665 20         53 for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) {
  1778         2526  
666 1     1   5 no warnings 'uninitialized'; # FIXME
  1         2  
  1         168  
667 1758 100       2558 if ( $count->[$index] >= $options{tree_size} ) {
668             $size_stats[ $count->[$index] - $options{tree_size} ] +=
669             $time->[$index] *
670             $sampling_probability->[ $count->[$index] -
671 1141         1791 $options{tree_size} ];
672             }
673             }
674              
675             #Reorganise the lineage through time data
676             #@duration contains the length of intervals with more than tree_size species
677             #@start contains the starting times of these intervals
678             #@prob contains the cumulative probability of each interval being selected
679             #$total_prob contains the sum of @prob before normalisation
680 20         41 my ( @duration, @start, @prob, $total_prob );
681 20         37 $total_prob = 0;
682 20         38 for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) {
  1778         2495  
683 1758 100       2457 if ( $count->[$index] >= $options{tree_size} ) {
684 1141         1503 push( @duration, $time->[ $index + 1 ] - $time->[$index] );
685 1141         1317 push( @start, $time->[$index] );
686 1     1   9 no warnings 'uninitialized'; # FIXME
  1         3  
  1         7289  
687             $total_prob +=
688             $duration[-1] *
689             $sampling_probability->[ $count->[$index] -
690 1141         1466 $options{tree_size} ];
691 1141         1505 push( @prob, $total_prob );
692             }
693             }
694 20 100       111 next if $total_prob == 0;
695 10         44 for ( my $index = 0 ; $index < scalar @prob ; $index++ ) {
696 1141         1604 $prob[$index] /= $total_prob;
697             }
698              
699             #The expected number of samples we want
700 10         22 my $expected_samples = $rate * $total_prob;
701 10         27 push( @expected_summary, $expected_samples );
702             $expected_samples = $options{sample_size} - scalar @sample
703 10 50       45 if $expected_samples > $options{sample_size} - scalar @sample;
704              
705             #Get the random number of samples from this candidate tree
706 10         48 while ( $expected_samples > 0 ) {
707              
708             #If the number of samples remaining is greater than one
709             #we take a sample, otherwise we take a sample with probability
710             #according to the remaining number of samples
711 10 100 66     88 if ( $expected_samples > 1 || rand(1) < $expected_samples ) {
712 5         28 my $tree = copy_tree($candidate);
713              
714             #Get a random interval
715 5         24 my $interval_choice = rand(1);
716 5         13 my $interval;
717 5         77 for (
718             $interval = 0 ;
719             $interval_choice > $prob[$interval] ;
720             $interval++
721             )
722             {
723             }
724              
725             #Truncate the tree at a random point during this interval
726 5         38 truncate_tree_time( $tree,
727             $duration[$interval] * rand(1) + $start[$interval] );
728              
729             #Remove random species so it has the right size
730 5         34 truncate_tree_size( $tree, $options{tree_size} );
731              
732             #Add the tree to our sample
733 5         18 push( @sample, $tree );
734              
735             #Update the sample counter (for GUI or other applications that want to know how
736             #many samples have been obtained)
737 5 50       29 if ( defined $options{counter} ) {
738 0         0 my $counter = $options{counter};
739 0         0 &$counter(1);
740             }
741             }
742 10         541 $expected_samples--;
743             }
744             }
745 1         13 return ( \@sample, \@expected_summary );
746             }
747              
748             =item sample_memoryless_b()
749              
750             Sample from a memoryless birth model
751              
752             Type : Sampling algorithm
753             Title : sample_memoryless_b
754             Usage : see sample
755             Function: Samples trees from a memoryless birth model
756             Returns : see sample
757             Args : %algorithm_options with fields:
758             pendant_dist => function reference for generating random
759             shortest pendant edges
760              
761             NB: The function pointed to by pendant_dist is given model_options
762             as it's input argument with an added field tree_size. It must return
763             a random value from the probability density for the shortest pendant
764             edges.
765              
766             =cut
767              
768             sub sample_memoryless_b {
769 1     1 1 6 my %options = @_;
770              
771             #The sample of trees
772 1         3 my @sample;
773              
774             #A list of the expected number of samples
775             my @expected_summary;
776              
777             #The user specified functions
778 1         4 my $model = $options{model};
779 1         2 my $pendant_dist = $options{algorithm_options}->{pendant_dist};
780              
781             #While we have insufficient samples
782 1         6 while ( scalar @sample < $options{sample_size} ) {
783              
784             #Generate a tree ending just after the last speciation event
785 5         24 my $tree = &$model( %{ $options{model_options} },
786 5         13 tree_size => $options{tree_size} );
787              
788             #Check that the tree has no extinctions
789 5 50       31 unless ( $tree->is_ultrametric(1e-6) ) {
790 0         0 Bio::Phylo::Util::Exceptions::BadFormat->throw(
791             'error' => "the model must be a pure birth process" );
792             }
793              
794             #Get the random length to add after the last speciation event
795 5         49 my $pendant_add = &$pendant_dist( %{ $options{model_options} },
796 5         11 tree_size => $options{tree_size} );
797              
798             #Add the final length
799 5         58 foreach ( @{ $tree->get_terminals } ) {
  5         23  
800 50         100 $_->set_branch_length( $_->get_branch_length + $pendant_add );
801             }
802              
803             #Add to the sample
804 5         19 push( @sample, $tree );
805 5         11 push( @expected_summary, 1 );
806              
807             #Update the sample counter (for GUI or other applications that want to know how
808             #many samples have been obtained)
809 5 50       34 if ( defined $options{counter} ) {
810 0         0 my $counter = $options{counter};
811 0         0 &$counter(1);
812             }
813             }
814 1         7 return ( \@sample, \@expected_summary );
815             }
816              
817             =item sample_constant_rate_bd()
818              
819             Sample from a constant rate birth and death model
820              
821             Type : Sampling algorithm
822             Title : sample_constant_rate_bd
823             Usage : see sample
824             Function: Samples trees from a memoryless birth model
825             Returns : see sample
826             Args : no specific algorithm options but see below
827              
828             NB: This algorithm only applies to constant rate birth and death
829             processes. Consequently a model does not need to be specified (and
830             will be ignored if it is). But birth_rate and death_rate model
831             options must be given.
832              
833             =cut
834              
835             sub sample_constant_rate_bd {
836 1     1 1 5 my %options = @_;
837              
838             #Store parameters in shorter variables (for clarity)
839             my ( $br, $dr, $n ) = (
840             $options{model_options}->{birth_rate},
841             $options{model_options}->{death_rate},
842             $options{tree_size}
843 1         5 );
844 1         4 my @sample;
845              
846             #Loop for sampling each tree
847 1         6 while ( scalar @sample < $options{sample_size} ) {
848 5         16 my @nodes;
849              
850             #Compute the random tree age from the inverse CDF (different formulas for
851             #birth rate == death rate and otherwise)
852             my $tree_age;
853              
854             #The uniform random variable
855 5         19 my $r = rand;
856 5 50       23 if ( $br == $dr ) {
857 0         0 $tree_age = 1 / ( $br * ( $r**( -1 / $n ) - 1 ) );
858             }
859             else {
860 5         57 $tree_age =
861             1 /
862             ( $br - $dr ) *
863             log(
864             ( 1 - $dr / $br * $r**( 1 / $n ) ) / ( 1 - $r**( 1 / $n ) ) );
865             }
866              
867             #Find the random speciation times
868 5         8 my @speciation;
869 5         19 foreach ( 0 .. ( $n - 2 ) ) {
870 45 50       64 if ( $br == $dr ) {
871 0         0 my $r = rand;
872 0         0 $speciation[$_] =
873             $r * $tree_age / ( 1 + $br * $tree_age * ( 1 - $r ) );
874             }
875             else {
876              
877             #Two repeated parts of the inverse CDF for clarity
878 45         81 my $a = $br - $dr * exp( ( $dr - $br ) * $tree_age );
879 45         64 my $b = ( 1 - exp( ( $dr - $br ) * $tree_age ) ) * rand;
880              
881             #The random speciation time from the inverse CDF
882 45         97 $speciation[$_] =
883             1 /
884             ( $br - $dr ) *
885             log( ( $a - $dr * $b ) / ( $a - $br * $b ) );
886             }
887             }
888              
889             #Create the initial terminals and a vector for their ages
890 5         11 my @terminals;
891             my @ages;
892 5         12 foreach ( 0 .. ( $n - 1 ) ) {
893              
894             #Add a new terminal
895 50         120 $terminals[$_] = Bio::Phylo::Forest::Node->new();
896 50         169 $terminals[$_]->set_name( 'ID' . $_ );
897 50         94 $ages[$_] = 0;
898             }
899 5         17 @nodes = @terminals;
900              
901             #Sort the speciation times
902 5         40 my @sorted_speciation = sort { $a <=> $b } @speciation;
  101         145  
903              
904             #Make a hash for easily finding the index of a given speciation event
905 5         10 my %speciation_hash;
906 5         17 foreach ( 0 .. ( $n - 2 ) ) {
907 45         163 $speciation_hash{ $speciation[$_] } = $_;
908             }
909              
910             #Construct the tree
911 5         17 foreach my $index ( 0 .. ( $n - 2 ) ) {
912              
913             #Create the parent node
914 45         126 my $parent = Bio::Phylo::Forest::Node->new();
915 45         174 $parent->set_name( 'ID' . ( $n + $index ) );
916 45         92 push( @nodes, $parent );
917              
918             #An index for this speciation event back into the unsorted vectors
919 45         248 my $spec_index = $speciation_hash{ $sorted_speciation[$index] };
920              
921             #Add the children to the parent node
922 45         149 $parent->set_child( $terminals[$spec_index] );
923 45         135 $terminals[$spec_index]->set_parent($parent);
924 45         125 $parent->set_child( $terminals[ $spec_index + 1 ] );
925 45         134 $terminals[ $spec_index + 1 ]->set_parent($parent);
926              
927             #Set the children's branch lengths
928 45         164 $terminals[$spec_index]->set_branch_length(
929             $sorted_speciation[$index] - $ages[$spec_index] );
930 45         181 $terminals[ $spec_index + 1 ]->set_branch_length(
931             $sorted_speciation[$index] - $ages[ $spec_index + 1 ] );
932              
933             #Replace the two terminals with the new one
934 45         102 splice( @terminals, $spec_index, 2, $parent );
935 45         92 splice( @ages, $spec_index, 2, $sorted_speciation[$index] );
936              
937             #Update the mapping for the sorted speciation times
938 45         192 foreach ( keys %speciation_hash ) {
939 405 100       702 $speciation_hash{$_}-- if $speciation_hash{$_} > $spec_index;
940             }
941             }
942              
943             #Add the nodes to a tree
944 5         34 my $tree = Bio::Phylo::Forest::Tree->new();
945 5         17 foreach ( reverse(@nodes) ) { $tree->insert($_); }
  95         175  
946 5         12 push( @sample, $tree );
947              
948             #Update the sample counter (for GUI or other applications that want to know how
949             #many samples have been obtained)
950 5 50       87 if ( defined $options{counter} ) {
951 0         0 my $counter = $options{counter};
952 0         0 &$counter(1);
953             }
954             }
955 1         8 return ( \@sample, [] );
956             }
957              
958             =back
959              
960             =head1 EVOLUTIONARY MODELS
961              
962             All evolutionary models take a options hash as their input argument
963             and return a Bio::Phylo::Forest::Tree. This tree may contain extinct
964             lineages (lineages that end prior to the end of the tree).
965              
966             The options hash contains any model specific parameters (see the
967             individual model descriptions) and one or both terminating conditions:
968             tree_size => the number of extant species at which to terminate the tree
969             tree_age => the age of the tree at which to terminate the process
970              
971             Note that if the model stops due to the tree_size condition then the
972             tree ends immediately after the speciation event that created the last
973             species.
974              
975             =over
976              
977             =item constant_rate_birth()
978              
979             A constant rate birth model (Yule/ERM)
980              
981             Type : Evolutionary model
982             Title : constant_rate_birth
983             Usage : $tree = constant_rate_birth(%options)
984             Function: Produces a tree from the model terminating at a given size/time
985             Returns : Bio::Phylo::Forest::Tree
986             Args : %options with fields:
987             birth_rate The birth rate parameter (default 1)
988             tree_size The size of the tree at which to terminate
989             tree_age The age of the tree at which to terminate
990              
991             NB: At least one of tree_size and tree_age must be specified
992              
993             =cut
994              
995             sub constant_rate_birth {
996 20     20 1 849 my %options = @_;
997 20         40 $options{death_rate} = 0;
998 20         73 return constant_rate_birth_death(%options);
999             }
1000              
1001             =item external_model()
1002              
1003             A dummy model that takes as input a set of newick_trees and randomly samples
1004             these.
1005              
1006             Type : Evolutionary model
1007             Title : external_model
1008             Usage : $tree = $external_model(%options)
1009             Function: Returns a random tree that was given as input
1010             Returns : Bio::Phylo::Forest::Tree
1011             Args : %options with fields:
1012             trees An array of newick strings. One of these is returned at random.
1013              
1014             NB: The usual parameters tree_size and tree_age will be ignored. When sampling
1015             using this model the trees array must contain trees adhering to the requirements
1016             of the sampling algorithm. This is NOT checked automatically.
1017              
1018             =cut
1019              
1020             sub external_model {
1021 0     0 1 0 my %options = @_;
1022 0         0 my $choice = int( rand( scalar @{ $options{trees} } ) );
  0         0  
1023              
1024             #Pick a newick string and turn it in to a Bio::Phylo::Forest::Tree object
1025             my $tree = Bio::Phylo::IO->parse(
1026             -format => 'newick',
1027 0         0 -string => $options{trees}->[$choice]
1028             )->first;
1029 0         0 return $tree;
1030             }
1031              
1032             =item constant_rate_birth_death()
1033              
1034             A constant rate birth and death model
1035              
1036             Type : Evolutionary model
1037             Title : constant_rate_birth_death
1038             Usage : $tree = constant_rate_birth_death(%options)
1039             Function: Produces a tree from the model terminating at a given size/time
1040             Returns : Bio::Phylo::Forest::Tree
1041             Args : %options with fields:
1042             birth_rate The birth rate parameter (default 1)
1043             death_rate The death rate parameter (default no extinction)
1044             tree_size The size of the tree at which to terminate
1045             tree_age The age of the tree at which to terminate
1046              
1047             NB: At least one of tree_size and tree_age must be specified
1048              
1049             =cut
1050              
1051             sub constant_rate_birth_death {
1052 95     95 1 404 my %options = @_;
1053              
1054             #Check that we have a termination condition
1055 95 50 33     324 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1056              
1057             #Error here.
1058 0         0 return undef;
1059             }
1060              
1061             #Set the undefined condition to infinity
1062 95 50       272 $options{tree_size} = 1e6 unless defined $options{tree_size};
1063 95 50       338 $options{tree_age} = 1e6 unless defined $options{tree_age};
1064              
1065             #Set default rates
1066 95 50       226 $options{birth_rate} = 1 unless defined( $options{birth_rate} );
1067             delete $options{death_rate}
1068 95 100 66     603 if defined( $options{death_rate} ) && $options{death_rate} == 0;
1069              
1070             #Each node gets an ID number this tracks these
1071 95         156 my $node_id = 0;
1072              
1073             #Create a new tree with a root, start the list of terminal species
1074 95         419 my $tree = Bio::Phylo::Forest::Tree->new();
1075 95         390 my $root = Bio::Phylo::Forest::Node->new();
1076 95         374 $root->set_branch_length(0);
1077 95         454 $root->set_name( 'ID' . $node_id++ );
1078 95         386 $tree->insert($root);
1079 95         193 my @terminals = ($root);
1080 95         155 my ( $next_extinction, $next_speciation );
1081 95         145 my $time = 0;
1082 95         159 my $tree_size = 1;
1083              
1084             #Check whether we have a non-zero root edge
1085 95 50 33     279 if ( defined $options{root_edge} && $options{root_edge} ) {
1086              
1087             #Non-zero root. We set the time to the first speciation event
1088 0         0 $next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
1089             }
1090             else {
1091              
1092             #Zero root, we want a speciation event straight away
1093 95         139 $next_speciation = 0;
1094             }
1095              
1096             #Time of the first extinction event. If no extinction we always
1097             #set the extinction event after the current speciation event
1098 95 100       198 if ( defined $options{death_rate} ) {
1099 75         467 $next_extinction = -log(rand) / $options{death_rate} / $tree_size;
1100             }
1101             else {
1102 20         32 $next_extinction = $next_speciation + 1;
1103             }
1104              
1105             #While the tree has not become extinct and the termination criterion
1106             #has not been achieved we create new speciation and extinction events
1107 95   66     581 while ($tree_size > 0
      66        
1108             && $tree_size < $options{tree_size}
1109             && $time < $options{tree_age} )
1110             {
1111              
1112             #Add the time since the last event to all terminal species
1113 5423         9167 foreach (@terminals) {
1114             $_->set_branch_length(
1115             $_->get_branch_length + min(
1116             $next_extinction, $next_speciation,
1117 68947         119408 $options{tree_age} - $time
1118             )
1119             );
1120             }
1121              
1122             #Update the time
1123 5423         10235 $time += min( $next_extinction, $next_speciation );
1124              
1125             #If the tree exceeds the time limit we are done
1126 5423 50       10391 return $tree if ( $time > $options{tree_age} );
1127              
1128             #Get the species effected by this event and remove it from the terminals list
1129 5423         13056 my $effected =
1130             splice( @terminals, int( rand( scalar @terminals ) ), 1 );
1131              
1132             #If we have a speciation event we add two new species
1133 5423 100 66     14055 if ( $next_speciation < $next_extinction || !defined $next_extinction )
1134             {
1135 3169         5107 foreach ( 1, 2 ) {
1136              
1137             #Create a new species
1138 6338         17104 my $child = Bio::Phylo::Forest::Node->new();
1139 6338         23134 $child->set_name( 'ID' . $node_id++ );
1140              
1141             #Give it a zero edge length
1142 6338         18360 $child->set_branch_length(0);
1143              
1144             #Add it as a child to the speciating species
1145 6338         15632 $effected->set_child($child);
1146              
1147             #Add it to the tree
1148 6338         17074 $tree->insert($child);
1149              
1150             #Add it to the terminals list
1151 6338         13206 push( @terminals, $child );
1152             }
1153             }
1154              
1155             #We calculate the time that the next extinction and speciation
1156             #events will occur (only the earliest of these will actually
1157             #happen). NB: this approach is only appropriate for models where
1158             #speciation and extinction times are exponentially distributed.
1159             #Windows sometimes returns 0 values for rand...
1160 5423         9416 my ( $r1, $r2 ) = ( 0, 0 );
1161 5423         13690 $r1 = rand until $r1;
1162 5423         11493 $r2 = rand until $r2;
1163 5423         8356 $tree_size = scalar @terminals;
1164 5423 100       8875 return $tree unless $tree_size;
1165 5375         16121 $next_speciation = -log($r1) / $options{birth_rate} / $tree_size;
1166 5375 100       9440 if ( defined $options{death_rate} ) {
1167 5195         26494 $next_extinction = -log($r2) / $options{death_rate} / $tree_size;
1168             }
1169             else {
1170 180         841 $next_extinction = $next_speciation + 1;
1171             }
1172             }
1173 47         262 return $tree;
1174             }
1175              
1176              
1177             =item diversity_dependent_speciation()
1178              
1179             A birth and death model with speciation rate dependent on diversity as per
1180             Etienne et. al. 2012
1181              
1182             Type : Evolutionary model
1183             Title : diversity_dependent_speciation
1184             Usage : $tree = diversity_dependent_speciation(%options)
1185             Function: Produces a tree from the model terminating at a given size/time
1186             Returns : Bio::Phylo::Forest::Tree
1187             Args : %options with fields:
1188             maximal_birth_rate The maximal birth rate parameter (default 1)
1189             death_rate The death rate parameter (default no extinction)
1190             K_dash The modified carrying capacity (no default)
1191             tree_size The size of the tree at which to terminate
1192             tree_age The age of the tree at which to terminate
1193              
1194             NB: At least one of tree_size and tree_age must be specified
1195              
1196             Reference:
1197             Rampal S. Etienne, Bart Haegeman, Tanja Stadler, Tracy Aze, Paul N. Pearson,
1198             Andy Purvis and Albert B. Phillimore. "Diversity-dependence brings molecular
1199             phylogenies closer to agreement with the fossil record"
1200             doi: 10.1098/rspb.2011.1439
1201              
1202             =cut
1203              
1204             sub diversity_dependent_speciation {
1205 0     0 1 0 my %options = @_;
1206              
1207             #Check that we have a termination condition
1208 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1209             #Error here.
1210 0         0 return undef;
1211             }
1212              
1213             #Check that we have a carrying capacity
1214 0 0       0 unless ( defined $options{K_dash} ) {
1215             #Error here.
1216 0         0 return undef;
1217             }
1218              
1219             #Set the undefined condition to infinity
1220 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1221 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1222              
1223             #Set default rates
1224 0 0       0 $options{maximal_birth_rate} = 1 unless defined( $options{maximal_birth_rate} );
1225             delete $options{death_rate}
1226 0 0 0     0 if defined( $options{death_rate} ) && $options{death_rate} == 0;
1227              
1228             #Each node gets an ID number this tracks these
1229 0         0 my $node_id = 0;
1230              
1231             #Create a new tree with a root, start the list of terminal species
1232 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1233 0         0 my $root = Bio::Phylo::Forest::Node->new();
1234 0         0 $root->set_branch_length(0);
1235 0         0 $root->set_name( 'ID' . $node_id++ );
1236 0         0 $tree->insert($root);
1237 0         0 my @terminals = ($root);
1238 0         0 my ( $next_extinction, $next_speciation );
1239 0         0 my $time = 0;
1240 0         0 my $tree_size = 1;
1241            
1242            
1243 0         0 $options{birth_rate} = max(0,$options{max_birth_rate}*(1-1/$options{K_dash}));
1244            
1245              
1246             #Check whether we have a non-zero root edge
1247 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1248              
1249             #Non-zero root. We set the time to the first speciation event
1250 0         0 $next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
1251             }
1252             else {
1253              
1254             #Zero root, we want a speciation event straight away
1255 0         0 $next_speciation = 0;
1256             }
1257              
1258             #Time of the first extinction event. If no extinction we always
1259             #set the extinction event after the current speciation event
1260 0 0       0 if ( defined $options{death_rate} ) {
1261 0         0 $next_extinction = -log(rand) / $options{death_rate} / $tree_size;
1262             }
1263             else {
1264 0         0 $next_extinction = $next_speciation + 1;
1265             }
1266              
1267             #While the tree has not become extinct and the termination criterion
1268             #has not been achieved we create new speciation and extinction events
1269 0   0     0 while ($tree_size > 0
      0        
1270             && $tree_size < $options{tree_size}
1271             && $time < $options{tree_age} )
1272             {
1273              
1274             #Add the time since the last event to all terminal species
1275 0         0 foreach (@terminals) {
1276             $_->set_branch_length(
1277             $_->get_branch_length + min(
1278             $next_extinction, $next_speciation,
1279 0         0 $options{tree_age} - $time
1280             )
1281             );
1282             }
1283              
1284             #Update the time
1285 0         0 $time += min( $next_extinction, $next_speciation );
1286              
1287             #If the tree exceeds the time limit we are done
1288 0 0       0 return $tree if ( $time > $options{tree_age} );
1289              
1290             #Get the species effected by this event and remove it from the terminals list
1291 0         0 my $effected =
1292             splice( @terminals, int( rand( scalar @terminals ) ), 1 );
1293              
1294             #If we have a speciation event we add two new species
1295 0 0 0     0 if ( $next_speciation < $next_extinction || !defined $next_extinction )
1296             {
1297 0         0 foreach ( 1, 2 ) {
1298              
1299             #Create a new species
1300 0         0 my $child = Bio::Phylo::Forest::Node->new();
1301 0         0 $child->set_name( 'ID' . $node_id++ );
1302              
1303             #Give it a zero edge length
1304 0         0 $child->set_branch_length(0);
1305              
1306             #Add it as a child to the speciating species
1307 0         0 $effected->set_child($child);
1308              
1309             #Add it to the tree
1310 0         0 $tree->insert($child);
1311              
1312             #Add it to the terminals list
1313 0         0 push( @terminals, $child );
1314             }
1315             }
1316              
1317             #We calculate the time that the next extinction and speciation
1318             #events will occur (only the earliest of these will actually
1319             #happen). NB: this approach is only appropriate for models where
1320             #speciation and extinction times are exponentially distributed.
1321             #Windows sometimes returns 0 values for rand...
1322 0         0 my ( $r1, $r2 ) = ( 0, 0 );
1323 0         0 $r1 = rand until $r1;
1324 0         0 $r2 = rand until $r2;
1325 0         0 $tree_size = scalar @terminals;
1326 0 0       0 return $tree unless $tree_size;
1327            
1328 0         0 $options{birth_rate} = max(0,$options{max_birth_rate}*(1-$tree_size/$options{K_dash}));
1329 0 0       0 if ($options{birth_rate}==0)
1330             {
1331 0         0 $next_speciation = $options{tree_age};
1332             } else
1333             {
1334 0         0 $next_speciation = -log($r1) / $options{birth_rate} / $tree_size;
1335             }
1336 0 0       0 if ( defined $options{death_rate} ) {
1337 0         0 $next_extinction = -log($r2) / $options{death_rate} / $tree_size;
1338             }
1339             else {
1340 0         0 $next_extinction = $next_speciation + 1;
1341             }
1342             }
1343 0         0 return $tree;
1344             }
1345              
1346              
1347             =item constant_rate_birth_death()
1348              
1349             A temporal shift birth and death model
1350              
1351             Type : Evolutionary model
1352             Title : temporal_shift_birth_death
1353             Usage : $tree = constant_rate_birth_death(%options)
1354             Function: Produces a tree from the model terminating at a given size/time
1355             Returns : Bio::Phylo::Forest::Tree
1356             Args : %options with fields:
1357             birth_rates The birth rates
1358             death_rates The death rates
1359             rate_times The times after which the rates apply (first element must be 0)
1360             tree_size The size of the tree at which to terminate
1361             tree_age The age of the tree at which to terminate
1362              
1363             NB: At least one of tree_size and tree_age must be specified
1364              
1365             =cut
1366              
1367             sub temporal_shift_birth_death {
1368 0     0 0 0 my %options = @_;
1369              
1370             #Check that we have a termination condition
1371 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1372              
1373             #Error here.
1374 0         0 return undef;
1375             }
1376              
1377             #Set the undefined condition to infinity
1378 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1379 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1380              
1381             #Each node gets an ID number this tracks these
1382 0         0 my $node_id = 0;
1383              
1384             #Create a new tree with a root, start the list of terminal species
1385 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1386 0         0 my $root = Bio::Phylo::Forest::Node->new();
1387 0         0 $root->set_branch_length(0);
1388 0         0 $root->set_name( 'ID' . $node_id++ );
1389 0         0 $tree->insert($root);
1390 0         0 my @terminals = ($root);
1391 0         0 my ( $next_extinction, $next_speciation );
1392 0         0 my $time = 0;
1393 0         0 my $tree_size = 1;
1394              
1395             #Load current rates
1396 0         0 my $birth_rate = $options{birth_rates}[0];
1397 0         0 my $death_rate = $options{death_rates}[0];
1398 0         0 my $current_rates = 0;
1399 0         0 my $next_rate_change = $options{rate_times}[$current_rates+1];
1400            
1401             #Add an additional time to the end of the rate change times to simplify checking
1402 0         0 push(@{$options{rate_times}},$options{tree_size}*2);
  0         0  
1403              
1404             #Check whether we have a non-zero root edge
1405 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1406              
1407             #Non-zero root. We set the time to the first speciation event
1408 0         0 $next_speciation = -log(rand) / $birth_rate / $tree_size;
1409             }
1410             else {
1411              
1412             #Zero root, we want a speciation event straight away
1413 0         0 $next_speciation = 0;
1414             }
1415              
1416             # print "RATES:".$time."|".$birth_rate."|".$death_rate."\n";
1417             #Time of the first extinction event. If no extinction we always
1418             #set the extinction event after the current speciation event
1419 0         0 $next_extinction = -log(rand) / $death_rate / $tree_size;
1420              
1421             #While the tree has not become extinct and the termination criterion
1422             #has not been achieved we create new speciation and extinction events
1423 0   0     0 while ($tree_size > 0
      0        
1424             && $tree_size < $options{tree_size}
1425             && $time < $options{tree_age} )
1426             {
1427              
1428             # print "TIMES:".$next_extinction."|".$next_speciation."|".$next_rate_change."|\n";
1429             # print "RATES:".$time."|".$birth_rate."|".$death_rate."\n";
1430             #Add the time since the last event to all terminal species
1431 0         0 foreach (@terminals) {
1432             $_->set_branch_length(
1433             $_->get_branch_length + min(
1434             $next_extinction, $next_speciation,
1435 0         0 $options{tree_age} - $time
1436             )
1437             );
1438             }
1439              
1440             #Update the time
1441 0         0 my $time_last = $time;
1442 0         0 $time += min( $next_extinction, $next_speciation, $next_rate_change-$time_last);
1443              
1444             #If the tree exceeds the time limit we are done
1445 0 0       0 return $tree if ( $time > $options{tree_age} );
1446              
1447            
1448 0 0       0 if ($next_rate_change-$time_last < min( $next_extinction, $next_speciation) )
1449             {
1450 0         0 $current_rates += 1;
1451 0         0 $birth_rate = $options{birth_rates}[$current_rates];
1452 0         0 $death_rate = $options{death_rates}[$current_rates];
1453 0         0 $next_rate_change = $options{rate_times}[$current_rates+1];
1454            
1455             } else
1456             {
1457            
1458              
1459             #Get the species effected by this event and remove it from the terminals list
1460 0         0 my $effected = splice( @terminals, int( rand( scalar @terminals ) ), 1 );
1461              
1462             #If we have a speciation event we add two new species
1463 0 0 0     0 if ( $next_speciation < $next_extinction || !defined $next_extinction )
1464             {
1465 0         0 foreach ( 1, 2 ) {
1466              
1467             #Create a new species
1468 0         0 my $child = Bio::Phylo::Forest::Node->new();
1469 0         0 $child->set_name( 'ID' . $node_id++ );
1470              
1471             #Give it a zero edge length
1472 0         0 $child->set_branch_length(0);
1473              
1474             #Add it as a child to the speciating species
1475 0         0 $effected->set_child($child);
1476              
1477             #Add it to the tree
1478 0         0 $tree->insert($child);
1479              
1480             #Add it to the terminals list
1481 0         0 push( @terminals, $child );
1482             }
1483             }
1484             }
1485              
1486             #We calculate the time that the next extinction and speciation
1487             #events will occur (only the earliest of these will actually
1488             #happen). NB: this approach is only appropriate for models where
1489             #speciation and extinction times are exponentially distributed.
1490             #Windows sometimes returns 0 values for rand...
1491 0         0 my ( $r1, $r2 ) = ( 0, 0 );
1492 0         0 $r1 = rand until $r1;
1493 0         0 $r2 = rand until $r2;
1494 0         0 $tree_size = scalar @terminals;
1495 0 0       0 return $tree unless $tree_size;
1496 0         0 $next_speciation = -log($r1) / $birth_rate / $tree_size;
1497 0         0 $next_extinction = -log($r2) / $death_rate / $tree_size;
1498            
1499 0 0       0 if ((scalar @terminals)%100==0)
1500             {
1501 0         0 print $time."|".@terminals."|\n";
1502             }
1503              
1504             }
1505 0         0 return $tree;
1506             }
1507              
1508             =item evolving_speciation_rate()
1509              
1510             An evolutionary model featuring evolving speciation rates. Each daughter
1511             species is assigned its parent's speciation rate multiplied by a normally
1512             distributed noise factor.
1513              
1514             Type : Evolutionary model
1515             Title : evolving_speciation_rate
1516             Usage : $tree = evolving_speciation_rate(%options)
1517             Function: Produces a tree from the model terminating at a given size/time
1518             Returns : Bio::Phylo::Forest::Tree
1519             Args : %options with fields:
1520             birth_rate The initial speciation rate (default 1)
1521             evolving_std The standard deviation of the normal distribution
1522             from which the rate multiplier is drawn.
1523             tree_size The size of the tree at which to terminate
1524             tree_age The age of the tree at which to terminate
1525              
1526             NB: At least one of tree_size and tree_age must be specified
1527              
1528             =cut
1529              
1530             sub evolving_speciation_rate {
1531 0     0 1 0 my %options = @_;
1532              
1533             #Check that we have a termination condition
1534 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1535              
1536             #Error here.
1537 0         0 return undef;
1538             }
1539              
1540             #Set the undefined condition to infinity
1541 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1542 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1543              
1544             #Set default rates
1545 0 0       0 $options{birth_rate} = 1 unless defined( $options{birth_rate} );
1546 0 0       0 $options{evolving_std} = 1 unless defined( $options{evolving_std} );
1547              
1548             #Each node gets an ID number this tracks these
1549 0         0 my $node_id = 0;
1550              
1551             #Create a new tree with a root, start the list of terminal species
1552 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1553 0         0 my $root = Bio::Phylo::Forest::Node->new();
1554 0         0 $root->set_branch_length(0);
1555 0         0 $root->set_name( 'ID' . $node_id++ );
1556 0         0 $tree->insert($root);
1557 0         0 my @terminals = ($root);
1558 0         0 my @birth_rates = ( $options{birth_rate} );
1559 0         0 my $net_rate = $options{birth_rate};
1560 0         0 my $next_speciation;
1561 0         0 my $time = 0;
1562 0         0 my $tree_size = 1;
1563              
1564             #Check whether we have a non-zero root edge
1565 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1566              
1567             #Non-zero root. We set the time to the first speciation event
1568 0         0 $next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
1569             }
1570             else {
1571              
1572             #Zero root, we want a speciation event straight away
1573 0         0 $next_speciation = 0;
1574             }
1575              
1576             #While we haven't reached termination
1577 0   0     0 while ( $tree_size < $options{tree_size} && $time < $options{tree_age} ) {
1578              
1579             #Add the time since the last event to all terminal species
1580 0         0 foreach (@terminals) {
1581             $_->set_branch_length( $_->get_branch_length +
1582 0         0 min( $next_speciation, $options{tree_age} - $time ) );
1583             }
1584              
1585             #Update the time
1586 0         0 $time += $next_speciation;
1587              
1588             #If the tree exceeds the time limit we are done
1589 0 0       0 return $tree if ( $time > $options{tree_age} );
1590              
1591             #Get the species effected by this event
1592 0         0 my $rand_select = rand($net_rate);
1593 0         0 my $selected = 0;
1594 0   0     0 for (
1595             ;
1596             $selected < scalar @terminals
1597             && $rand_select > $birth_rates[$selected] ;
1598             $selected++
1599             )
1600             {
1601 0         0 $rand_select -= $birth_rates[$selected];
1602             }
1603              
1604             #Remove it from the terminals list
1605 0         0 my $effected = splice( @terminals, $selected, 1 );
1606 0         0 my $effected_rate = splice( @birth_rates, $selected, 1 );
1607              
1608             #Update the net speciation rate
1609 0         0 $net_rate -= $effected_rate;
1610              
1611             #If we have a speciation event we add two new species
1612 0         0 foreach ( 1, 2 ) {
1613              
1614             #Create a new species
1615 0         0 my $child = Bio::Phylo::Forest::Node->new();
1616 0         0 $child->set_name( 'ID' . $node_id++ );
1617              
1618             #Give it a zero edge length
1619 0         0 $child->set_branch_length(0);
1620              
1621             #Add it as a child to the speciating species
1622 0         0 $effected->set_child($child);
1623              
1624             #Add it to the tree
1625 0         0 $tree->insert($child);
1626              
1627             #Add it to the terminals list
1628 0         0 push( @terminals, $child );
1629              
1630             #New speciation rate
1631             my $new_speciation_rate =
1632 0         0 $effected_rate * ( 1 + qnorm(rand) * $options{evolving_std} );
1633 0 0       0 if ( $new_speciation_rate < 0 ) { $new_speciation_rate = 0; }
  0         0  
1634 0         0 push( @birth_rates, $new_speciation_rate );
1635 0         0 $net_rate += $new_speciation_rate;
1636             }
1637              
1638             # $net_rate = 0;
1639             # foreach (@birth_rates) { $net_rate += $_; }
1640             #Windows sometimes returns 0 values for rand...
1641 0         0 my ( $r1, $r2 ) = ( 0, 0 );
1642 0         0 $r1 = rand until $r1;
1643 0         0 $tree_size = scalar @terminals;
1644              
1645             #If all species have stopped speciating (unlikely)
1646 0 0       0 if ( $net_rate == 0 ) {
1647 0         0 return $tree;
1648             }
1649 0         0 $next_speciation = -log($r1) / $net_rate / $tree_size;
1650 0 0       0 return $tree unless $tree_size;
1651             }
1652 0         0 return $tree;
1653             }
1654              
1655              
1656             =item clade_shifts()
1657              
1658             A constant rate birth-death model with punctuated changes in the speciation
1659             and extinction rates. At each change one lineage receives new pre-specified
1660             speciation and extinction rates.
1661              
1662             Type : Evolutionary model
1663             Title : clade_shifts
1664             Usage : $tree = clade_shifts(%options)
1665             Function: Produces a tree from the model terminating at a given size/time
1666             Returns : Bio::Phylo::Forest::Tree
1667             Args : %options with fields:
1668             birth_rates The speciation rates
1669             death_rates The death rates
1670             rate_times The times at which the rates are introduced to a new
1671             clade. The first time should be zero. The remaining must be in
1672             ascending order.
1673             tree_size The size of the tree at which to terminate
1674             tree_age The age of the tree at which to terminate
1675              
1676             NB: At least one of tree_size and tree_age must be specified
1677              
1678             =cut
1679              
1680             sub clade_shifts {
1681 0     0 1 0 my %options = @_;
1682              
1683             #Check that we have a termination condition
1684 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1685              
1686             #Error here.
1687 0         0 return undef;
1688             }
1689              
1690             #Set the undefined condition to infinity
1691 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1692 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1693              
1694             #Each node gets an ID number this tracks these
1695 0         0 my $node_id = 0;
1696              
1697             #Create a new tree with a root, start the list of terminal species
1698 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1699 0         0 my $root = Bio::Phylo::Forest::Node->new();
1700 0         0 $root->set_branch_length(0);
1701 0         0 $root->set_name( 'ID' . $node_id++ );
1702 0         0 $tree->insert($root);
1703            
1704             #rates
1705 0         0 my @birth_rates_in = @{$options{birth_rates}};
  0         0  
1706 0         0 my @death_rates_in = @{$options{death_rates}};
  0         0  
1707 0         0 my @rate_times_in = @{$options{rate_times}};
  0         0  
1708            
1709 0 0       0 if ($rate_times_in[0] != 0)
1710             {
1711 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
1712             "The first rate time must be 0" );
1713             }
1714 0 0       0 if (scalar @birth_rates_in != scalar @death_rates_in)
1715             {
1716 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
1717             "birth and death rates must have the same length" );
1718             }
1719 0 0       0 if (scalar @birth_rates_in != scalar @rate_times_in)
1720             {
1721 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
1722             "birth/death rates must have the same length as rate times" );
1723             }
1724 0         0 my @birth_rates = ($birth_rates_in[0]);
1725 0         0 my @death_rates = ($death_rates_in[0]);
1726            
1727 0         0 my $net_birth_rate = $birth_rates[0];
1728 0         0 my $net_death_rate = $death_rates[0];
1729            
1730 0         0 shift(@birth_rates_in);
1731 0         0 shift(@death_rates_in);
1732            
1733 0         0 my @terminals = ($root);
1734 0         0 my ( $next_extinction, $next_speciation, $next_rate_change );
1735 0         0 my $time = 0;
1736 0         0 my $tree_size = 1;
1737 0         0 my $inf = 9**9**9**9;
1738              
1739             #Check whether we have a non-zero root edge
1740 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1741              
1742             #Non-zero root. We set the time to the first speciation event
1743 0 0       0 if ($birth_rates[0] > 0)
1744             {
1745 0         0 $next_speciation = -log(rand) / $birth_rates[0] ;
1746             } else
1747             {
1748 0         0 $next_speciation = $inf;
1749             }
1750             }
1751             else {
1752             #Zero root, we want a speciation event straight away
1753 0         0 $next_speciation = 0.0;
1754             }
1755              
1756             #Time of the first extinction event. If no extinction we always
1757             #set the extinction event after the current speciation event
1758 0 0       0 if ($death_rates[0] > 0)
1759             {
1760 0         0 $next_extinction = -log(rand) / $death_rates[0];
1761             } else
1762             {
1763 0         0 $next_extinction = $inf;
1764             }
1765            
1766             #Time of next rate change
1767 0         0 shift(@rate_times_in); #pop the initial 0
1768 0         0 $next_rate_change = shift(@rate_times_in);
1769            
1770             #While the tree has not become extinct and the termination criterion
1771             #has not been achieved we create new speciation and extinction events
1772 0   0     0 while ($tree_size > 0
      0        
1773             && $tree_size < $options{tree_size}
1774             && $time < $options{tree_age} )
1775             {
1776             #print $time."|".$tree_size."\n";
1777             #Update rates if a clade shift is happening
1778             #TODO index rates or pop off one at a time.
1779            
1780              
1781             #Add the time since the last event to all terminal species
1782 0         0 foreach (@terminals) {
1783             $_->set_branch_length(
1784             $_->get_branch_length + min(
1785             $next_extinction,
1786             $next_speciation,
1787             $next_rate_change-$time,
1788 0         0 $options{tree_age} - $time
1789             )
1790             );
1791             }
1792              
1793             #Update the time
1794 0         0 my $time_last = $time;
1795 0         0 $time += min( $next_extinction, $next_speciation, $next_rate_change-$time_last );
1796              
1797             #If the tree exceeds the time limit we are done
1798 0 0       0 return $tree if ( $time > $options{tree_age} );
1799            
1800             #We have a rate change
1801 0 0       0 if ($next_rate_change-$time_last < min($next_extinction, $next_speciation))
1802             {
1803             #Find a random species to effect
1804 0         0 my $effected_species = int(rand($tree_size));
1805             #Subtract current rates
1806 0         0 $net_death_rate -= $death_rates[$effected_species];
1807 0         0 $net_birth_rate -= $birth_rates[$effected_species];
1808             #Get new rates
1809 0         0 $death_rates[$effected_species] = shift(@death_rates_in);
1810 0         0 $birth_rates[$effected_species] = shift(@birth_rates_in);
1811             #Add new rates
1812 0         0 $net_death_rate += $death_rates[$effected_species];
1813 0         0 $net_birth_rate += $birth_rates[$effected_species];
1814             #Get next rate change time
1815 0 0       0 if (scalar(@rate_times_in))
1816             {
1817 0         0 $next_rate_change = shift(@rate_times_in);
1818             } else
1819             {
1820 0         0 $next_rate_change = $inf;
1821             }
1822             }
1823             #Choosing a random species to speciate
1824             else
1825             {
1826 0         0 my $selected = 0;
1827 0 0       0 if ( $next_speciation < $next_extinction )
1828             {
1829 0         0 my $rand_select = rand($net_birth_rate);
1830 0   0     0 for (
1831             ;
1832             $selected < scalar @terminals
1833             && $rand_select > $birth_rates[$selected] ;
1834             $selected++
1835             )
1836             {
1837 0         0 $rand_select -= $birth_rates[$selected];
1838             }
1839             } else
1840             {
1841 0         0 my $rand_select = rand($net_death_rate);
1842 0   0     0 for (
1843             ;
1844             $selected < scalar @terminals
1845             && $rand_select > $death_rates[$selected] ;
1846             $selected++
1847             )
1848             {
1849 0         0 $rand_select -= $death_rates[$selected];
1850             }
1851             }
1852 0 0       0 if ($net_birth_rate == 0)
1853             {
1854 0         0 $selected = 0;
1855             }
1856              
1857             #Remove the species effected by this event and remove it from the terminals list
1858 0         0 my $effected = splice( @terminals, $selected, 1 );
1859 0         0 my $effected_birth_rate = splice( @birth_rates, $selected, 1 );
1860 0         0 my $effected_death_rate = splice( @death_rates, $selected, 1 );
1861            
1862 0         0 $net_birth_rate -= $effected_birth_rate;
1863 0         0 $net_death_rate -= $effected_death_rate;
1864              
1865             #If we have a speciation event we add two new species
1866 0 0 0     0 if ( $next_speciation < $next_extinction || !defined $next_extinction )
1867             {
1868 0         0 foreach ( 1, 2 ) {
1869              
1870             #Create a new species
1871 0         0 my $child = Bio::Phylo::Forest::Node->new();
1872 0         0 $child->set_name( 'ID' . $node_id++ );
1873              
1874             #Give it a zero edge length
1875 0         0 $child->set_branch_length(0);
1876              
1877             #Add it as a child to the speciating species
1878 0         0 $effected->set_child($child);
1879              
1880             #Add it to the tree
1881 0         0 $tree->insert($child);
1882              
1883             #Add it to the terminals list
1884 0         0 push( @terminals, $child );
1885            
1886 0         0 push( @birth_rates, $effected_birth_rate );
1887 0         0 push( @death_rates, $effected_death_rate );
1888            
1889 0         0 $net_death_rate += $effected_death_rate;
1890 0         0 $net_birth_rate += $effected_birth_rate;
1891            
1892             }
1893             }
1894             }
1895             #We calculate the time that the next extinction and speciation
1896             #events will occur (only the earliest of these will actually
1897             #happen). NB: this approach is only appropriate for models where
1898             #speciation and extinction times are exponentially distributed.
1899            
1900             #Windows sometimes returns 0 values for rand...
1901 0         0 my ( $r1, $r2 ) = ( 0, 0 );
1902 0         0 $r1 = rand until $r1;
1903 0         0 $r2 = rand until $r2;
1904            
1905             #The current tree size
1906 0         0 $tree_size = scalar @terminals;
1907            
1908 0 0       0 return $tree unless $tree_size;
1909            
1910 0 0       0 if ($net_birth_rate > 0)
1911             {
1912 0         0 $next_speciation = -log($r1) / $net_birth_rate;
1913             } else
1914             {
1915 0         0 $next_speciation = $inf;
1916             }
1917            
1918 0 0       0 if ($net_death_rate > 0)
1919             {
1920 0         0 $next_extinction = -log($r2) / $net_death_rate;
1921             } else
1922             {
1923 0         0 $next_extinction = $inf;
1924             }
1925              
1926             }
1927 0         0 return $tree;
1928             }
1929              
1930             =item beta_binomial()
1931              
1932             An evolutionary model featuring evolving speciation rates. From Blum2007
1933              
1934             Type : Evolutionary model
1935             Title : beta_binomial
1936             Usage : $tree = beta_binomial(%options)
1937             Function: Produces a tree from the model terminating at a given size/time
1938             Returns : Bio::Phylo::Forest::Tree
1939             Args : %options with fields:
1940             birth_rate The initial speciation rate (default 1)
1941             model_param The parameter as defined in Blum2007
1942             tree_size The size of the tree at which to terminate
1943             tree_age The age of the tree at which to terminate
1944              
1945             NB: At least one of tree_size and tree_age must be specified
1946              
1947             =cut
1948              
1949             sub beta_binomial {
1950 0     0 1 0 my %options = @_;
1951              
1952             #Check that we have a termination condition
1953 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1954              
1955             #Error here.
1956 0         0 return undef;
1957             }
1958              
1959             #Set the undefined condition to infinity
1960 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1961 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1962              
1963             #Set default rates
1964 0 0       0 $options{birth_rate} = 1 unless defined( $options{birth_rate} );
1965 0 0       0 $options{model_param} = 0 unless defined( $options{model_param} );
1966              
1967             #Each node gets an ID number this tracks these
1968 0         0 my $node_id = 0;
1969              
1970             #Create a new tree with a root, start the list of terminal species
1971 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1972 0         0 my $root = Bio::Phylo::Forest::Node->new();
1973 0         0 $root->set_branch_length(0);
1974 0         0 $root->set_name( 'ID' . $node_id++ );
1975 0         0 $tree->insert($root);
1976 0         0 my @terminals = ($root);
1977 0         0 my @birth_rates = ( $options{birth_rate} );
1978 0         0 my $net_rate = $options{birth_rate};
1979 0         0 my $next_speciation;
1980 0         0 my $time = 0;
1981 0         0 my $tree_size = 1;
1982              
1983             #Check whether we have a non-zero root edge
1984 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1985              
1986             #Non-zero root. We set the time to the first speciation event
1987 0         0 $next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
1988             }
1989             else {
1990              
1991             #Zero root, we want a speciation event straight away
1992 0         0 $next_speciation = 0;
1993             }
1994              
1995             #While we haven't reached termination
1996 0   0     0 while ( $tree_size < $options{tree_size} && $time < $options{tree_age} ) {
1997              
1998             #Add the time since the last event to all terminal species
1999 0         0 foreach (@terminals) {
2000             $_->set_branch_length( $_->get_branch_length +
2001 0         0 min( $next_speciation, $options{tree_age} - $time ) );
2002             }
2003              
2004             #Update the time
2005 0         0 $time += $next_speciation;
2006              
2007             #If the tree exceeds the time limit we are done
2008 0 0       0 return $tree if ( $time > $options{tree_age} );
2009              
2010             #Get the species effected by this event
2011 0         0 my $rand_select = rand($net_rate);
2012 0         0 my $selected = 0;
2013 0   0     0 for (
2014             ;
2015             $selected < scalar @terminals
2016             && $rand_select > $birth_rates[$selected] ;
2017             $selected++
2018             )
2019             {
2020 0         0 $rand_select -= $birth_rates[$selected];
2021             }
2022              
2023             #Remove it from the terminals list
2024 0         0 my $effected = splice( @terminals, $selected, 1 );
2025 0         0 my $effected_rate = splice( @birth_rates, $selected, 1 );
2026             my $p =
2027 0         0 qbeta( rand, $options{model_param} + 1, $options{model_param} + 1 );
2028              
2029             #If we have a speciation event we add two new species
2030 0         0 foreach ( 1, 2 ) {
2031              
2032             #Create a new species
2033 0         0 my $child = Bio::Phylo::Forest::Node->new();
2034 0         0 $child->set_name( 'ID' . $node_id++ );
2035              
2036             #Give it a zero edge length
2037 0         0 $child->set_branch_length(0);
2038              
2039             #Add it as a child to the speciating species
2040 0         0 $effected->set_child($child);
2041              
2042             #Add it to the tree
2043 0         0 $tree->insert($child);
2044              
2045             #Add it to the terminals list
2046 0         0 push( @terminals, $child );
2047              
2048             #New speciation rate
2049 0         0 my $new_speciation_rate = $effected_rate * $p;
2050 0         0 $p = 1 - $p;
2051 0 0       0 if ( $new_speciation_rate < 0 ) { $new_speciation_rate = 0; }
  0         0  
2052 0         0 push( @birth_rates, $new_speciation_rate );
2053             }
2054              
2055             #Windows sometimes returns 0 values for rand...
2056 0         0 my ( $r1, $r2 ) = ( 0, 0 );
2057 0         0 $r1 = rand until $r1;
2058 0         0 $tree_size = scalar @terminals;
2059              
2060             #If all species have stopped speciating (unlikely)
2061 0 0       0 if ( $net_rate == 0 ) {
2062 0         0 return $tree;
2063             }
2064 0         0 $next_speciation = -log($r1) / $net_rate / $tree_size;
2065 0 0       0 return $tree unless $tree_size;
2066             }
2067 0         0 return $tree;
2068             }
2069              
2070             =back
2071              
2072             =cut
2073              
2074             =begin comment
2075              
2076             ###########################################################
2077             #INTERNAL METHODS
2078             #These are methods that permit additional manipulations of
2079             #a Bio::Phylo::Tree to be easily made. As such some of
2080             #these could easily be moved into Bio::Phylo::Forest::Tree
2081             ###########################################################
2082              
2083             =end comment
2084              
2085             =cut
2086              
2087             =begin comment
2088              
2089             Type : Internal method
2090             Title : copy_tree
2091             Usage : $tree = copy_tree($tree)
2092             Function: Makes a new independent copy of a tree
2093             Returns : the phylogenetic $tree
2094             Args : the phylogenetic $tree
2095              
2096             =end comment
2097              
2098             =cut
2099              
2100             sub copy_tree {
2101 15     15 0 131 return Bio::Phylo::IO->parse(
2102             -format => 'newick',
2103             -string => shift->to_newick( '-nodelabels' => 1 )
2104             )->first;
2105             }
2106              
2107             =begin comment
2108              
2109             Type : Internal method
2110             Title : truncate_tree_time
2111             Usage : truncate_tree_time($tree,$age)
2112             Function: Truncates the tree at the specified age
2113             Returns : N/A
2114             Args : $tree: the phylogenetic tree (which will be modified)
2115             $age: the age at which to cut the $tree
2116              
2117             =end comment
2118              
2119             =cut
2120              
2121             sub truncate_tree_time {
2122              
2123             #$node and $time are used only by this function recursively
2124 843     843 0 1612 my ( $tree, $age, $node, $time ) = @_;
2125              
2126             #If node and time weren't specified we are starting from the root
2127 843 100       1727 $node = $tree->get_root unless defined $node;
2128 843 100       1326 $time = 0 unless defined $time;
2129              
2130             #If we are truncating this branch
2131 843 100       1434 if ( $time + $node->get_branch_length >= $age ) {
2132              
2133             #Collapse the node unless it is terminal
2134 252 100       568 $node->collapse unless $node->is_terminal();
2135              
2136             #Set the branch length appropriately
2137 252         840 $node->set_branch_length( $age - $time );
2138 252         743 return;
2139             }
2140              
2141             #If this node has no children we are done
2142 591 100       1273 return if $node->is_terminal();
2143              
2144             #Call the function recursively on the children
2145 361         506 foreach ( @{ $node->get_children } ) {
  361         593  
2146 828         1791 truncate_tree_time( $tree, $age, $_,
2147             $time + $node->get_branch_length() );
2148             }
2149             }
2150              
2151             =begin comment
2152              
2153             Type : Internal method
2154             Title : truncate_tree_size
2155             Usage : truncate_tree_size($tree,$size)
2156             Function: Truncates the tree to the specified number of species
2157             Returns : N/A
2158             Args : $tree: the phylogenetic tree (which will be modified)
2159             $size: random species are delete so that the tree has this many terminals
2160              
2161             =end comment
2162              
2163             =cut
2164              
2165             sub truncate_tree_size {
2166 5     5 0 19 my ( $tree, $size ) = @_;
2167 5         11 my @terminals = @{ $tree->get_terminals };
  5         26  
2168 5         25 my @names;
2169              
2170             #Calculate the tree height and node distances from the root
2171             #much more efficient to do this in one hit than repeatedly
2172             #calling the analogous functions on the tree
2173 5         29 _calc_node_properties($tree);
2174              
2175             #Only push species that are extant and store the number of those
2176             # my $tree_height = $tree->get_tallest_tip->calc_path_to_root;
2177 5         21 my $tree_height = $tree->get_root->get_generic('tree_height');
2178 5         20 foreach (@terminals) {
2179 567 100       1021 if (
2180             abs(
2181             ( $_->get_generic('root_distance') - $tree_height ) /
2182             $tree_height
2183             ) < 1e-6
2184             )
2185             {
2186 145         302 push( @names, $_->get_name );
2187             }
2188             }
2189 5 50       23 if ( @names < $size ) { print "Internal error\n"; }
  0         0  
2190 5         13 my %deletions;
2191 5         27 while ( scalar( keys %deletions ) < @names - $size ) {
2192 155         371 $deletions{ $names[ int( rand(@names) ) ] } = 1;
2193             }
2194 5         50 $tree = prune_tips( $tree, [ keys %deletions ] );
2195 5         143 return $tree;
2196             }
2197              
2198             sub _get_ultrametric_size {
2199 0     0   0 my ( $tree, $size ) = @_;
2200 0         0 my @terminals = @{ $tree->get_terminals };
  0         0  
2201 0         0 _calc_node_properties($tree);
2202 0         0 my @names;
2203 0         0 my $tree_height = $tree->get_root->get_generic('tree_height');
2204 0         0 foreach (@terminals) {
2205 0 0       0 if (
2206             abs(
2207             ( $_->get_generic('root_distance') - $tree_height ) /
2208             $tree_height
2209             ) < 1e-6
2210             )
2211             {
2212 0         0 push( @names, $_->get_name );
2213             }
2214             }
2215 0         0 return scalar @names;
2216             }
2217              
2218             =begin comment
2219              
2220             Type : Internal method
2221             Title : remove_extinct_species
2222             Usage : remove_extinct_species($tree)
2223             Function: Removes extinct species from the tree. An extinct species
2224             is a terminal that does not extend as far as the furthest
2225             terminal(s).
2226             Returns : N/A
2227             Args : $tree: the phylogenetic tree (which will be modified)
2228             $age: the age at which to cut the $tree
2229              
2230             =end comment
2231              
2232             =cut
2233              
2234             sub remove_extinct_species {
2235 0     0 0 0 my $tree = shift;
2236              
2237             #Calculate the tree height and node distances from the root
2238             #much more efficient to do this in one hit than repeatedly
2239             #calling the analogous functions on the tree
2240 0         0 _calc_node_properties($tree);
2241 0         0 my $height = $tree->get_root->get_generic('tree_height');
2242 0 0       0 return unless $height > 0;
2243 0         0 my $leaves = $tree->get_terminals;
2244 0 0       0 return unless $leaves;
2245 0         0 my @remove;
2246 0         0 foreach ( @{$leaves} ) {
  0         0  
2247              
2248 0 0       0 unless (
2249             abs( ( $_->get_generic('root_distance') - $height ) / $height ) <
2250             1e-6 )
2251             {
2252 0         0 push( @remove, $_->get_name );
2253             }
2254             }
2255 0         0 $tree = prune_tips( $tree, \@remove );
2256 0         0 return $tree;
2257             }
2258              
2259             =begin comment
2260              
2261             Type : Internal method
2262             Title : prune_tips
2263             Usage : prune_tips($tree,$tips)
2264             Function: Removes named terminals from the tree
2265             Returns : N/A
2266             Args : $tree: the phylogenetic tree (which will be modified)
2267             $tips: array ref of terminals to remove from the tree
2268              
2269             NB: Available as $tree->prune_tips($tips), but had some problems with
2270             this.
2271              
2272             =end comment
2273              
2274             =cut
2275              
2276             sub prune_tips {
2277 5     5 0 17 my ( $self, $tips ) = @_;
2278 5         13 my %names_to_delete = map { $_ => 1 } @{$tips};
  95         142  
  5         14  
2279 472         726 my %keep = map { $_->get_name => 1 }
2280 567         1015 grep { not exists $names_to_delete{ $_->get_name } }
2281 5         16 @{ $self->get_terminals };
  5         45  
2282             $self->visit_depth_first(
2283             -post => sub {
2284 1077     1077   1616 my $node = shift;
2285 1077 100       2163 if ( $node->is_terminal ) {
2286 567 100       1252 if ( not $keep{ $node->get_name } ) {
2287 95         293 $node->set_parent();
2288 95         304 $self->delete($node);
2289             }
2290             }
2291             else {
2292 510         801 my $seen_tip_to_keep = 0;
2293 510         656 for my $tip ( @{ $node->get_terminals } ) {
  510         1232  
2294 9248 100       16534 $seen_tip_to_keep++ if $keep{ $tip->get_name };
2295             }
2296 510 100       2472 if ( not $seen_tip_to_keep ) {
2297 26         84 $node->set_parent();
2298 26         78 $self->delete($node);
2299             }
2300             }
2301             }
2302 5         116 );
2303 5         120 $self->remove_unbranched_internals;
2304 5         202 return $self;
2305             }
2306              
2307             =begin comment
2308              
2309             Type : Internal method
2310             Title : lineage_through_time
2311             Usage : ($time,$count) = lineage_through_time($tree)
2312             Function: Alternative to $tree->ltt that permits extinctions
2313             Returns : $time: array ref of times
2314             $count: array ref of species counts corresponding to the times
2315             Args : the phylogenetic $tree for which to produce the ltt data
2316              
2317             =end comment
2318              
2319             =cut
2320              
2321             sub lineage_through_time {
2322 89     89 0 180 my $tree = shift;
2323 89         253 my ( $speciation, $extinction ) = _recursive_ltt_helper($tree);
2324 89         189 my @speciation = sort { $a <=> $b } @{$speciation};
  13358         14943  
  89         555  
2325 89         187 my @extinction = sort { $a <=> $b } @{$extinction};
  14123         15581  
  89         230  
2326 89         209 my @time = (0);
2327 89         194 my @count = (1);
2328 89         165 my $n_species = 1;
2329 89         383 my $end_time = max( @speciation, @extinction );
2330 89 50       255 return ( [], [] ) if ( $end_time == 0 );
2331              
2332             #We remove any extinction events occurring at the very end of the tree (as they are not real extinctions)
2333 89   100     543 while ( scalar @extinction
2334             && ( $end_time - $extinction[-1] ) / $end_time < 1e-6 )
2335             {
2336 998         2235 pop @extinction;
2337             }
2338 89   100     313 while ( scalar @speciation || scalar @extinction ) {
2339 5321 100 100     14949 if ( scalar @extinction == 0
      100        
2340             || ( scalar @speciation && $speciation[0] < $extinction[0] ) )
2341             {
2342 3115         4185 push( @count, ++$n_species );
2343 3115         6025 push( @time, shift(@speciation) );
2344             }
2345             else {
2346 2206         2895 push( @count, --$n_species );
2347 2206         4136 push( @time, shift(@extinction) );
2348             }
2349             }
2350 89         494 return ( \@time, \@count );
2351             }
2352              
2353             =begin comment
2354              
2355             Type : Internal method
2356             Title : _recursive_ltt_helper
2357             Usage : ($speciation, $extinction) = _recursive_ltt_helper($tree)
2358             Function: Helper for lineage_through_time
2359             Returns : $speciation: array ref of speciation times
2360             $extinction: array ref of extinction times
2361             Args : the phylogenetic $tree for which to produce the ltt data
2362              
2363             =end comment
2364              
2365             =cut
2366              
2367             sub _recursive_ltt_helper {
2368 6319     6319   9351 my ( $tree, $node, $time ) = @_;
2369              
2370             #If we are being invoked at the root level
2371 6319 100       10236 $node = $tree->get_root unless defined $node;
2372 6319 100       9820 $time = 0 unless defined $time;
2373              
2374             #The new time
2375 6319         12013 $time += $node->get_branch_length;
2376 6319 100       12204 return ( [], [$time] ) if ( $node->is_terminal );
2377 3115         5052 my @speciation;
2378             my @extinction;
2379 3115         3759 foreach ( @{ $node->get_children } ) {
  3115         5967  
2380 6230         11014 my ( $spec, $ext ) = _recursive_ltt_helper( $tree, $_, $time );
2381 6230         8713 @speciation = ( @speciation, @{$spec} );
  6230         12488  
2382 6230         7744 @extinction = ( @extinction, @{$ext} );
  6230         19913  
2383             }
2384 3115         5457 push( @speciation, $time );
2385 3115         5906 return ( \@speciation, \@extinction );
2386             }
2387              
2388             =begin comment
2389              
2390             Type : Internal method.
2391             Title : _calc_node_properties
2392             Usage : _calc_node_properties($tree);
2393             Function: Calculates the distance of nodes from the root
2394             Returns : The maximum distance from the root
2395             Args :
2396              
2397             =end comment
2398              
2399             =cut
2400              
2401             sub _calc_node_properties {
2402 1077     1077   1836 my ( $node, $root_distance );
2403 1077         1543 my $tree = shift;
2404 1077         2689 my $root = $tree->get_root;
2405              
2406             #Check whether we were given a node and distance
2407 1077 100       2390 if ( scalar @_ ) {
2408 1072         1603 $node = shift;
2409 1072         1455 $root_distance = shift;
2410              
2411             #Otherwise the root is the default
2412             }
2413             else {
2414 5         10 $node = $root;
2415 5         46 $root->set_generic( tree_height => 0 );
2416 5         10 $root_distance = 0;
2417             }
2418 1077         3531 $node->set_generic( root_distance => $root_distance );
2419 1077 100       2760 if ( $root_distance > $root->get_generic('tree_height') ) {
2420 105         285 $root->set_generic( tree_height => $root_distance );
2421             }
2422 1077         1627 my $terminal_count = 0;
2423 1077         2970 my $children = $node->get_children;
2424 1077 50       2419 if ( defined $children ) {
2425 1077         1289 foreach ( @{$children} ) {
  1077         3419  
2426 1072         2578 _calc_node_properties( $tree, $_,
2427             $root_distance + $_->get_branch_length() );
2428             }
2429             }
2430             }
2431              
2432             =begin comment
2433              
2434             Type : Internal method
2435             Title : nchoosek
2436             Usage : $out = nchoosek($n,$k)
2437             Function: Returns the binomial coefficient for $n and $k
2438             Returns : the binomial coefficient
2439             Args : $n, $k
2440              
2441             =end comment
2442              
2443             =cut
2444              
2445             sub nchoosek {
2446 0     0 0   my ( $n, $k ) = @_;
2447 0           my $r = 1;
2448 0 0 0       return 0 if ( $k > $n || $k < 0 );
2449 0           for ( my $d = 1 ; $d <= $k ; $d++ ) {
2450 0           $r *= $n--;
2451 0           $r /= $d;
2452             }
2453 0           return $r;
2454             }
2455             1;