File Coverage

blib/lib/Bio/Phylo/EvolutionaryModels.pm
Criterion Covered Total %
statement 397 820 48.4
branch 93 258 36.0
condition 37 143 25.8
subroutine 34 46 73.9
pod 13 21 61.9
total 574 1288 44.5


line stmt bran cond sub pod time code
1             package Bio::Phylo::EvolutionaryModels;
2 1     1   763 use strict;
  1         3  
  1         30  
3 1     1   4 use base 'Exporter';
  1         2  
  1         88  
4 1     1   282 use Bio::Phylo::Forest::Tree;
  1         3  
  1         8  
5 1     1   386 use Bio::Phylo::Forest;
  1         2  
  1         9  
6 1     1   5 use Math::CDF qw'qnorm qbeta';
  1         2  
  1         60  
7 1     1   6 use List::Util qw'min max';
  1         2  
  1         45  
8 1     1   316 use POSIX qw'ceil floor';
  1         4339  
  1         4  
9 1     1   1235 use Config; #Use to check whether multi-threading is available
  1         2  
  1         37  
10            
11             BEGIN {
12            
13             # set the version for version checking
14 1     1   5 use Bio::Phylo; our $VERSION = $Bio::Phylo::VERSION;
  1     1   2  
  1         4  
  1         4  
15 1         981 our @EXPORT_OK =
16             qw(&sample &constant_rate_birth &constant_rate_birth_death &clade_shifts);
17             }
18            
19             =head1 NAME
20            
21             Bio::Phylo::EvolutionaryModels - Evolutionary models for phylogenetic trees and methods to sample these
22             Klaas Hartmann, September 2007
23            
24             =head1 SYNOPSIS
25            
26             #For convenience we import the sample routine (so we can write sample(...) instead of
27             #Bio::Phylo::EvolutionaryModels::sample(...).
28             use Bio::Phylo::EvolutionaryModels qw (sample);
29            
30             #Example#A######################################################################
31             #Simulate a single tree with ten species from the constant rate birth model with parameter 0.5
32             my $tree = Bio::Phylo::EvolutionaryModels::constant_rate_birth(birth_rate => .5, tree_size => 10);
33            
34             #Example#B######################################################################
35             #Sample 5 trees with ten species from the constant rate birth model using the b algorithm
36             my ($sample,$stats) = sample(sample_size =>5,
37             tree_size => 10,
38             algorithm => 'b',
39             algorithm_options => {rate => 1},
40             model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth,
41             model_options => {birth_rate=>.5});
42            
43            
44             #Print a newick string for the 4th sampled tree
45             print $sample->[3]->to_newick."\n";
46            
47             #Example#C######################################################################
48             #Sample 5 trees with ten species from the constant rate birth and death model using
49             #the bd algorithm and two threads (useful for dual core processors)
50             #NB: we must specify an nstar here, an appropriate choice will depend on the birth_rate
51             # and death_rate we are giving the model
52            
53             my ($sample,$stats) = sample(sample_size =>5,
54             tree_size => 10,
55             threads => 2,
56             algorithm => 'bd',
57             algorithm_options => {rate => 1, nstar => 30},
58             model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth_death,
59             model_options => {birth_rate=>1,death_rate=>.8});
60            
61             #Example#D######################################################################
62             #Sample 5 trees with ten species from the constant rate birth and death model using
63             #incomplete taxon sampling
64             #
65             #sampling_probability is set so that the true tree has 10 species with 50% probability,
66             #11 species with 30% probability and 12 species with 20% probability
67             #
68             #NB: we must specify an mstar here this will depend on the model parameters and the
69             # incomplete taxon sampling parameters
70            
71             my $algorithm_options = {rate => 1,
72             nstar => 30,
73             mstar => 12,
74             sampling_probability => [.5, .3, .2]};
75            
76             my ($sample,$stats) = sample(sample_size =>5,
77             tree_size => 10,
78             algorithm => 'incomplete_sampling_bd',
79             algorithm_options => $algorithm_options,
80             model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth_death,
81             model_options => {birth_rate=>1,death_rate=>.8});
82            
83             #Example#E######################################################################
84             #Sample 5 trees with ten species from a Yule model using the memoryless_b algorithm
85            
86             #First we define the random function for the shortest pendant edge for a Yule model
87             my $random_pendant_function = sub {
88             %options = @_;
89             return -log(rand)/$options{birth_rate}/$options{tree_size};
90             };
91            
92             #Then we produce our sample
93             my ($sample,$stats) = sample(sample_size =>5,
94             tree_size => 10,
95             algorithm => 'memoryless_b',
96             algorithm_options => {pendant_dist => $random_pendant_function},
97             model => \&Bio::Phylo::EvolutionaryModels::constant_rate_birth,
98             model_options => {birth_rate=>1});
99            
100             #Example#F#######################################################################
101             #Sample 5 trees with ten species from a constant birth death rate model using the
102             #constant_rate_bd algorithm
103             my ($sample) = sample(sample_size => 5,
104             tree_size => 10,
105             algorithm => 'constant_rate_bd',
106             model_options => {birth_rate=>1,death_rate=>.8});
107            
108             =head1 DESCRIPTION
109            
110             This package contains evolutionary models for phylogenetic trees and
111             algorithms for sampling from these models. It is a non-OO module that
112             optionally exports the 'sample', 'constant_rate_birth' and
113             'constant_rate_birth_death' subroutines into the caller's namespace,
114             using the C<< use Bio::Phylo::EvolutionaryModels qw(sample constant_rate_birth constant_rate_birth_death); >>
115             directive. Alternatively, you can call the subroutines as class methods,
116             as in the synopsis.
117            
118             The initial set of algorithms available in this package corresponds to those in:
119            
120             Sampling trees from evolutionary models
121             Klaas Hartmann, Dennis Wong, Tanja Gernhard
122             Systematic Biology, in press
123            
124             Some comments and code refers back to this paper.
125             Further algorithms and evolutionary are encouraged
126             and welcome.
127            
128             To make this code as straightforward as possible to read some of the
129             algorithms have been implemented in a less than optimal manner. The code
130             also follows the structure of an earlier version of the manuscript so
131             there is some redundancy (eg. the birth algorithm is just a specific
132             instance of the birth_death algorithm)
133            
134             =head1 SAMPLING
135            
136             All sampling algorithms should be accessed through the generic sample
137             interface.
138            
139             =head2 Generic sampling interface: sample()
140            
141             Type : Interface
142             Title : sample
143             Usage : see SYNOPSIS
144             Function: Samples phylogenetic trees from an evolutionary model
145             Returns : A sample of phylogenetic trees and statistics from the
146             sampling algorithm
147             Args : Sampling parameters in a hash
148            
149             This method acts as a gateway to the various sampling algorithms. The
150             argument is a single hash containing the options for the sampling run.
151            
152            
153             Sampling parameters (* denotes optional parameters):
154            
155             sample_size The number of trees to return (more trees may be returned)
156             tree_size The size that returned trees should be
157             model The evolutionary model (should be a function reference)
158             model_options A hash pointer for model options (see individual models)
159             algorithm The algorithm to use (omit the preceding sample_)
160             algorithm_options A hash pointer for options for the algorithm (see individual algorithms for details)
161             threads* The number of threads to use (default is 1)
162             output_format* Set to newick for newick trees (default is Bio::Phylo::Forest::Tree)
163             remove_extinct Set to true to remove extinct species
164            
165            
166            
167             Available algorithms (algorithm names in the paper are given in brackets):
168            
169             b For all pure birth models (simplified GSA)
170             bd For all birth and death models (GSA)
171             incomplete_sampling_bd As above, with incomplete taxon sampling (extended GSA)
172             memoryless_b For memoryless pure birth models (PBMSA)
173             constant_rate_bd For birth and death models with constant rates (BDSA)
174            
175            
176             Model
177            
178             If you create your own model it must accept an options hash as its input.
179             This options hash can contain any parameters you desire. Your model should
180             simulate a tree until it becomes extinct or the size/age limit as specified
181             in the options has been reached. Respectively these options are tree_size
182             and tree_age.
183            
184            
185             Multi-threading
186            
187             Multi-thread support is very simplistic. The number of threads you specify
188             are created and each is assigned the task of finding sample_size/threads
189             samples. I had problems with using Bio::Phylo::Forest::Tree in a multi-
190             threaded setting. Hence the sampled trees are returned as newick strings to
191             the main routine where (if required) Tree objects are recreated from the
192             strings. For most applications this overhead seems negligible in contrast
193             to the sampling times.
194            
195            
196             From a code perspective this function (sample):
197            
198             Checks input arguments
199             Handles multi-threading
200             Calls the individual algorithms to perform sampling
201             Reformats data
202            
203             =cut
204            
205             my @threads;
206            
207             sub sample {
208 5     5 1 107 my %options = @_;
209 5         51 my %methods_require = (
210             b => ['rate'],
211             bd => [ 'rate', 'nstar' ],
212             incomplete_sampling_bd =>
213             [ 'rate', 'nstar', 'mstar', 'sampling_probability' ],
214             memoryless_b => ['pendant_dist'],
215             constant_rate_bd => [],
216             );
217            
218             #Default is to sample a single tree
219 5 50       27 $options{sample_size} = 1 unless defined $options{sample_size};
220            
221             #Default is to use a single thread
222 5 100       19 $options{threads} = 1 unless defined $options{threads};
223            
224             #Check that multiple threads are actually supported
225 5 50 33     19 if ( $options{threads} > 1 && !$Config{useithreads} ) {
226 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
227             "your perl installation does not support multiple threads" );
228             }
229            
230             #Check an algorithm was specified
231 5 50       16 unless ( defined $options{algorithm} ) {
232 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw(
233             'error' => "an algorithm type must be specified" );
234             }
235            
236             #Check the algorithm type is valid
237 5 50       15 unless ( defined $methods_require{ $options{algorithm} } ) {
238 0         0 Bio::Phylo::Util::Exceptions::BadFormat->throw(
239             'error' => "'$options{algorithm}' is not a valid algorithm" );
240             }
241            
242             #Check the algorithm options
243 5         10 foreach ( @{ $methods_require{ $options{algorithm} } } ) {
  5         14  
244 8 50       19 unless ( defined $options{algorithm_options}->{$_} ) {
245 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
246             "'$_' must be specified for the '$options{algorithm}' algorithm"
247             );
248             }
249             }
250            
251             #If we are doing incomplete taxon sampling the sampling probability must be specified
252 5 0 33     15 if ( defined $options{incomplete_sampling}
      0        
253             && $options{incomplete_sampling}
254             && !( defined $options{algorithm_options}->{sampling_probability} ) )
255             {
256 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
257             "'sampling_probability' must be specified for post hoc incomplete sampling to be applied"
258             );
259             }
260            
261             #Check that a model has been specified
262 5 50 66     26 unless ( defined $options{model}
263             || $options{algorithm} eq 'constant_rate_bd' )
264             {
265 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw(
266             'error' => "a model must be specified" );
267             }
268            
269             #Get a function pointer for the algorithm
270 5         15 my $algorithm = 'sample_' . $options{algorithm};
271 5         24 $algorithm = \&$algorithm;
272 5         7 my @output;
273            
274             #Run the algorithm, different method for multiple threads
275 5 50       16 if ( $options{threads} > 1 ) {
276 0         0 require threads;
277 0         0 require threads::shared;
278 0         0 @output = ( [], [] );
279             $SIG{'KILL'} = sub {
280 0     0   0 foreach (@threads) { $_->kill('KILL')->detach(); }
  0         0  
281 0         0 threads->exit();
282 0         0 };
283            
284             #Start the threads
285 0         0 for ( ( 1 .. $options{threads} ) ) {
286 0         0 @_ = ();
287            
288             #Note the list context of the return argument here determines
289             #the data type returned from the thread.
290             ( $threads[ $_ - 1 ] ) = threads->new( \&_sample_newick, %options,
291             sample_size => ceil( $options{sample_size} / $options{threads} )
292 0         0 );
293             }
294            
295             #Wait for them to finish and combine the data
296 0         0 for ( ( 1 .. $options{threads} ) ) {
297 0         0 until ( $threads[ $_ - 1 ]->is_joinable() ) { sleep(0.1); }
  0         0  
298 0         0 my @thread_data = $threads[ $_ - 1 ]->join;
299 0         0 for ( my $index = 0 ; $index < scalar @thread_data ; $index++ ) {
300 0 0       0 $output[$index] = [] if scalar @output < $index;
301             $output[$index] =
302 0         0 [ @{ $output[$index] }, @{ $thread_data[$index] } ];
  0         0  
  0         0  
303             }
304             }
305            
306             #Turn newick strings back into tree objects
307 0 0 0     0 unless ( defined $options{output_format}
308             && $options{output_format} eq 'newick' )
309             {
310            
311             #Convert to newick trees
312 0         0 for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
  0         0  
313 0         0 $output[0]->[$index] = Bio::Phylo::IO->parse(
314             -format => 'newick',
315             -string => $output[0]->[$index]
316             )->first;
317             }
318             }
319             }
320             else {
321            
322             #Get the samples
323 5         32 @output = &$algorithm(%options);
324            
325             #Turn into newick trees if requested
326 5 50 66     110 if ( defined $options{output_format}
    100 66        
327             && $options{output_format} eq 'newick' )
328             {
329            
330             #Convert to newick trees
331 0         0 for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
  0         0  
332 0         0 $output[0]->[$index] =
333             $output[0]->[$index]->to_newick( '-nodelabels' => 1 );
334             }
335             }
336             elsif ( defined $options{output_format}
337             && $options{output_format} eq 'forest' )
338             {
339            
340             # save as a forest
341 4         22 my $forest = Bio::Phylo::Forest->new;
342 4         12 for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
  24         63  
343 20         56 $forest->insert( $output[0]->[$index] );
344             }
345 4         10 $output[0] = $forest;
346             }
347             }
348 5         64 return @output;
349             }
350            
351             =begin comment
352            
353             Type : Internal method
354             Title : _sample_newick
355             Usage : ($thread) = threads->new(\&_sample_newick, %options);
356             @thread_output = $thread->join;
357             Function: Wrapper for sampling routines used for multi-threading
358             Returns : Output from sampling algorithms with trees replaced by newick strings
359             Args : %options to pass to sampling algorithm
360            
361             =end comment
362            
363             =cut
364            
365             sub _sample_newick {
366 0     0   0 my %options = @_;
367 0         0 my $algorithm = 'sample_' . $options{algorithm};
368            
369             # Thread 'cancellation' signal handler
370 0     0   0 $SIG{'KILL'} = sub { threads->exit(); };
  0         0  
371 0         0 $algorithm = \&$algorithm;
372            
373             #Perform the sampling
374 0         0 my @output = ( &$algorithm(%options) );
375            
376             #Convert to newick trees
377 0         0 for ( my $index = 0 ; $index < scalar @{ $output[0] } ; $index++ ) {
  0         0  
378 0         0 $output[0]->[$index] =
379             $output[0]->[$index]->to_newick( '-nodelabels' => 1 );
380             }
381 0         0 return @output;
382             }
383            
384             =head2 Sampling algorithms
385            
386             These algorithms should be accessed through the sampling interface (sample()).
387             Additional parameters need to be passed to these algorithms as described for
388             each algorithm.
389            
390             =over
391            
392             =item sample_b()
393            
394             Sample from any birth model
395            
396             Type : Sampling algorithm
397             Title : sample_b
398             Usage : see sample
399             Function: Samples trees from a pure birth model
400             Returns : see sample
401             Args : %algorithm_options requires the field:
402             rate => sampling rate
403            
404             =cut
405            
406             sub sample_b {
407 1     1 1 3 my %options = @_;
408            
409             #The sample of trees
410 1         2 my @sample;
411            
412             #A list of the expected number of samples
413             my @expected_summary;
414 1         2 my $model = $options{model};
415 1         2 my $rate = $options{algorithm_options}->{rate};
416            
417             #While we have insufficient samples
418 1         3 while ( scalar @sample < $options{sample_size} ) {
419            
420             #Generate a candidate model run
421 16         82 my $candidate = &$model( %{ $options{model_options} },
422 16         30 tree_size => $options{tree_size} );
423            
424             #Check that the tree has no extinctions
425 16 50       73 unless ( $candidate->is_ultrametric(1e-6) ) {
426 0         0 Bio::Phylo::Util::Exceptions::BadFormat->throw(
427             'error' => "the model must be a pure birth process" );
428             }
429            
430             #Get the lineage through time data
431 16         54 my ( $time, $count ) = lineage_through_time($candidate);
432            
433             #The expected number of samples we want
434 16         42 my $expected_samples = $rate * ( $time->[-1] - $time->[-2] );
435 16         34 push( @expected_summary, $expected_samples );
436            
437             #Get the random number of samples from this candidate tree
438 16         45 while ( $expected_samples > 0 ) {
439            
440             #If the number of samples remaining is greater than one
441             #we take a sample, otherwise we take a sample with probability
442             #according to the remaining number of samples
443 16 100 66     91 if ( $expected_samples > 1 || rand(1) < $expected_samples ) {
444 5         20 my $tree = copy_tree($candidate);
445            
446             #Truncate the tree at a random point during which it had tree_size species
447 5         18 truncate_tree_time( $tree,
448             ( $time->[-1] - $time->[-2] ) * rand(1) + $time->[-2] );
449            
450             #Add the tree to our sample
451 5         15 push( @sample, $tree );
452            
453             #Update the sample counter (for GUI or other applications that want to know how
454             #many samples have been obtained)
455 5 50       16 if ( defined $options{counter} ) {
456 0         0 my $counter = $options{counter};
457 0         0 &$counter(1);
458             }
459             }
460 16         110 $expected_samples--;
461             }
462             }
463 1         6 return ( \@sample, \@expected_summary );
464             }
465            
466             =item sample_bd()
467            
468             Sample from any birth and death model for which nstar exists
469            
470             Type : Sampling algorithm
471             Title : sample_bd
472             Usage : see sample
473             Function: Samples trees from a birth and death model
474             Returns : see sample
475             Args : %algorithm_options requires the fields:
476             nstar => once a tree has nstar species there should be
477             a negligible chance of returning to tree_size species
478             rate => sampling rate
479            
480             =cut
481            
482             sub sample_bd {
483 1     1 1 5 my %options = @_;
484            
485             #The sample of trees
486 1         3 my @sample;
487            
488             #A list of the expected number of samples
489             my @expected_summary;
490            
491             #Convenience variables
492 1         2 my $model = $options{model};
493 1         1 my $nstar = $options{algorithm_options}->{nstar};
494 1         3 my $rate = $options{algorithm_options}->{rate};
495            
496             #While we have insufficient samples
497 1         4 while ( scalar @sample < $options{sample_size} ) {
498            
499             #Generate a candidate model run
500             my $candidate =
501 15         27 &$model( %{ $options{model_options} }, tree_size => $nstar );
  15         92  
502            
503             #Get the lineage through time data
504 15         67 my ( $time, $count ) = lineage_through_time($candidate);
505            
506             #Reorganise the lineage through time data
507             #@duration contains the length of the intervals with the right number of species
508             #@start contains the starting times of these intervals
509             #@prob contains the cumulative probability of each interval being selected
510             #$total_duration contains the sum of the interval lengths
511 15         36 my ( @duration, @start, @prob, $total_duration );
512 15         30 for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) {
  1203         1756  
513 1188 100       1764 if ( $count->[$index] == $options{tree_size} ) {
514 40         72 push( @duration, $time->[ $index + 1 ] - $time->[$index] );
515 40         59 push( @start, $time->[$index] );
516 40         54 $total_duration += $duration[-1];
517 40         63 push( @prob, $total_duration );
518             }
519             }
520 1     1   6 no warnings 'uninitialized'; # FIXME
  1         1  
  1         41  
521 15 100       76 next if $total_duration == 0;
522 1     1   5 use warnings;
  1         2  
  1         313  
523 8         23 for ( my $index = 0 ; $index < scalar @prob ; $index++ ) {
524 40         67 $prob[$index] /= $total_duration;
525             }
526            
527             #The expected number of samples we want
528 8         17 my $expected_samples = $rate * $total_duration;
529 8         23 push( @expected_summary, $expected_samples );
530            
531             #Get the random number of samples from this candidate tree
532 8         21 while ( $expected_samples > 0 ) {
533            
534             #If the number of samples remaining is greater than one
535             #we take a sample, otherwise we take a sample with probability
536             #according to the remaining number of samples
537 8 100 66     47 if ( $expected_samples > 1 || rand(1) < $expected_samples ) {
538 5         24 my $tree = copy_tree($candidate);
539            
540             #Get a random interval
541 5         19 my $interval_choice = rand(1);
542 5         12 my $interval;
543 5         33 for (
544             $interval = 0 ;
545             $interval_choice > $prob[$interval] ;
546             $interval++
547             )
548             {
549             }
550            
551             #Truncate the tree at a random point during this interval
552 5         32 truncate_tree_time( $tree,
553             $duration[$interval] * rand(1) + $start[$interval] );
554            
555             #Add the tree to our sample
556 5         15 push( @sample, $tree );
557            
558             #Update the sample counter (for GUI or other applications that want to know how
559             #many samples have been obtained)
560 5 50       22 if ( defined $options{counter} ) {
561 0         0 my $counter = $options{counter};
562 0         0 &$counter(1);
563             }
564             }
565 8         166 $expected_samples--;
566             }
567             }
568 1         7 return ( \@sample, \@expected_summary );
569             }
570            
571             =item sample_incomplete_sampling_bd()
572            
573             Sample from any birth and death model with incomplete taxon sampling
574            
575             Type : Sampling algorithm
576             Title : sample_incomplete_sampling_bd
577             Usage : see sample
578             Function: Samples trees from a birth and death model with incomplete taxon sampling
579             Returns : see sample
580             Args : %algorithm_options requires the fields:
581             rate => sampling rate
582             nstar => once a tree has nstar species there should be
583             a negligible chance of returning to mstar species
584             mstar => trees with more than mstar species form a negligible
585             contribution to the final sample.
586             sampling_probability => see below.
587            
588             sampling_probability
589            
590             vector: must have length (mstar-tree_size+1) The ith element gives the probability
591             of not sampling i species.
592             scalar: the probability of sampling any individual species. Is used to calculate
593             a vector as discussed in the paper.
594            
595             =cut
596            
597             sub sample_incomplete_sampling_bd {
598 1     1 1 6 my %options = @_;
599            
600             # %options = (%options, %{$options{algorithm_options}});
601             #The sample of trees
602 1         3 my @sample;
603            
604             #A list of the expected number of samples
605             my @expected_summary;
606            
607             # #Convenience variables
608 1         2 my $model = $options{model};
609 1         2 my $nstar = $options{algorithm_options}->{nstar};
610 1         2 my $mstar = $options{algorithm_options}->{mstar};
611 1         1 my $rate = $options{algorithm_options}->{rate};
612             my $sampling_probability =
613 1         3 $options{algorithm_options}->{sampling_probability};
614            
615             #If sampling_probability is a list check it's length
616 1 50 33     5 if ( ref $sampling_probability
617 1         6 && scalar @{$sampling_probability} !=
618             ( $mstar - $options{tree_size} + 1 ) )
619             {
620 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
621             "'sampling_probability' must be a scalar or a list with m_star-tree_size+1 items"
622             );
623             }
624            
625             #If a single sampling probability was given we calculate the
626             #probability of sub-sampling given numbers of species.
627 1 50       3 unless ( ref $sampling_probability ) {
628 0         0 my $p = $sampling_probability;
629 0         0 my @vec;
630 0         0 my $total = 0;
631 0         0 foreach ( ( $options{tree_size} .. $mstar ) ) {
632            
633             #The probability of sampling tree_size species from a tree containing $_ species
634             push( @vec,
635             nchoosek( $_, $options{tree_size} ) *
636             ( $p**$options{tree_size} ) *
637 0         0 ( ( 1 - $p )**( $_ - $options{tree_size} ) ) );
638 0         0 $total += $vec[-1];
639             }
640 0         0 for ( my $ii = 0 ; $ii < scalar @vec ; $ii++ ) { $vec[$ii] /= $total; }
  0         0  
641 0         0 $sampling_probability = \@vec;
642             }
643            
644             #We now normalise the sampling_probability list so that it sums to unity
645             #this allows comparable sampling rates to be used here and in sample_bd
646 1         3 my $total_sp = 0;
647 1         3 foreach ( @{$sampling_probability} ) { $total_sp += $_; }
  1         2  
  3         8  
648 1     1   5 no warnings; # FIXME
  1         5  
  1         48  
649 1         6 foreach ( my $index = 1 .. scalar @{$sampling_probability} ) {
  0         0  
  0         0  
650 1     1   5 no warnings; # FIXME
  1         1  
  1         27  
651 1         7 $sampling_probability->[ $index - 1 ] /= $total_sp;
652 1     1   5 use warnings;
  1         1  
  1         25  
653             }
654 1     1   4 use warnings;
  1         2  
  1         65  
655            
656             #While we have insufficient samples
657 1         4 while ( scalar @sample < $options{sample_size} ) {
658            
659             #Generate a candidate model run
660             my $candidate =
661 13         28 &$model( %{ $options{model_options} }, tree_size => $nstar );
  13         72  
662            
663             #Get the lineage through time data
664 13         65 my ( $time, $count ) = lineage_through_time($candidate);
665 13         35 my @size_stats;
666 13         41 for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) {
  1709         2423  
667 1     1   5 no warnings 'uninitialized'; # FIXME
  1         2  
  1         87  
668 1696 100       2424 if ( $count->[$index] >= $options{tree_size} ) {
669             $size_stats[ $count->[$index] - $options{tree_size} ] +=
670             $time->[$index] *
671             $sampling_probability->[ $count->[$index] -
672 1091         1693 $options{tree_size} ];
673             }
674             }
675            
676             #Reorganise the lineage through time data
677             #@duration contains the length of intervals with more than tree_size species
678             #@start contains the starting times of these intervals
679             #@prob contains the cumulative probability of each interval being selected
680             #$total_prob contains the sum of @prob before normalisation
681 13         30 my ( @duration, @start, @prob, $total_prob );
682 13         32 $total_prob = 0;
683 13         23 for ( my $index = 0 ; $index < scalar @{$time} - 1 ; $index++ ) {
  1709         2459  
684 1696 100       2441 if ( $count->[$index] >= $options{tree_size} ) {
685 1091         1457 push( @duration, $time->[ $index + 1 ] - $time->[$index] );
686 1091         1373 push( @start, $time->[$index] );
687 1     1   5 no warnings 'uninitialized'; # FIXME
  1         2  
  1         4098  
688             $total_prob +=
689             $duration[-1] *
690             $sampling_probability->[ $count->[$index] -
691 1091         1424 $options{tree_size} ];
692 1091         1506 push( @prob, $total_prob );
693             }
694             }
695 13 100       65 next if $total_prob == 0;
696 7         27 for ( my $index = 0 ; $index < scalar @prob ; $index++ ) {
697 1091         1551 $prob[$index] /= $total_prob;
698             }
699            
700             #The expected number of samples we want
701 7         21 my $expected_samples = $rate * $total_prob;
702 7         17 push( @expected_summary, $expected_samples );
703             $expected_samples = $options{sample_size} - scalar @sample
704 7 50       24 if $expected_samples > $options{sample_size} - scalar @sample;
705            
706             #Get the random number of samples from this candidate tree
707 7         22 while ( $expected_samples > 0 ) {
708            
709             #If the number of samples remaining is greater than one
710             #we take a sample, otherwise we take a sample with probability
711             #according to the remaining number of samples
712 8 100 100     55 if ( $expected_samples > 1 || rand(1) < $expected_samples ) {
713 5         19 my $tree = copy_tree($candidate);
714            
715             #Get a random interval
716 5         22 my $interval_choice = rand(1);
717 5         10 my $interval;
718 5         131 for (
719             $interval = 0 ;
720             $interval_choice > $prob[$interval] ;
721             $interval++
722             )
723             {
724             }
725            
726             #Truncate the tree at a random point during this interval
727 5         36 truncate_tree_time( $tree,
728             $duration[$interval] * rand(1) + $start[$interval] );
729            
730             #Remove random species so it has the right size
731 5         26 truncate_tree_size( $tree, $options{tree_size} );
732            
733             #Add the tree to our sample
734 5         22 push( @sample, $tree );
735            
736             #Update the sample counter (for GUI or other applications that want to know how
737             #many samples have been obtained)
738 5 50       26 if ( defined $options{counter} ) {
739 0         0 my $counter = $options{counter};
740 0         0 &$counter(1);
741             }
742             }
743 8         507 $expected_samples--;
744             }
745             }
746 1         8 return ( \@sample, \@expected_summary );
747             }
748            
749             =item sample_memoryless_b()
750            
751             Sample from a memoryless birth model
752            
753             Type : Sampling algorithm
754             Title : sample_memoryless_b
755             Usage : see sample
756             Function: Samples trees from a memoryless birth model
757             Returns : see sample
758             Args : %algorithm_options with fields:
759             pendant_dist => function reference for generating random
760             shortest pendant edges
761            
762             NB: The function pointed to by pendant_dist is given model_options
763             as it's input argument with an added field tree_size. It must return
764             a random value from the probability density for the shortest pendant
765             edges.
766            
767             =cut
768            
769             sub sample_memoryless_b {
770 1     1 1 7 my %options = @_;
771            
772             #The sample of trees
773 1         4 my @sample;
774            
775             #A list of the expected number of samples
776             my @expected_summary;
777            
778             #The user specified functions
779 1         3 my $model = $options{model};
780 1         3 my $pendant_dist = $options{algorithm_options}->{pendant_dist};
781            
782             #While we have insufficient samples
783 1         5 while ( scalar @sample < $options{sample_size} ) {
784            
785             #Generate a tree ending just after the last speciation event
786 5         38 my $tree = &$model( %{ $options{model_options} },
787 5         12 tree_size => $options{tree_size} );
788            
789             #Check that the tree has no extinctions
790 5 50       36 unless ( $tree->is_ultrametric(1e-6) ) {
791 0         0 Bio::Phylo::Util::Exceptions::BadFormat->throw(
792             'error' => "the model must be a pure birth process" );
793             }
794            
795             #Get the random length to add after the last speciation event
796 5         57 my $pendant_add = &$pendant_dist( %{ $options{model_options} },
797 5         12 tree_size => $options{tree_size} );
798            
799             #Add the final length
800 5         87 foreach ( @{ $tree->get_terminals } ) {
  5         33  
801 50         113 $_->set_branch_length( $_->get_branch_length + $pendant_add );
802             }
803            
804             #Add to the sample
805 5         16 push( @sample, $tree );
806 5         16 push( @expected_summary, 1 );
807            
808             #Update the sample counter (for GUI or other applications that want to know how
809             #many samples have been obtained)
810 5 50       44 if ( defined $options{counter} ) {
811 0         0 my $counter = $options{counter};
812 0         0 &$counter(1);
813             }
814             }
815 1         8 return ( \@sample, \@expected_summary );
816             }
817            
818             =item sample_constant_rate_bd()
819            
820             Sample from a constant rate birth and death model
821            
822             Type : Sampling algorithm
823             Title : sample_constant_rate_bd
824             Usage : see sample
825             Function: Samples trees from a memoryless birth model
826             Returns : see sample
827             Args : no specific algorithm options but see below
828            
829             NB: This algorithm only applies to constant rate birth and death
830             processes. Consequently a model does not need to be specified (and
831             will be ignored if it is). But birth_rate and death_rate model
832             options must be given.
833            
834             =cut
835            
836             sub sample_constant_rate_bd {
837 1     1 1 4 my %options = @_;
838            
839             #Store parameters in shorter variables (for clarity)
840             my ( $br, $dr, $n ) = (
841             $options{model_options}->{birth_rate},
842             $options{model_options}->{death_rate},
843             $options{tree_size}
844 1         5 );
845 1         2 my @sample;
846            
847             #Loop for sampling each tree
848 1         4 while ( scalar @sample < $options{sample_size} ) {
849 5         15 my @nodes;
850            
851             #Compute the random tree age from the inverse CDF (different formulas for
852             #birth rate == death rate and otherwise)
853             my $tree_age;
854            
855             #The uniform random variable
856 5         19 my $r = rand;
857 5 50       22 if ( $br == $dr ) {
858 0         0 $tree_age = 1 / ( $br * ( $r**( -1 / $n ) - 1 ) );
859             }
860             else {
861 5         53 $tree_age =
862             1 /
863             ( $br - $dr ) *
864             log(
865             ( 1 - $dr / $br * $r**( 1 / $n ) ) / ( 1 - $r**( 1 / $n ) ) );
866             }
867            
868             #Find the random speciation times
869 5         13 my @speciation;
870 5         19 foreach ( 0 .. ( $n - 2 ) ) {
871 45 50       70 if ( $br == $dr ) {
872 0         0 my $r = rand;
873 0         0 $speciation[$_] =
874             $r * $tree_age / ( 1 + $br * $tree_age * ( 1 - $r ) );
875             }
876             else {
877            
878             #Two repeated parts of the inverse CDF for clarity
879 45         82 my $a = $br - $dr * exp( ( $dr - $br ) * $tree_age );
880 45         62 my $b = ( 1 - exp( ( $dr - $br ) * $tree_age ) ) * rand;
881            
882             #The random speciation time from the inverse CDF
883 45         96 $speciation[$_] =
884             1 /
885             ( $br - $dr ) *
886             log( ( $a - $dr * $b ) / ( $a - $br * $b ) );
887             }
888             }
889            
890             #Create the initial terminals and a vector for their ages
891 5         11 my @terminals;
892             my @ages;
893 5         14 foreach ( 0 .. ( $n - 1 ) ) {
894            
895             #Add a new terminal
896 50         146 $terminals[$_] = Bio::Phylo::Forest::Node->new();
897 50         210 $terminals[$_]->set_name( 'ID' . $_ );
898 50         106 $ages[$_] = 0;
899             }
900 5         21 @nodes = @terminals;
901            
902             #Sort the speciation times
903 5         46 my @sorted_speciation = sort { $a <=> $b } @speciation;
  96         148  
904            
905             #Make a hash for easily finding the index of a given speciation event
906 5         11 my %speciation_hash;
907 5         19 foreach ( 0 .. ( $n - 2 ) ) {
908 45         183 $speciation_hash{ $speciation[$_] } = $_;
909             }
910            
911             #Construct the tree
912 5         18 foreach my $index ( 0 .. ( $n - 2 ) ) {
913            
914             #Create the parent node
915 45         135 my $parent = Bio::Phylo::Forest::Node->new();
916 45         189 $parent->set_name( 'ID' . ( $n + $index ) );
917 45         95 push( @nodes, $parent );
918            
919             #An index for this speciation event back into the unsorted vectors
920 45         219 my $spec_index = $speciation_hash{ $sorted_speciation[$index] };
921            
922             #Add the children to the parent node
923 45         168 $parent->set_child( $terminals[$spec_index] );
924 45         147 $terminals[$spec_index]->set_parent($parent);
925 45         133 $parent->set_child( $terminals[ $spec_index + 1 ] );
926 45         138 $terminals[ $spec_index + 1 ]->set_parent($parent);
927            
928             #Set the children's branch lengths
929 45         163 $terminals[$spec_index]->set_branch_length(
930             $sorted_speciation[$index] - $ages[$spec_index] );
931 45         158 $terminals[ $spec_index + 1 ]->set_branch_length(
932             $sorted_speciation[$index] - $ages[ $spec_index + 1 ] );
933            
934             #Replace the two terminals with the new one
935 45         95 splice( @terminals, $spec_index, 2, $parent );
936 45         98 splice( @ages, $spec_index, 2, $sorted_speciation[$index] );
937            
938             #Update the mapping for the sorted speciation times
939 45         187 foreach ( keys %speciation_hash ) {
940 405 100       692 $speciation_hash{$_}-- if $speciation_hash{$_} > $spec_index;
941             }
942             }
943            
944             #Add the nodes to a tree
945 5         40 my $tree = Bio::Phylo::Forest::Tree->new();
946 5         19 foreach ( reverse(@nodes) ) { $tree->insert($_); }
  95         204  
947 5         12 push( @sample, $tree );
948            
949             #Update the sample counter (for GUI or other applications that want to know how
950             #many samples have been obtained)
951 5 50       114 if ( defined $options{counter} ) {
952 0         0 my $counter = $options{counter};
953 0         0 &$counter(1);
954             }
955             }
956 1         7 return ( \@sample, [] );
957             }
958            
959             =back
960            
961             =head1 EVOLUTIONARY MODELS
962            
963             All evolutionary models take a options hash as their input argument
964             and return a Bio::Phylo::Forest::Tree. This tree may contain extinct
965             lineages (lineages that end prior to the end of the tree).
966            
967             The options hash contains any model specific parameters (see the
968             individual model descriptions) and one or both terminating conditions:
969             tree_size => the number of extant species at which to terminate the tree
970             tree_age => the age of the tree at which to terminate the process
971            
972             Note that if the model stops due to the tree_size condition then the
973             tree ends immediately after the speciation event that created the last
974             species.
975            
976             =over
977            
978             =item constant_rate_birth()
979            
980             A constant rate birth model (Yule/ERM)
981            
982             Type : Evolutionary model
983             Title : constant_rate_birth
984             Usage : $tree = constant_rate_birth(%options)
985             Function: Produces a tree from the model terminating at a given size/time
986             Returns : Bio::Phylo::Forest::Tree
987             Args : %options with fields:
988             birth_rate The birth rate parameter (default 1)
989             tree_size The size of the tree at which to terminate
990             tree_age The age of the tree at which to terminate
991            
992             NB: At least one of tree_size and tree_age must be specified
993            
994             =cut
995            
996             sub constant_rate_birth {
997 22     22 1 545 my %options = @_;
998 22         64 $options{death_rate} = 0;
999 22         81 return constant_rate_birth_death(%options);
1000             }
1001            
1002             =item external_model()
1003            
1004             A dummy model that takes as input a set of newick_trees and randomly samples
1005             these.
1006            
1007             Type : Evolutionary model
1008             Title : external_model
1009             Usage : $tree = $external_model(%options)
1010             Function: Returns a random tree that was given as input
1011             Returns : Bio::Phylo::Forest::Tree
1012             Args : %options with fields:
1013             trees An array of newick strings. One of these is returned at random.
1014            
1015             NB: The usual parameters tree_size and tree_age will be ignored. When sampling
1016             using this model the trees array must contain trees adhering to the requirements
1017             of the sampling algorithm. This is NOT checked automatically.
1018            
1019             =cut
1020            
1021             sub external_model {
1022 0     0 1 0 my %options = @_;
1023 0         0 my $choice = int( rand( scalar @{ $options{trees} } ) );
  0         0  
1024            
1025             #Pick a newick string and turn it in to a Bio::Phylo::Forest::Tree object
1026             my $tree = Bio::Phylo::IO->parse(
1027             -format => 'newick',
1028 0         0 -string => $options{trees}->[$choice]
1029             )->first;
1030 0         0 return $tree;
1031             }
1032            
1033             =item constant_rate_birth_death()
1034            
1035             A constant rate birth and death model
1036            
1037             Type : Evolutionary model
1038             Title : constant_rate_birth_death
1039             Usage : $tree = constant_rate_birth_death(%options)
1040             Function: Produces a tree from the model terminating at a given size/time
1041             Returns : Bio::Phylo::Forest::Tree
1042             Args : %options with fields:
1043             birth_rate The birth rate parameter (default 1)
1044             death_rate The death rate parameter (default no extinction)
1045             tree_size The size of the tree at which to terminate
1046             tree_age The age of the tree at which to terminate
1047            
1048             NB: At least one of tree_size and tree_age must be specified
1049            
1050             =cut
1051            
1052             sub constant_rate_birth_death {
1053 50     50 1 198 my %options = @_;
1054            
1055             #Check that we have a termination condition
1056 50 50 33     200 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1057            
1058             #Error here.
1059 0         0 return undef;
1060             }
1061            
1062             #Set the undefined condition to infinity
1063 50 50       134 $options{tree_size} = 1e6 unless defined $options{tree_size};
1064 50 50       167 $options{tree_age} = 1e6 unless defined $options{tree_age};
1065            
1066             #Set default rates
1067 50 50       124 $options{birth_rate} = 1 unless defined( $options{birth_rate} );
1068             delete $options{death_rate}
1069 50 100 66     316 if defined( $options{death_rate} ) && $options{death_rate} == 0;
1070            
1071             #Each node gets an ID number this tracks these
1072 50         90 my $node_id = 0;
1073            
1074             #Create a new tree with a root, start the list of terminal species
1075 50         218 my $tree = Bio::Phylo::Forest::Tree->new();
1076 50         218 my $root = Bio::Phylo::Forest::Node->new();
1077 50         214 $root->set_branch_length(0);
1078 50         268 $root->set_name( 'ID' . $node_id++ );
1079 50         203 $tree->insert($root);
1080 50         98 my @terminals = ($root);
1081 50         86 my ( $next_extinction, $next_speciation );
1082 50         103 my $time = 0;
1083 50         67 my $tree_size = 1;
1084            
1085             #Check whether we have a non-zero root edge
1086 50 50 33     198 if ( defined $options{root_edge} && $options{root_edge} ) {
1087            
1088             #Non-zero root. We set the time to the first speciation event
1089 0         0 $next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
1090             }
1091             else {
1092            
1093             #Zero root, we want a speciation event straight away
1094 50         90 $next_speciation = 0;
1095             }
1096            
1097             #Time of the first extinction event. If no extinction we always
1098             #set the extinction event after the current speciation event
1099 50 100       101 if ( defined $options{death_rate} ) {
1100 28         198 $next_extinction = -log(rand) / $options{death_rate} / $tree_size;
1101             }
1102             else {
1103 22         44 $next_extinction = $next_speciation + 1;
1104             }
1105            
1106             #While the tree has not become extinct and the termination criterion
1107             #has not been achieved we create new speciation and extinction events
1108 50   66     294 while ($tree_size > 0
      66        
1109             && $tree_size < $options{tree_size}
1110             && $time < $options{tree_age} )
1111             {
1112            
1113             #Add the time since the last event to all terminal species
1114 3096         5196 foreach (@terminals) {
1115             $_->set_branch_length(
1116             $_->get_branch_length + min(
1117             $next_extinction, $next_speciation,
1118 41725         73075 $options{tree_age} - $time
1119             )
1120             );
1121             }
1122            
1123             #Update the time
1124 3096         5571 $time += min( $next_extinction, $next_speciation );
1125            
1126             #If the tree exceeds the time limit we are done
1127 3096 50       5759 return $tree if ( $time > $options{tree_age} );
1128            
1129             #Get the species effected by this event and remove it from the terminals list
1130 3096         7320 my $effected =
1131             splice( @terminals, int( rand( scalar @terminals ) ), 1 );
1132            
1133             #If we have a speciation event we add two new species
1134 3096 100 66     8429 if ( $next_speciation < $next_extinction || !defined $next_extinction )
1135             {
1136 1843         3064 foreach ( 1, 2 ) {
1137            
1138             #Create a new species
1139 3686         10596 my $child = Bio::Phylo::Forest::Node->new();
1140 3686         13716 $child->set_name( 'ID' . $node_id++ );
1141            
1142             #Give it a zero edge length
1143 3686         10772 $child->set_branch_length(0);
1144            
1145             #Add it as a child to the speciating species
1146 3686         10169 $effected->set_child($child);
1147            
1148             #Add it to the tree
1149 3686         9644 $tree->insert($child);
1150            
1151             #Add it to the terminals list
1152 3686         7430 push( @terminals, $child );
1153             }
1154             }
1155            
1156             #We calculate the time that the next extinction and speciation
1157             #events will occur (only the earliest of these will actually
1158             #happen). NB: this approach is only appropriate for models where
1159             #speciation and extinction times are exponentially distributed.
1160             #Windows sometimes returns 0 values for rand...
1161 3096         5012 my ( $r1, $r2 ) = ( 0, 0 );
1162 3096         7559 $r1 = rand until $r1;
1163 3096         5840 $r2 = rand until $r2;
1164 3096         4005 $tree_size = scalar @terminals;
1165 3096 100       5011 return $tree unless $tree_size;
1166 3082         8445 $next_speciation = -log($r1) / $options{birth_rate} / $tree_size;
1167 3082 100       5399 if ( defined $options{death_rate} ) {
1168 2884         13568 $next_extinction = -log($r2) / $options{death_rate} / $tree_size;
1169             }
1170             else {
1171 198         996 $next_extinction = $next_speciation + 1;
1172             }
1173             }
1174 36         196 return $tree;
1175             }
1176            
1177            
1178             =item diversity_dependent_speciation()
1179            
1180             A birth and death model with speciation rate dependent on diversity as per
1181             Etienne et. al. 2012
1182            
1183             Type : Evolutionary model
1184             Title : diversity_dependent_speciation
1185             Usage : $tree = diversity_dependent_speciation(%options)
1186             Function: Produces a tree from the model terminating at a given size/time
1187             Returns : Bio::Phylo::Forest::Tree
1188             Args : %options with fields:
1189             maximal_birth_rate The maximal birth rate parameter (default 1)
1190             death_rate The death rate parameter (default no extinction)
1191             K_dash The modified carrying capacity (no default)
1192             tree_size The size of the tree at which to terminate
1193             tree_age The age of the tree at which to terminate
1194            
1195             NB: At least one of tree_size and tree_age must be specified
1196            
1197             Reference:
1198             Rampal S. Etienne, Bart Haegeman, Tanja Stadler, Tracy Aze, Paul N. Pearson,
1199             Andy Purvis and Albert B. Phillimore. "Diversity-dependence brings molecular
1200             phylogenies closer to agreement with the fossil record"
1201             doi: 10.1098/rspb.2011.1439
1202            
1203             =cut
1204            
1205             sub diversity_dependent_speciation {
1206 0     0 1 0 my %options = @_;
1207            
1208             #Check that we have a termination condition
1209 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1210             #Error here.
1211 0         0 return undef;
1212             }
1213            
1214             #Check that we have a carrying capacity
1215 0 0       0 unless ( defined $options{K_dash} ) {
1216             #Error here.
1217 0         0 return undef;
1218             }
1219            
1220             #Set the undefined condition to infinity
1221 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1222 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1223            
1224             #Set default rates
1225 0 0       0 $options{maximal_birth_rate} = 1 unless defined( $options{maximal_birth_rate} );
1226             delete $options{death_rate}
1227 0 0 0     0 if defined( $options{death_rate} ) && $options{death_rate} == 0;
1228            
1229             #Each node gets an ID number this tracks these
1230 0         0 my $node_id = 0;
1231            
1232             #Create a new tree with a root, start the list of terminal species
1233 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1234 0         0 my $root = Bio::Phylo::Forest::Node->new();
1235 0         0 $root->set_branch_length(0);
1236 0         0 $root->set_name( 'ID' . $node_id++ );
1237 0         0 $tree->insert($root);
1238 0         0 my @terminals = ($root);
1239 0         0 my ( $next_extinction, $next_speciation );
1240 0         0 my $time = 0;
1241 0         0 my $tree_size = 1;
1242            
1243            
1244 0         0 $options{birth_rate} = max(0,$options{max_birth_rate}*(1-1/$options{K_dash}));
1245            
1246            
1247             #Check whether we have a non-zero root edge
1248 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1249            
1250             #Non-zero root. We set the time to the first speciation event
1251 0         0 $next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
1252             }
1253             else {
1254            
1255             #Zero root, we want a speciation event straight away
1256 0         0 $next_speciation = 0;
1257             }
1258            
1259             #Time of the first extinction event. If no extinction we always
1260             #set the extinction event after the current speciation event
1261 0 0       0 if ( defined $options{death_rate} ) {
1262 0         0 $next_extinction = -log(rand) / $options{death_rate} / $tree_size;
1263             }
1264             else {
1265 0         0 $next_extinction = $next_speciation + 1;
1266             }
1267            
1268             #While the tree has not become extinct and the termination criterion
1269             #has not been achieved we create new speciation and extinction events
1270 0   0     0 while ($tree_size > 0
      0        
1271             && $tree_size < $options{tree_size}
1272             && $time < $options{tree_age} )
1273             {
1274            
1275             #Add the time since the last event to all terminal species
1276 0         0 foreach (@terminals) {
1277             $_->set_branch_length(
1278             $_->get_branch_length + min(
1279             $next_extinction, $next_speciation,
1280 0         0 $options{tree_age} - $time
1281             )
1282             );
1283             }
1284            
1285             #Update the time
1286 0         0 $time += min( $next_extinction, $next_speciation );
1287            
1288             #If the tree exceeds the time limit we are done
1289 0 0       0 return $tree if ( $time > $options{tree_age} );
1290            
1291             #Get the species effected by this event and remove it from the terminals list
1292 0         0 my $effected =
1293             splice( @terminals, int( rand( scalar @terminals ) ), 1 );
1294            
1295             #If we have a speciation event we add two new species
1296 0 0 0     0 if ( $next_speciation < $next_extinction || !defined $next_extinction )
1297             {
1298 0         0 foreach ( 1, 2 ) {
1299            
1300             #Create a new species
1301 0         0 my $child = Bio::Phylo::Forest::Node->new();
1302 0         0 $child->set_name( 'ID' . $node_id++ );
1303            
1304             #Give it a zero edge length
1305 0         0 $child->set_branch_length(0);
1306            
1307             #Add it as a child to the speciating species
1308 0         0 $effected->set_child($child);
1309            
1310             #Add it to the tree
1311 0         0 $tree->insert($child);
1312            
1313             #Add it to the terminals list
1314 0         0 push( @terminals, $child );
1315             }
1316             }
1317            
1318             #We calculate the time that the next extinction and speciation
1319             #events will occur (only the earliest of these will actually
1320             #happen). NB: this approach is only appropriate for models where
1321             #speciation and extinction times are exponentially distributed.
1322             #Windows sometimes returns 0 values for rand...
1323 0         0 my ( $r1, $r2 ) = ( 0, 0 );
1324 0         0 $r1 = rand until $r1;
1325 0         0 $r2 = rand until $r2;
1326 0         0 $tree_size = scalar @terminals;
1327 0 0       0 return $tree unless $tree_size;
1328            
1329 0         0 $options{birth_rate} = max(0,$options{max_birth_rate}*(1-$tree_size/$options{K_dash}));
1330 0 0       0 if ($options{birth_rate}==0)
1331             {
1332 0         0 $next_speciation = $options{tree_age};
1333             } else
1334             {
1335 0         0 $next_speciation = -log($r1) / $options{birth_rate} / $tree_size;
1336             }
1337 0 0       0 if ( defined $options{death_rate} ) {
1338 0         0 $next_extinction = -log($r2) / $options{death_rate} / $tree_size;
1339             }
1340             else {
1341 0         0 $next_extinction = $next_speciation + 1;
1342             }
1343             }
1344 0         0 return $tree;
1345             }
1346            
1347            
1348             =item constant_rate_birth_death()
1349            
1350             A temporal shift birth and death model
1351            
1352             Type : Evolutionary model
1353             Title : temporal_shift_birth_death
1354             Usage : $tree = constant_rate_birth_death(%options)
1355             Function: Produces a tree from the model terminating at a given size/time
1356             Returns : Bio::Phylo::Forest::Tree
1357             Args : %options with fields:
1358             birth_rates The birth rates
1359             death_rates The death rates
1360             rate_times The times after which the rates apply (first element must be 0)
1361             tree_size The size of the tree at which to terminate
1362             tree_age The age of the tree at which to terminate
1363            
1364             NB: At least one of tree_size and tree_age must be specified
1365            
1366             =cut
1367            
1368             sub temporal_shift_birth_death {
1369 0     0 0 0 my %options = @_;
1370            
1371             #Check that we have a termination condition
1372 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1373            
1374             #Error here.
1375 0         0 return undef;
1376             }
1377            
1378             #Set the undefined condition to infinity
1379 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1380 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1381            
1382             #Each node gets an ID number this tracks these
1383 0         0 my $node_id = 0;
1384            
1385             #Create a new tree with a root, start the list of terminal species
1386 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1387 0         0 my $root = Bio::Phylo::Forest::Node->new();
1388 0         0 $root->set_branch_length(0);
1389 0         0 $root->set_name( 'ID' . $node_id++ );
1390 0         0 $tree->insert($root);
1391 0         0 my @terminals = ($root);
1392 0         0 my ( $next_extinction, $next_speciation );
1393 0         0 my $time = 0;
1394 0         0 my $tree_size = 1;
1395            
1396             #Load current rates
1397 0         0 my $birth_rate = $options{birth_rates}[0];
1398 0         0 my $death_rate = $options{death_rates}[0];
1399 0         0 my $current_rates = 0;
1400 0         0 my $next_rate_change = $options{rate_times}[$current_rates+1];
1401            
1402             #Add an additional time to the end of the rate change times to simplify checking
1403 0         0 push(@{$options{rate_times}},$options{tree_size}*2);
  0         0  
1404            
1405             #Check whether we have a non-zero root edge
1406 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1407            
1408             #Non-zero root. We set the time to the first speciation event
1409 0         0 $next_speciation = -log(rand) / $birth_rate / $tree_size;
1410             }
1411             else {
1412            
1413             #Zero root, we want a speciation event straight away
1414 0         0 $next_speciation = 0;
1415             }
1416            
1417             # print "RATES:".$time."|".$birth_rate."|".$death_rate."\n";
1418             #Time of the first extinction event. If no extinction we always
1419             #set the extinction event after the current speciation event
1420 0         0 $next_extinction = -log(rand) / $death_rate / $tree_size;
1421            
1422             #While the tree has not become extinct and the termination criterion
1423             #has not been achieved we create new speciation and extinction events
1424 0   0     0 while ($tree_size > 0
      0        
1425             && $tree_size < $options{tree_size}
1426             && $time < $options{tree_age} )
1427             {
1428            
1429             # print "TIMES:".$next_extinction."|".$next_speciation."|".$next_rate_change."|\n";
1430             # print "RATES:".$time."|".$birth_rate."|".$death_rate."\n";
1431             #Add the time since the last event to all terminal species
1432 0         0 foreach (@terminals) {
1433             $_->set_branch_length(
1434             $_->get_branch_length + min(
1435             $next_extinction, $next_speciation,
1436 0         0 $options{tree_age} - $time
1437             )
1438             );
1439             }
1440            
1441             #Update the time
1442 0         0 my $time_last = $time;
1443 0         0 $time += min( $next_extinction, $next_speciation, $next_rate_change-$time_last);
1444            
1445             #If the tree exceeds the time limit we are done
1446 0 0       0 return $tree if ( $time > $options{tree_age} );
1447            
1448            
1449 0 0       0 if ($next_rate_change-$time_last < min( $next_extinction, $next_speciation) )
1450             {
1451 0         0 $current_rates += 1;
1452 0         0 $birth_rate = $options{birth_rates}[$current_rates];
1453 0         0 $death_rate = $options{death_rates}[$current_rates];
1454 0         0 $next_rate_change = $options{rate_times}[$current_rates+1];
1455            
1456             } else
1457             {
1458            
1459            
1460             #Get the species effected by this event and remove it from the terminals list
1461 0         0 my $effected = splice( @terminals, int( rand( scalar @terminals ) ), 1 );
1462            
1463             #If we have a speciation event we add two new species
1464 0 0 0     0 if ( $next_speciation < $next_extinction || !defined $next_extinction )
1465             {
1466 0         0 foreach ( 1, 2 ) {
1467            
1468             #Create a new species
1469 0         0 my $child = Bio::Phylo::Forest::Node->new();
1470 0         0 $child->set_name( 'ID' . $node_id++ );
1471            
1472             #Give it a zero edge length
1473 0         0 $child->set_branch_length(0);
1474            
1475             #Add it as a child to the speciating species
1476 0         0 $effected->set_child($child);
1477            
1478             #Add it to the tree
1479 0         0 $tree->insert($child);
1480            
1481             #Add it to the terminals list
1482 0         0 push( @terminals, $child );
1483             }
1484             }
1485             }
1486            
1487             #We calculate the time that the next extinction and speciation
1488             #events will occur (only the earliest of these will actually
1489             #happen). NB: this approach is only appropriate for models where
1490             #speciation and extinction times are exponentially distributed.
1491             #Windows sometimes returns 0 values for rand...
1492 0         0 my ( $r1, $r2 ) = ( 0, 0 );
1493 0         0 $r1 = rand until $r1;
1494 0         0 $r2 = rand until $r2;
1495 0         0 $tree_size = scalar @terminals;
1496 0 0       0 return $tree unless $tree_size;
1497 0         0 $next_speciation = -log($r1) / $birth_rate / $tree_size;
1498 0         0 $next_extinction = -log($r2) / $death_rate / $tree_size;
1499            
1500 0 0       0 if ((scalar @terminals)%100==0)
1501             {
1502 0         0 print $time."|".@terminals."|\n";
1503             }
1504            
1505             }
1506 0         0 return $tree;
1507             }
1508            
1509             =item evolving_speciation_rate()
1510            
1511             An evolutionary model featuring evolving speciation rates. Each daughter
1512             species is assigned its parent's speciation rate multiplied by a normally
1513             distributed noise factor.
1514            
1515             Type : Evolutionary model
1516             Title : evolving_speciation_rate
1517             Usage : $tree = evolving_speciation_rate(%options)
1518             Function: Produces a tree from the model terminating at a given size/time
1519             Returns : Bio::Phylo::Forest::Tree
1520             Args : %options with fields:
1521             birth_rate The initial speciation rate (default 1)
1522             evolving_std The standard deviation of the normal distribution
1523             from which the rate multiplier is drawn.
1524             tree_size The size of the tree at which to terminate
1525             tree_age The age of the tree at which to terminate
1526            
1527             NB: At least one of tree_size and tree_age must be specified
1528            
1529             =cut
1530            
1531             sub evolving_speciation_rate {
1532 0     0 1 0 my %options = @_;
1533            
1534             #Check that we have a termination condition
1535 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1536            
1537             #Error here.
1538 0         0 return undef;
1539             }
1540            
1541             #Set the undefined condition to infinity
1542 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1543 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1544            
1545             #Set default rates
1546 0 0       0 $options{birth_rate} = 1 unless defined( $options{birth_rate} );
1547 0 0       0 $options{evolving_std} = 1 unless defined( $options{evolving_std} );
1548            
1549             #Each node gets an ID number this tracks these
1550 0         0 my $node_id = 0;
1551            
1552             #Create a new tree with a root, start the list of terminal species
1553 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1554 0         0 my $root = Bio::Phylo::Forest::Node->new();
1555 0         0 $root->set_branch_length(0);
1556 0         0 $root->set_name( 'ID' . $node_id++ );
1557 0         0 $tree->insert($root);
1558 0         0 my @terminals = ($root);
1559 0         0 my @birth_rates = ( $options{birth_rate} );
1560 0         0 my $net_rate = $options{birth_rate};
1561 0         0 my $next_speciation;
1562 0         0 my $time = 0;
1563 0         0 my $tree_size = 1;
1564            
1565             #Check whether we have a non-zero root edge
1566 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1567            
1568             #Non-zero root. We set the time to the first speciation event
1569 0         0 $next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
1570             }
1571             else {
1572            
1573             #Zero root, we want a speciation event straight away
1574 0         0 $next_speciation = 0;
1575             }
1576            
1577             #While we haven't reached termination
1578 0   0     0 while ( $tree_size < $options{tree_size} && $time < $options{tree_age} ) {
1579            
1580             #Add the time since the last event to all terminal species
1581 0         0 foreach (@terminals) {
1582             $_->set_branch_length( $_->get_branch_length +
1583 0         0 min( $next_speciation, $options{tree_age} - $time ) );
1584             }
1585            
1586             #Update the time
1587 0         0 $time += $next_speciation;
1588            
1589             #If the tree exceeds the time limit we are done
1590 0 0       0 return $tree if ( $time > $options{tree_age} );
1591            
1592             #Get the species effected by this event
1593 0         0 my $rand_select = rand($net_rate);
1594 0         0 my $selected = 0;
1595 0   0     0 for (
1596             ;
1597             $selected < scalar @terminals
1598             && $rand_select > $birth_rates[$selected] ;
1599             $selected++
1600             )
1601             {
1602 0         0 $rand_select -= $birth_rates[$selected];
1603             }
1604            
1605             #Remove it from the terminals list
1606 0         0 my $effected = splice( @terminals, $selected, 1 );
1607 0         0 my $effected_rate = splice( @birth_rates, $selected, 1 );
1608            
1609             #Update the net speciation rate
1610 0         0 $net_rate -= $effected_rate;
1611            
1612             #If we have a speciation event we add two new species
1613 0         0 foreach ( 1, 2 ) {
1614            
1615             #Create a new species
1616 0         0 my $child = Bio::Phylo::Forest::Node->new();
1617 0         0 $child->set_name( 'ID' . $node_id++ );
1618            
1619             #Give it a zero edge length
1620 0         0 $child->set_branch_length(0);
1621            
1622             #Add it as a child to the speciating species
1623 0         0 $effected->set_child($child);
1624            
1625             #Add it to the tree
1626 0         0 $tree->insert($child);
1627            
1628             #Add it to the terminals list
1629 0         0 push( @terminals, $child );
1630            
1631             #New speciation rate
1632             my $new_speciation_rate =
1633 0         0 $effected_rate * ( 1 + qnorm(rand) * $options{evolving_std} );
1634 0 0       0 if ( $new_speciation_rate < 0 ) { $new_speciation_rate = 0; }
  0         0  
1635 0         0 push( @birth_rates, $new_speciation_rate );
1636 0         0 $net_rate += $new_speciation_rate;
1637             }
1638            
1639             # $net_rate = 0;
1640             # foreach (@birth_rates) { $net_rate += $_; }
1641             #Windows sometimes returns 0 values for rand...
1642 0         0 my ( $r1, $r2 ) = ( 0, 0 );
1643 0         0 $r1 = rand until $r1;
1644 0         0 $tree_size = scalar @terminals;
1645            
1646             #If all species have stopped speciating (unlikely)
1647 0 0       0 if ( $net_rate == 0 ) {
1648 0         0 return $tree;
1649             }
1650 0         0 $next_speciation = -log($r1) / $net_rate / $tree_size;
1651 0 0       0 return $tree unless $tree_size;
1652             }
1653 0         0 return $tree;
1654             }
1655            
1656            
1657             =item clade_shifts()
1658            
1659             A constant rate birth-death model with punctuated changes in the speciation
1660             and extinction rates. At each change one lineage receives new pre-specified
1661             speciation and extinction rates.
1662            
1663             Type : Evolutionary model
1664             Title : clade_shifts
1665             Usage : $tree = clade_shifts(%options)
1666             Function: Produces a tree from the model terminating at a given size/time
1667             Returns : Bio::Phylo::Forest::Tree
1668             Args : %options with fields:
1669             birth_rates The speciation rates
1670             death_rates The death rates
1671             rate_times The times at which the rates are introduced to a new
1672             clade. The first time should be zero. The remaining must be in
1673             ascending order.
1674             tree_size The size of the tree at which to terminate
1675             tree_age The age of the tree at which to terminate
1676            
1677             NB: At least one of tree_size and tree_age must be specified
1678            
1679             =cut
1680            
1681             sub clade_shifts {
1682 0     0 1 0 my %options = @_;
1683            
1684             #Check that we have a termination condition
1685 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1686            
1687             #Error here.
1688 0         0 return undef;
1689             }
1690            
1691             #Set the undefined condition to infinity
1692 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1693 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1694            
1695             #Each node gets an ID number this tracks these
1696 0         0 my $node_id = 0;
1697            
1698             #Create a new tree with a root, start the list of terminal species
1699 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1700 0         0 my $root = Bio::Phylo::Forest::Node->new();
1701 0         0 $root->set_branch_length(0);
1702 0         0 $root->set_name( 'ID' . $node_id++ );
1703 0         0 $tree->insert($root);
1704            
1705             #rates
1706 0         0 my @birth_rates_in = @{$options{birth_rates}};
  0         0  
1707 0         0 my @death_rates_in = @{$options{death_rates}};
  0         0  
1708 0         0 my @rate_times_in = @{$options{rate_times}};
  0         0  
1709            
1710 0 0       0 if ($rate_times_in[0] != 0)
1711             {
1712 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
1713             "The first rate time must be 0" );
1714             }
1715 0 0       0 if (scalar @birth_rates_in != scalar @death_rates_in)
1716             {
1717 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
1718             "birth and death rates must have the same length" );
1719             }
1720 0 0       0 if (scalar @birth_rates_in != scalar @rate_times_in)
1721             {
1722 0         0 Bio::Phylo::Util::Exceptions::BadArgs->throw( 'error' =>
1723             "birth/death rates must have the same length as rate times" );
1724             }
1725 0         0 my @birth_rates = ($birth_rates_in[0]);
1726 0         0 my @death_rates = ($death_rates_in[0]);
1727            
1728 0         0 my $net_birth_rate = $birth_rates[0];
1729 0         0 my $net_death_rate = $death_rates[0];
1730            
1731 0         0 shift(@birth_rates_in);
1732 0         0 shift(@death_rates_in);
1733            
1734 0         0 my @terminals = ($root);
1735 0         0 my ( $next_extinction, $next_speciation, $next_rate_change );
1736 0         0 my $time = 0;
1737 0         0 my $tree_size = 1;
1738 0         0 my $inf = 9**9**9**9;
1739            
1740             #Check whether we have a non-zero root edge
1741 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1742            
1743             #Non-zero root. We set the time to the first speciation event
1744 0 0       0 if ($birth_rates[0] > 0)
1745             {
1746 0         0 $next_speciation = -log(rand) / $birth_rates[0] ;
1747             } else
1748             {
1749 0         0 $next_speciation = $inf;
1750             }
1751             }
1752             else {
1753             #Zero root, we want a speciation event straight away
1754 0         0 $next_speciation = 0.0;
1755             }
1756            
1757             #Time of the first extinction event. If no extinction we always
1758             #set the extinction event after the current speciation event
1759 0 0       0 if ($death_rates[0] > 0)
1760             {
1761 0         0 $next_extinction = -log(rand) / $death_rates[0];
1762             } else
1763             {
1764 0         0 $next_extinction = $inf;
1765             }
1766            
1767             #Time of next rate change
1768 0         0 shift(@rate_times_in); #pop the initial 0
1769 0         0 $next_rate_change = shift(@rate_times_in);
1770            
1771             #While the tree has not become extinct and the termination criterion
1772             #has not been achieved we create new speciation and extinction events
1773 0   0     0 while ($tree_size > 0
      0        
1774             && $tree_size < $options{tree_size}
1775             && $time < $options{tree_age} )
1776             {
1777             #print $time."|".$tree_size."\n";
1778             #Update rates if a clade shift is happening
1779             #TODO index rates or pop off one at a time.
1780            
1781            
1782             #Add the time since the last event to all terminal species
1783 0         0 foreach (@terminals) {
1784             $_->set_branch_length(
1785             $_->get_branch_length + min(
1786             $next_extinction,
1787             $next_speciation,
1788             $next_rate_change-$time,
1789 0         0 $options{tree_age} - $time
1790             )
1791             );
1792             }
1793            
1794             #Update the time
1795 0         0 my $time_last = $time;
1796 0         0 $time += min( $next_extinction, $next_speciation, $next_rate_change-$time_last );
1797            
1798             #If the tree exceeds the time limit we are done
1799 0 0       0 return $tree if ( $time > $options{tree_age} );
1800            
1801             #We have a rate change
1802 0 0       0 if ($next_rate_change-$time_last < min($next_extinction, $next_speciation))
1803             {
1804             #Find a random species to effect
1805 0         0 my $effected_species = int(rand($tree_size));
1806             #Subtract current rates
1807 0         0 $net_death_rate -= $death_rates[$effected_species];
1808 0         0 $net_birth_rate -= $birth_rates[$effected_species];
1809             #Get new rates
1810 0         0 $death_rates[$effected_species] = shift(@death_rates_in);
1811 0         0 $birth_rates[$effected_species] = shift(@birth_rates_in);
1812             #Add new rates
1813 0         0 $net_death_rate += $death_rates[$effected_species];
1814 0         0 $net_birth_rate += $birth_rates[$effected_species];
1815             #Get next rate change time
1816 0 0       0 if (scalar(@rate_times_in))
1817             {
1818 0         0 $next_rate_change = shift(@rate_times_in);
1819             } else
1820             {
1821 0         0 $next_rate_change = $inf;
1822             }
1823             }
1824             #Choosing a random species to speciate
1825             else
1826             {
1827 0         0 my $selected = 0;
1828 0 0       0 if ( $next_speciation < $next_extinction )
1829             {
1830 0         0 my $rand_select = rand($net_birth_rate);
1831 0   0     0 for (
1832             ;
1833             $selected < scalar @terminals
1834             && $rand_select > $birth_rates[$selected] ;
1835             $selected++
1836             )
1837             {
1838 0         0 $rand_select -= $birth_rates[$selected];
1839             }
1840             } else
1841             {
1842 0         0 my $rand_select = rand($net_death_rate);
1843 0   0     0 for (
1844             ;
1845             $selected < scalar @terminals
1846             && $rand_select > $death_rates[$selected] ;
1847             $selected++
1848             )
1849             {
1850 0         0 $rand_select -= $death_rates[$selected];
1851             }
1852             }
1853 0 0       0 if ($net_birth_rate == 0)
1854             {
1855 0         0 $selected = 0;
1856             }
1857            
1858             #Remove the species effected by this event and remove it from the terminals list
1859 0         0 my $effected = splice( @terminals, $selected, 1 );
1860 0         0 my $effected_birth_rate = splice( @birth_rates, $selected, 1 );
1861 0         0 my $effected_death_rate = splice( @death_rates, $selected, 1 );
1862            
1863 0         0 $net_birth_rate -= $effected_birth_rate;
1864 0         0 $net_death_rate -= $effected_death_rate;
1865            
1866             #If we have a speciation event we add two new species
1867 0 0 0     0 if ( $next_speciation < $next_extinction || !defined $next_extinction )
1868             {
1869 0         0 foreach ( 1, 2 ) {
1870            
1871             #Create a new species
1872 0         0 my $child = Bio::Phylo::Forest::Node->new();
1873 0         0 $child->set_name( 'ID' . $node_id++ );
1874            
1875             #Give it a zero edge length
1876 0         0 $child->set_branch_length(0);
1877            
1878             #Add it as a child to the speciating species
1879 0         0 $effected->set_child($child);
1880            
1881             #Add it to the tree
1882 0         0 $tree->insert($child);
1883            
1884             #Add it to the terminals list
1885 0         0 push( @terminals, $child );
1886            
1887 0         0 push( @birth_rates, $effected_birth_rate );
1888 0         0 push( @death_rates, $effected_death_rate );
1889            
1890 0         0 $net_death_rate += $effected_death_rate;
1891 0         0 $net_birth_rate += $effected_birth_rate;
1892            
1893             }
1894             }
1895             }
1896             #We calculate the time that the next extinction and speciation
1897             #events will occur (only the earliest of these will actually
1898             #happen). NB: this approach is only appropriate for models where
1899             #speciation and extinction times are exponentially distributed.
1900            
1901             #Windows sometimes returns 0 values for rand...
1902 0         0 my ( $r1, $r2 ) = ( 0, 0 );
1903 0         0 $r1 = rand until $r1;
1904 0         0 $r2 = rand until $r2;
1905            
1906             #The current tree size
1907 0         0 $tree_size = scalar @terminals;
1908            
1909 0 0       0 return $tree unless $tree_size;
1910            
1911 0 0       0 if ($net_birth_rate > 0)
1912             {
1913 0         0 $next_speciation = -log($r1) / $net_birth_rate;
1914             } else
1915             {
1916 0         0 $next_speciation = $inf;
1917             }
1918            
1919 0 0       0 if ($net_death_rate > 0)
1920             {
1921 0         0 $next_extinction = -log($r2) / $net_death_rate;
1922             } else
1923             {
1924 0         0 $next_extinction = $inf;
1925             }
1926            
1927             }
1928 0         0 return $tree;
1929             }
1930            
1931             =item beta_binomial()
1932            
1933             An evolutionary model featuring evolving speciation rates. From Blum2007
1934            
1935             Type : Evolutionary model
1936             Title : beta_binomial
1937             Usage : $tree = beta_binomial(%options)
1938             Function: Produces a tree from the model terminating at a given size/time
1939             Returns : Bio::Phylo::Forest::Tree
1940             Args : %options with fields:
1941             birth_rate The initial speciation rate (default 1)
1942             model_param The parameter as defined in Blum2007
1943             tree_size The size of the tree at which to terminate
1944             tree_age The age of the tree at which to terminate
1945            
1946             NB: At least one of tree_size and tree_age must be specified
1947            
1948             =cut
1949            
1950             sub beta_binomial {
1951 0     0 1 0 my %options = @_;
1952            
1953             #Check that we have a termination condition
1954 0 0 0     0 unless ( defined $options{tree_size} or defined $options{tree_age} ) {
1955            
1956             #Error here.
1957 0         0 return undef;
1958             }
1959            
1960             #Set the undefined condition to infinity
1961 0 0       0 $options{tree_size} = 1e6 unless defined $options{tree_size};
1962 0 0       0 $options{tree_age} = 1e6 unless defined $options{tree_age};
1963            
1964             #Set default rates
1965 0 0       0 $options{birth_rate} = 1 unless defined( $options{birth_rate} );
1966 0 0       0 $options{model_param} = 0 unless defined( $options{model_param} );
1967            
1968             #Each node gets an ID number this tracks these
1969 0         0 my $node_id = 0;
1970            
1971             #Create a new tree with a root, start the list of terminal species
1972 0         0 my $tree = Bio::Phylo::Forest::Tree->new();
1973 0         0 my $root = Bio::Phylo::Forest::Node->new();
1974 0         0 $root->set_branch_length(0);
1975 0         0 $root->set_name( 'ID' . $node_id++ );
1976 0         0 $tree->insert($root);
1977 0         0 my @terminals = ($root);
1978 0         0 my @birth_rates = ( $options{birth_rate} );
1979 0         0 my $net_rate = $options{birth_rate};
1980 0         0 my $next_speciation;
1981 0         0 my $time = 0;
1982 0         0 my $tree_size = 1;
1983            
1984             #Check whether we have a non-zero root edge
1985 0 0 0     0 if ( defined $options{root_edge} && $options{root_edge} ) {
1986            
1987             #Non-zero root. We set the time to the first speciation event
1988 0         0 $next_speciation = -log(rand) / $options{birth_rate} / $tree_size;
1989             }
1990             else {
1991            
1992             #Zero root, we want a speciation event straight away
1993 0         0 $next_speciation = 0;
1994             }
1995            
1996             #While we haven't reached termination
1997 0   0     0 while ( $tree_size < $options{tree_size} && $time < $options{tree_age} ) {
1998            
1999             #Add the time since the last event to all terminal species
2000 0         0 foreach (@terminals) {
2001             $_->set_branch_length( $_->get_branch_length +
2002 0         0 min( $next_speciation, $options{tree_age} - $time ) );
2003             }
2004            
2005             #Update the time
2006 0         0 $time += $next_speciation;
2007            
2008             #If the tree exceeds the time limit we are done
2009 0 0       0 return $tree if ( $time > $options{tree_age} );
2010            
2011             #Get the species effected by this event
2012 0         0 my $rand_select = rand($net_rate);
2013 0         0 my $selected = 0;
2014 0   0     0 for (
2015             ;
2016             $selected < scalar @terminals
2017             && $rand_select > $birth_rates[$selected] ;
2018             $selected++
2019             )
2020             {
2021 0         0 $rand_select -= $birth_rates[$selected];
2022             }
2023            
2024             #Remove it from the terminals list
2025 0         0 my $effected = splice( @terminals, $selected, 1 );
2026 0         0 my $effected_rate = splice( @birth_rates, $selected, 1 );
2027             my $p =
2028 0         0 qbeta( rand, $options{model_param} + 1, $options{model_param} + 1 );
2029            
2030             #If we have a speciation event we add two new species
2031 0         0 foreach ( 1, 2 ) {
2032            
2033             #Create a new species
2034 0         0 my $child = Bio::Phylo::Forest::Node->new();
2035 0         0 $child->set_name( 'ID' . $node_id++ );
2036            
2037             #Give it a zero edge length
2038 0         0 $child->set_branch_length(0);
2039            
2040             #Add it as a child to the speciating species
2041 0         0 $effected->set_child($child);
2042            
2043             #Add it to the tree
2044 0         0 $tree->insert($child);
2045            
2046             #Add it to the terminals list
2047 0         0 push( @terminals, $child );
2048            
2049             #New speciation rate
2050 0         0 my $new_speciation_rate = $effected_rate * $p;
2051 0         0 $p = 1 - $p;
2052 0 0       0 if ( $new_speciation_rate < 0 ) { $new_speciation_rate = 0; }
  0         0  
2053 0         0 push( @birth_rates, $new_speciation_rate );
2054             }
2055            
2056             #Windows sometimes returns 0 values for rand...
2057 0         0 my ( $r1, $r2 ) = ( 0, 0 );
2058 0         0 $r1 = rand until $r1;
2059 0         0 $tree_size = scalar @terminals;
2060            
2061             #If all species have stopped speciating (unlikely)
2062 0 0       0 if ( $net_rate == 0 ) {
2063 0         0 return $tree;
2064             }
2065 0         0 $next_speciation = -log($r1) / $net_rate / $tree_size;
2066 0 0       0 return $tree unless $tree_size;
2067             }
2068 0         0 return $tree;
2069             }
2070            
2071             =back
2072            
2073             =cut
2074            
2075             =begin comment
2076            
2077             ###########################################################
2078             #INTERNAL METHODS
2079             #These are methods that permit additional manipulations of
2080             #a Bio::Phylo::Tree to be easily made. As such some of
2081             #these could easily be moved into Bio::Phylo::Forest::Tree
2082             ###########################################################
2083            
2084             =end comment
2085            
2086             =cut
2087            
2088             =begin comment
2089            
2090             Type : Internal method
2091             Title : copy_tree
2092             Usage : $tree = copy_tree($tree)
2093             Function: Makes a new independent copy of a tree
2094             Returns : the phylogenetic $tree
2095             Args : the phylogenetic $tree
2096            
2097             =end comment
2098            
2099             =cut
2100            
2101             sub copy_tree {
2102 15     15 0 105 return Bio::Phylo::IO->parse(
2103             -format => 'newick',
2104             -string => shift->to_newick( '-nodelabels' => 1 )
2105             )->first;
2106             }
2107            
2108             =begin comment
2109            
2110             Type : Internal method
2111             Title : truncate_tree_time
2112             Usage : truncate_tree_time($tree,$age)
2113             Function: Truncates the tree at the specified age
2114             Returns : N/A
2115             Args : $tree: the phylogenetic tree (which will be modified)
2116             $age: the age at which to cut the $tree
2117            
2118             =end comment
2119            
2120             =cut
2121            
2122             sub truncate_tree_time {
2123            
2124             #$node and $time are used only by this function recursively
2125 987     987 0 1916 my ( $tree, $age, $node, $time ) = @_;
2126            
2127             #If node and time weren't specified we are starting from the root
2128 987 100       2067 $node = $tree->get_root unless defined $node;
2129 987 100       1666 $time = 0 unless defined $time;
2130            
2131             #If we are truncating this branch
2132 987 100       1783 if ( $time + $node->get_branch_length >= $age ) {
2133            
2134             #Collapse the node unless it is terminal
2135 249 100       610 $node->collapse unless $node->is_terminal();
2136            
2137             #Set the branch length appropriately
2138 249         872 $node->set_branch_length( $age - $time );
2139 249         795 return;
2140             }
2141            
2142             #If this node has no children we are done
2143 738 100       1729 return if $node->is_terminal();
2144            
2145             #Call the function recursively on the children
2146 435         603 foreach ( @{ $node->get_children } ) {
  435         798  
2147 972         2222 truncate_tree_time( $tree, $age, $_,
2148             $time + $node->get_branch_length() );
2149             }
2150             }
2151            
2152             =begin comment
2153            
2154             Type : Internal method
2155             Title : truncate_tree_size
2156             Usage : truncate_tree_size($tree,$size)
2157             Function: Truncates the tree to the specified number of species
2158             Returns : N/A
2159             Args : $tree: the phylogenetic tree (which will be modified)
2160             $size: random species are delete so that the tree has this many terminals
2161            
2162             =end comment
2163            
2164             =cut
2165            
2166             sub truncate_tree_size {
2167 5     5 0 17 my ( $tree, $size ) = @_;
2168 5         8 my @terminals = @{ $tree->get_terminals };
  5         23  
2169 5         30 my @names;
2170            
2171             #Calculate the tree height and node distances from the root
2172             #much more efficient to do this in one hit than repeatedly
2173             #calling the analogous functions on the tree
2174 5         23 _calc_node_properties($tree);
2175            
2176             #Only push species that are extant and store the number of those
2177             # my $tree_height = $tree->get_tallest_tip->calc_path_to_root;
2178 5         20 my $tree_height = $tree->get_root->get_generic('tree_height');
2179 5         17 foreach (@terminals) {
2180 782 100       1476 if (
2181             abs(
2182             ( $_->get_generic('root_distance') - $tree_height ) /
2183             $tree_height
2184             ) < 1e-6
2185             )
2186             {
2187 144         301 push( @names, $_->get_name );
2188             }
2189             }
2190 5 50       21 if ( @names < $size ) { print "Internal error\n"; }
  0         0  
2191 5         11 my %deletions;
2192 5         23 while ( scalar( keys %deletions ) < @names - $size ) {
2193 159         365 $deletions{ $names[ int( rand(@names) ) ] } = 1;
2194             }
2195 5         65 $tree = prune_tips( $tree, [ keys %deletions ] );
2196 5         158 return $tree;
2197             }
2198            
2199             sub _get_ultrametric_size {
2200 0     0   0 my ( $tree, $size ) = @_;
2201 0         0 my @terminals = @{ $tree->get_terminals };
  0         0  
2202 0         0 _calc_node_properties($tree);
2203 0         0 my @names;
2204 0         0 my $tree_height = $tree->get_root->get_generic('tree_height');
2205 0         0 foreach (@terminals) {
2206 0 0       0 if (
2207             abs(
2208             ( $_->get_generic('root_distance') - $tree_height ) /
2209             $tree_height
2210             ) < 1e-6
2211             )
2212             {
2213 0         0 push( @names, $_->get_name );
2214             }
2215             }
2216 0         0 return scalar @names;
2217             }
2218            
2219             =begin comment
2220            
2221             Type : Internal method
2222             Title : remove_extinct_species
2223             Usage : remove_extinct_species($tree)
2224             Function: Removes extinct species from the tree. An extinct species
2225             is a terminal that does not extend as far as the furthest
2226             terminal(s).
2227             Returns : N/A
2228             Args : $tree: the phylogenetic tree (which will be modified)
2229             $age: the age at which to cut the $tree
2230            
2231             =end comment
2232            
2233             =cut
2234            
2235             sub remove_extinct_species {
2236 0     0 0 0 my $tree = shift;
2237            
2238             #Calculate the tree height and node distances from the root
2239             #much more efficient to do this in one hit than repeatedly
2240             #calling the analogous functions on the tree
2241 0         0 _calc_node_properties($tree);
2242 0         0 my $height = $tree->get_root->get_generic('tree_height');
2243 0 0       0 return unless $height > 0;
2244 0         0 my $leaves = $tree->get_terminals;
2245 0 0       0 return unless $leaves;
2246 0         0 my @remove;
2247 0         0 foreach ( @{$leaves} ) {
  0         0  
2248            
2249 0 0       0 unless (
2250             abs( ( $_->get_generic('root_distance') - $height ) / $height ) <
2251             1e-6 )
2252             {
2253 0         0 push( @remove, $_->get_name );
2254             }
2255             }
2256 0         0 $tree = prune_tips( $tree, \@remove );
2257 0         0 return $tree;
2258             }
2259            
2260             =begin comment
2261            
2262             Type : Internal method
2263             Title : prune_tips
2264             Usage : prune_tips($tree,$tips)
2265             Function: Removes named terminals from the tree
2266             Returns : N/A
2267             Args : $tree: the phylogenetic tree (which will be modified)
2268             $tips: array ref of terminals to remove from the tree
2269            
2270             NB: Available as $tree->prune_tips($tips), but had some problems with
2271             this.
2272            
2273             =end comment
2274            
2275             =cut
2276            
2277             sub prune_tips {
2278 5     5 0 14 my ( $self, $tips ) = @_;
2279 5         9 my %names_to_delete = map { $_ => 1 } @{$tips};
  94         167  
  5         14  
2280 688         1121 my %keep = map { $_->get_name => 1 }
2281 782         1386 grep { not exists $names_to_delete{ $_->get_name } }
2282 5         15 @{ $self->get_terminals };
  5         22  
2283             $self->visit_depth_first(
2284             -post => sub {
2285 1511     1511   2285 my $node = shift;
2286 1511 100       2910 if ( $node->is_terminal ) {
2287 782 100       1814 if ( not $keep{ $node->get_name } ) {
2288 94         284 $node->set_parent();
2289 94         279 $self->delete($node);
2290             }
2291             }
2292             else {
2293 729         1120 my $seen_tip_to_keep = 0;
2294 729         1094 for my $tip ( @{ $node->get_terminals } ) {
  729         1614  
2295 16505 100       28557 $seen_tip_to_keep++ if $keep{ $tip->get_name };
2296             }
2297 729 100       4615 if ( not $seen_tip_to_keep ) {
2298 25         67 $node->set_parent();
2299 25         60 $self->delete($node);
2300             }
2301             }
2302             }
2303 5         132 );
2304 5         83 $self->remove_unbranched_internals;
2305 5         273 return $self;
2306             }
2307            
2308             =begin comment
2309            
2310             Type : Internal method
2311             Title : lineage_through_time
2312             Usage : ($time,$count) = lineage_through_time($tree)
2313             Function: Alternative to $tree->ltt that permits extinctions
2314             Returns : $time: array ref of times
2315             $count: array ref of species counts corresponding to the times
2316             Args : the phylogenetic $tree for which to produce the ltt data
2317            
2318             =end comment
2319            
2320             =cut
2321            
2322             sub lineage_through_time {
2323 44     44 0 91 my $tree = shift;
2324 44         139 my ( $speciation, $extinction ) = _recursive_ltt_helper($tree);
2325 44         76 my @speciation = sort { $a <=> $b } @{$speciation};
  7876         8586  
  44         296  
2326 44         74 my @extinction = sort { $a <=> $b } @{$extinction};
  8232         8848  
  44         138  
2327 44         104 my @time = (0);
2328 44         83 my @count = (1);
2329 44         67 my $n_species = 1;
2330 44         180 my $end_time = max( @speciation, @extinction );
2331 44 50       137 return ( [], [] ) if ( $end_time == 0 );
2332            
2333             #We remove any extinction events occurring at the very end of the tree (as they are not real extinctions)
2334 44   100     267 while ( scalar @extinction
2335             && ( $end_time - $extinction[-1] ) / $end_time < 1e-6 )
2336             {
2337 594         1314 pop @extinction;
2338             }
2339 44   100     159 while ( scalar @speciation || scalar @extinction ) {
2340 3028 100 100     7896 if ( scalar @extinction == 0
      100        
2341             || ( scalar @speciation && $speciation[0] < $extinction[0] ) )
2342             {
2343 1789         2306 push( @count, ++$n_species );
2344 1789         3273 push( @time, shift(@speciation) );
2345             }
2346             else {
2347 1239         1582 push( @count, --$n_species );
2348 1239         2289 push( @time, shift(@extinction) );
2349             }
2350             }
2351 44         243 return ( \@time, \@count );
2352             }
2353            
2354             =begin comment
2355            
2356             Type : Internal method
2357             Title : _recursive_ltt_helper
2358             Usage : ($speciation, $extinction) = _recursive_ltt_helper($tree)
2359             Function: Helper for lineage_through_time
2360             Returns : $speciation: array ref of speciation times
2361             $extinction: array ref of extinction times
2362             Args : the phylogenetic $tree for which to produce the ltt data
2363            
2364             =end comment
2365            
2366             =cut
2367            
2368             sub _recursive_ltt_helper {
2369 3622     3622   6006 my ( $tree, $node, $time ) = @_;
2370            
2371             #If we are being invoked at the root level
2372 3622 100       5755 $node = $tree->get_root unless defined $node;
2373 3622 100       5167 $time = 0 unless defined $time;
2374            
2375             #The new time
2376 3622         7092 $time += $node->get_branch_length;
2377 3622 100       7328 return ( [], [$time] ) if ( $node->is_terminal );
2378 1789         2762 my @speciation;
2379             my @extinction;
2380 1789         2208 foreach ( @{ $node->get_children } ) {
  1789         3098  
2381 3578         6838 my ( $spec, $ext ) = _recursive_ltt_helper( $tree, $_, $time );
2382 3578         4840 @speciation = ( @speciation, @{$spec} );
  3578         7338  
2383 3578         4416 @extinction = ( @extinction, @{$ext} );
  3578         11691  
2384             }
2385 1789         3004 push( @speciation, $time );
2386 1789         3316 return ( \@speciation, \@extinction );
2387             }
2388            
2389             =begin comment
2390            
2391             Type : Internal method.
2392             Title : _calc_node_properties
2393             Usage : _calc_node_properties($tree);
2394             Function: Calculates the distance of nodes from the root
2395             Returns : The maximum distance from the root
2396             Args :
2397            
2398             =end comment
2399            
2400             =cut
2401            
2402             sub _calc_node_properties {
2403 1511     1511   2582 my ( $node, $root_distance );
2404 1511         2068 my $tree = shift;
2405 1511         3995 my $root = $tree->get_root;
2406            
2407             #Check whether we were given a node and distance
2408 1511 100       3905 if ( scalar @_ ) {
2409 1506         2318 $node = shift;
2410 1506         2083 $root_distance = shift;
2411            
2412             #Otherwise the root is the default
2413             }
2414             else {
2415 5         13 $node = $root;
2416 5         41 $root->set_generic( tree_height => 0 );
2417 5         9 $root_distance = 0;
2418             }
2419 1511         5087 $node->set_generic( root_distance => $root_distance );
2420 1511 100       4164 if ( $root_distance > $root->get_generic('tree_height') ) {
2421 166         431 $root->set_generic( tree_height => $root_distance );
2422             }
2423 1511         2498 my $terminal_count = 0;
2424 1511         3639 my $children = $node->get_children;
2425 1511 50       3728 if ( defined $children ) {
2426 1511         1942 foreach ( @{$children} ) {
  1511         4825  
2427 1506         3813 _calc_node_properties( $tree, $_,
2428             $root_distance + $_->get_branch_length() );
2429             }
2430             }
2431             }
2432            
2433             =begin comment
2434            
2435             Type : Internal method
2436             Title : nchoosek
2437             Usage : $out = nchoosek($n,$k)
2438             Function: Returns the binomial coefficient for $n and $k
2439             Returns : the binomial coefficient
2440             Args : $n, $k
2441            
2442             =end comment
2443            
2444             =cut
2445            
2446             sub nchoosek {
2447 0     0 0   my ( $n, $k ) = @_;
2448 0           my $r = 1;
2449 0 0 0       return 0 if ( $k > $n || $k < 0 );
2450 0           for ( my $d = 1 ; $d <= $k ; $d++ ) {
2451 0           $r *= $n--;
2452 0           $r /= $d;
2453             }
2454 0           return $r;
2455             }
2456             1;