File Coverage

Bio/SeqEvolution/DNAPoint.pm
Criterion Covered Total %
statement 91 103 88.3
branch 23 36 63.8
condition 11 11 100.0
subroutine 14 15 93.3
pod 5 5 100.0
total 144 170 84.7


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SeqEvolution::DNAPoint
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Heikki Lehvaslaiho
7             #
8             # Copyright Heikki Lehvaslaiho
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::SeqEvolution::DNAPoint - evolve a sequence by point mutations
17              
18             =head1 SYNOPSIS
19              
20             # $seq is a Bio::PrimarySeqI to mutate
21             $evolve = Bio::SeqEvolution::Factory->new (-rate => 5,
22             -seq => $seq,
23             -identity => 50
24             );
25             $newseq = $evolve->next_seq;
26              
27              
28             =head1 DESCRIPTION
29              
30             Bio::SeqEvolution::DNAPoint implements the simplest evolution model:
31             nucleotides change by point mutations, only. Transition/transversion
32             rate of the change, rate(), can be set.
33              
34             The new sequences are named with the id of the reference sequence
35             added with a running number. Placing a new sequence into a factory to
36             be evolved resets that counter. It can also be called directly with
37             L.
38              
39             The default sequence type returned is Bio::PrimarySeq. This can be
40             changed to any Bio::PrimarySeqI compliant sequence class.
41              
42             Internally the probability of the change of one nucleotide is mapped
43             to scale from 0 to 100. The probability of the transition occupies
44             range from 0 to some value. The remaining range is divided equally
45             among the two transversion nucleotides. A random number is then
46             generated to pick up one change.
47              
48             Not that the default transition/transversion rate, 1:1, leads to
49             observed transition/transversion ratio of 1:2 simply because there is
50             only one transition nucleotide versus two transversion nucleotides.
51              
52             =head1 FEEDBACK
53              
54             =head2 Mailing Lists
55              
56             User feedback is an integral part of the evolution of this and other
57             Bioperl modules. Send your comments and suggestions preferably to
58             the Bioperl mailing list. Your participation is much appreciated.
59              
60             bioperl-l@bioperl.org - General discussion
61             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62              
63             =head2 Support
64              
65             Please direct usage questions or support issues to the mailing list:
66              
67             I
68              
69             rather than to the module maintainer directly. Many experienced and
70             reponsive experts will be able look at the problem and quickly
71             address it. Please include a thorough description of the problem
72             with code and data examples if at all possible.
73              
74             =head2 Reporting Bugs
75              
76             Report bugs to the Bioperl bug tracking system to help us keep track
77             of the bugs and their resolution. Bug reports can be submitted via the
78             web:
79              
80             https://github.com/bioperl/bioperl-live/issues
81              
82             =head1 AUTHOR
83              
84             Heikki Lehvaslaiho Eheikki at bioperl dot orgE
85              
86             =head1 CONTRIBUTORS
87              
88             Additional contributor's names and emails here
89              
90             =head1 APPENDIX
91              
92             The rest of the documentation details each of the object methods.
93             Internal methods are usually preceded with a _
94              
95             =cut
96              
97              
98             # Let the code begin...
99              
100              
101             package Bio::SeqEvolution::DNAPoint;
102 1     1   5 use strict;
  1         2  
  1         24  
103 1     1   4 use Bio::Root::Root;
  1         2  
  1         19  
104 1     1   3 use Bio::SeqEvolution::Factory;
  1         2  
  1         16  
105              
106 1     1   284 use Bio::Variation::DNAMutation;
  1         3  
  1         38  
107 1     1   273 use Bio::Variation::Allele;
  1         2  
  1         29  
108 1     1   446 use Bio::SimpleAlign;
  1         4  
  1         49  
109              
110 1     1   11 use base qw(Bio::SeqEvolution::Factory);
  1         2  
  1         855  
111              
112              
113             sub _initialize {
114 7     7   15 my($self, @args) = @_;
115              
116 7         19 $self->SUPER::_initialize(@args);
117 7         17 my %param = @args;
118 7         18 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
  25         45  
119              
120 7 100       28 exists $param{'-rate'} && $self->rate($param{'-rate'});
121              
122 7         20 $self->_init_mutation_engine;
123             }
124              
125              
126             sub _init_mutation_engine {
127 12     12   17 my $self = shift;
128              
129             # arrays of possible changes have transitions as first items
130 12         15 my %changes;
131 12         37 $self->{'_changes'}->{'a'} = ['t', 'c', 'g'];
132 12         29 $self->{'_changes'}->{'t'} = ['a', 'c', 'g'];
133 12         22 $self->{'_changes'}->{'c'} = ['g', 'a', 't'];
134 12         24 $self->{'_changes'}->{'g'} = ['c', 'a', 't'];
135              
136              
137             # given the desired rate, find out where cut off points need to be
138             # when random numbers are generated from 0 to 100
139             # we are ignoring identical mutations (e.g. A->A) to speed things up
140 12         21 my $bin_size = 100/($self->rate + 2);
141 12         24 $self->{'_transition'} = 100 - (2*$bin_size);
142 12         20 $self->{'_first_transversion'} = $self->{'_transition'} + $bin_size;
143              
144 12         19 $self->_init_alignment;
145             }
146              
147             sub _init_alignment {
148 17     17   20 my $self = shift;
149              
150             # put the initial sequence into the alignment object
151 17         52 $self->{'_align'} = Bio::SimpleAlign->new(-verbose => -1);
152 17 100       33 return unless $self->seq;
153 14         22 $self->{'_ori_locatableseq'} = Bio::LocatableSeq->new(-id => 'ori',
154             -seq=> $self->seq->seq);
155 14         29 $self->{'_mut_locatableseq'} = Bio::LocatableSeq->new(-id => 'mut',
156             -seq=> $self->seq->seq);
157 14         46 $self->{'_align'}->add_seq($self->{'_ori_locatableseq'});
158 14         30 $self->{'_align'}->add_seq($self->{'_mut_locatableseq'});
159             }
160              
161             =head2 seq
162              
163             Title : seq
164             Usage : $obj->seq($newval)
165             Function: Set the sequence object for the original sequence
166             Returns : The sequence object
167             Args : newvalue (optional)
168              
169             Setting this will reset mutation and generated mutation counters.
170              
171             =cut
172              
173             sub seq{
174 60     60 1 109 my $self = shift;
175              
176 60 100       94 if (@_) {
177 4         6 my $seq = shift;
178 4 50       32 $self->throw('Need a valid Bio::PrimarySeqI, not [', ref($seq), ']')
179             unless $seq->isa('Bio::PrimarySeqI');
180            
181 4 50       11 $self->throw('Only nucleotide sequences are supported')
182             if $seq->alphabet eq 'protein';
183 4 50       11 $self->throw('No ambiquos nucleotides allowed in the input sequence')
184             if $seq->seq =~ m/[^acgt]/;
185              
186 4         8 $self->{'_seq'} = $seq;
187              
188             # unify the look of sequence strings and cache the information
189 4         5 $self->{'_ori_string'} = lc $seq->seq; # lower case
190 4         9 $self->{'_ori_string'} =~ s/u/t/; # simplyfy our life; modules should deal with the change anyway
191 4         9 $self->{'_seq_length'} = $seq->length;
192              
193 4         23 $self->reset_sequence_counter;
194             }
195 60         129 return $self->{'_seq'};
196             }
197              
198             =head2 set_mutated_seq
199              
200             Title : seq_mutated_seq
201             Usage : $obj->set_mutated_seq($newval)
202             Function: In case of mutating a sequence with multiple evolvers, this
203             Returns : set_mutated_seq
204             Args : newvalue (optional)
205              
206             =cut
207              
208             sub set_mutated_seq {
209 0     0 1 0 my $self = shift;
210              
211 0 0       0 if (@_) {
212 0         0 my $seq = shift;
213 0 0       0 $self->throw('Need a valid Bio::PrimarySeqI, not [', ref($seq), ']')
214             unless $seq->isa('Bio::PrimarySeqI');
215 0 0       0 $self->throw('Only nucleotide sequences are supported')
216             if $seq->alphabet eq 'protein';
217 0 0       0 $self->throw('No ambiquos nucleotides allowed in the input sequence')
218             if $seq->seq =~ m/[^acgt]/;
219              
220 0         0 $self->{'_seq_mutated'} = $seq;
221              
222             # unify the look of sequence strings and cache the information
223 0         0 $self->{'_mut_string'} = lc $seq->seq; # lower case
224 0         0 $self->{'_mut_string'} =~ s/u/t/; # simplyfy our life; modules should deal with the change anyway
225              
226              
227 0         0 $self->reset_sequence_counter;
228             }
229             #set returned sequence to be the last mutated string
230 0         0 $self->{'_seq'}->seq($self->{'_mut_string'});
231 0         0 return $self->{'_seq'};
232             }
233              
234              
235             =head2 rate
236              
237             Title : rate
238             Usage : $obj->rate($newval)
239             Function: Set the transition/transversion rate.
240             Returns : value of rate
241             Args : newvalue (optional)
242              
243             Transition/transversion ratio is an observed attribute of an sequence
244             comparison. We are dealing here with the transition/transversion rate
245             that we set for our model of sequence evolution.
246              
247             Note that we are using standard nucleotide alphabet and that there can
248             there is only one transition versus two possible transversions. Rate 2
249             is needed to have an observed transition/transversion ratio of 1.
250              
251             =cut
252              
253             sub rate{
254 18     18 1 20 my $self = shift;
255 18 100       34 if (@_) {
256 5         11 $self->{'_rate'} = shift @_;
257 5         43 $self->_init_mutation_engine;
258             }
259 18   100     58 return $self->{'_rate'} || 1;
260             }
261              
262             =head2 next_seq
263              
264             Title : next_seq
265             Usage : $obj->next_seq
266             Function: Evolve the reference sequence to desired level
267             Returns : A new sequence object mutated from the reference sequence
268             Args : -
269              
270             =cut
271              
272             sub next_seq {
273 5     5 1 9 my $self = shift;
274 5         11 $self->{'_mut_string'} = $self->{'_ori_string'};
275 5         20 $self->reset_mutation_counter;
276              
277 5         13 $self->{'_mutations'} = [];
278              
279 5         6 while (1) {
280             # find the location in the string to change
281 35         104 my $loc = int (rand length($self->{'_mut_string'})) + 1;
282              
283 35         60 $self->mutate($loc); # for modularity
284              
285             # stop evolving if any of the limit has been reached
286 35 100 100     66 last if $self->identity && $self->get_alignment_identity <= $self->identity;
287 34 100 100     56 last if $self->pam && 100*$self->get_mutation_counter/$self->{'_seq_length'} >= $self->pam;
288 31 100 100     51 last if $self->mutation_count && $self->get_mutation_counter >= $self->mutation_count;
289             }
290 5         14 $self->_increase_sequence_counter;
291              
292 5         9 my $type = $self->seq_type;
293             return $type->new(-id => $self->seq->id. "-". $self->get_sequence_counter,
294             -description => $self->seq->description,
295 5         13 -seq => $self->{'_mut_string'}
296             )
297             }
298              
299             =head2 mutate
300              
301             Title : mutate
302             Usage : $obj->mutate
303             Function: mutate the sequence at the given location according to the model
304             Returns : true
305             Args : integer, start location of the mutation, required argument
306              
307             Called from next_seq().
308              
309             =cut
310              
311             sub mutate {
312 35     35 1 34 my $self = shift;
313 35         37 my $loc = shift;
314 35 50       48 $self->throw('the first argument is the location of the mutation') unless $loc;
315              
316             # nucleotide to change
317 35         52 my $oldnuc = substr $self->{'_mut_string'}, $loc-1, 1;
318 35         34 my $newnuc;
319              
320              
321             # find the nucleotide it is changed to
322 35         42 my $choose = rand(100); # scale is 0-100
323 35 100       62 if ($choose < $self->{'_transition'} ) {
    100          
324 26         43 $newnuc = $self->{'_changes'}->{$oldnuc}[0];
325             } elsif ($choose < $self->{'_first_transversion'} ) {
326 4         11 $newnuc = $self->{'_changes'}->{$oldnuc}[1];
327             } else {
328 5         19 $newnuc = $self->{'_changes'}->{$oldnuc}[2];
329             }
330              
331             # do the change
332 35         61 substr $self->{'_mut_string'}, $loc-1, 1 , $newnuc;
333 35         72 $self->_increase_mutation_counter;
334 35         75 $self->{'_mut_locatableseq'}->seq($self->{'_mut_string'});
335              
336 35 50       75 print STDERR "$loc$oldnuc>$newnuc\n" if $self->verbose > 0;
337              
338 35         41 push @{$self->{'_mutations'}}, "$loc$oldnuc>$newnuc";
  35         102  
339             }
340              
341              
342             1;