File Coverage

Bio/SeqEvolution/DNAPoint.pm
Criterion Covered Total %
statement 91 103 88.3
branch 23 36 63.8
condition 11 11 100.0
subroutine 14 15 93.3
pod 5 5 100.0
total 144 170 84.7


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SeqEvolution::DNAPoint
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Heikki Lehvaslaiho
7             #
8             # Copyright Heikki Lehvaslaiho
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::SeqEvolution::DNAPoint - evolve a sequence by point mutations
17              
18             =head1 SYNOPSIS
19              
20             # $seq is a Bio::PrimarySeqI to mutate
21             $evolve = Bio::SeqEvolution::Factory->new (-rate => 5,
22             -seq => $seq,
23             -identity => 50
24             );
25             $newseq = $evolve->next_seq;
26              
27              
28             =head1 DESCRIPTION
29              
30             Bio::SeqEvolution::DNAPoint implements the simplest evolution model:
31             nucleotides change by point mutations, only. Transition/transversion
32             rate of the change, rate(), can be set.
33              
34             The new sequences are named with the id of the reference sequence
35             added with a running number. Placing a new sequence into a factory to
36             be evolved resets that counter. It can also be called directly with
37             L.
38              
39             The default sequence type returned is Bio::PrimarySeq. This can be
40             changed to any Bio::PrimarySeqI compliant sequence class.
41              
42             Internally the probability of the change of one nucleotide is mapped
43             to scale from 0 to 100. The probability of the transition occupies
44             range from 0 to some value. The remaining range is divided equally
45             among the two transversion nucleotides. A random number is then
46             generated to pick up one change.
47              
48             Not that the default transition/transversion rate, 1:1, leads to
49             observed transition/transversion ratio of 1:2 simply because there is
50             only one transition nucleotide versus two transversion nucleotides.
51              
52             =head1 FEEDBACK
53              
54             =head2 Mailing Lists
55              
56             User feedback is an integral part of the evolution of this and other
57             Bioperl modules. Send your comments and suggestions preferably to
58             the Bioperl mailing list. Your participation is much appreciated.
59              
60             bioperl-l@bioperl.org - General discussion
61             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62              
63             =head2 Support
64              
65             Please direct usage questions or support issues to the mailing list:
66              
67             I
68              
69             rather than to the module maintainer directly. Many experienced and
70             reponsive experts will be able look at the problem and quickly
71             address it. Please include a thorough description of the problem
72             with code and data examples if at all possible.
73              
74             =head2 Reporting Bugs
75              
76             Report bugs to the Bioperl bug tracking system to help us keep track
77             of the bugs and their resolution. Bug reports can be submitted via the
78             web:
79              
80             https://github.com/bioperl/bioperl-live/issues
81              
82             =head1 AUTHOR
83              
84             Heikki Lehvaslaiho Eheikki at bioperl dot orgE
85              
86             =head1 CONTRIBUTORS
87              
88             Additional contributor's names and emails here
89              
90             =head1 APPENDIX
91              
92             The rest of the documentation details each of the object methods.
93             Internal methods are usually preceded with a _
94              
95             =cut
96              
97              
98             # Let the code begin...
99              
100              
101             package Bio::SeqEvolution::DNAPoint;
102 1     1   3 use strict;
  1         1  
  1         22  
103 1     1   3 use Bio::Root::Root;
  1         1  
  1         15  
104 1     1   2 use Bio::SeqEvolution::Factory;
  1         1  
  1         15  
105              
106 1     1   260 use Bio::Variation::DNAMutation;
  1         2  
  1         27  
107 1     1   276 use Bio::Variation::Allele;
  1         1  
  1         25  
108 1     1   497 use Bio::SimpleAlign;
  1         1  
  1         26  
109              
110 1     1   4 use base qw(Bio::SeqEvolution::Factory);
  1         1  
  1         737  
111              
112              
113             sub _initialize {
114 7     7   11 my($self, @args) = @_;
115              
116 7         15 $self->SUPER::_initialize(@args);
117 7         13 my %param = @args;
118 7         12 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
  25         28  
119              
120 7 100       17 exists $param{'-rate'} && $self->rate($param{'-rate'});
121              
122 7         12 $self->_init_mutation_engine;
123             }
124              
125              
126             sub _init_mutation_engine {
127 12     12   10 my $self = shift;
128              
129             # arrays of possible changes have transitions as first items
130 12         8 my %changes;
131 12         23 $self->{'_changes'}->{'a'} = ['t', 'c', 'g'];
132 12         16 $self->{'_changes'}->{'t'} = ['a', 'c', 'g'];
133 12         19 $self->{'_changes'}->{'c'} = ['g', 'a', 't'];
134 12         14 $self->{'_changes'}->{'g'} = ['c', 'a', 't'];
135              
136              
137             # given the desired rate, find out where cut off points need to be
138             # when random numbers are generated from 0 to 100
139             # we are ignoring identical mutations (e.g. A->A) to speed things up
140 12         13 my $bin_size = 100/($self->rate + 2);
141 12         18 $self->{'_transition'} = 100 - (2*$bin_size);
142 12         23 $self->{'_first_transversion'} = $self->{'_transition'} + $bin_size;
143              
144 12         21 $self->_init_alignment;
145             }
146              
147             sub _init_alignment {
148 17     17   14 my $self = shift;
149              
150             # put the initial sequence into the alignment object
151 17         49 $self->{'_align'} = Bio::SimpleAlign->new(-verbose => -1);
152 17 100       30 return unless $self->seq;
153 14         24 $self->{'_ori_locatableseq'} = Bio::LocatableSeq->new(-id => 'ori',
154             -seq=> $self->seq->seq);
155 14         27 $self->{'_mut_locatableseq'} = Bio::LocatableSeq->new(-id => 'mut',
156             -seq=> $self->seq->seq);
157 14         27 $self->{'_align'}->add_seq($self->{'_ori_locatableseq'});
158 14         30 $self->{'_align'}->add_seq($self->{'_mut_locatableseq'});
159             }
160              
161             =head2 seq
162              
163             Title : seq
164             Usage : $obj->seq($newval)
165             Function: Set the sequence object for the original sequence
166             Returns : The sequence object
167             Args : newvalue (optional)
168              
169             Setting this will reset mutation and generated mutation counters.
170              
171             =cut
172              
173             sub seq{
174 60     60 1 41 my $self = shift;
175              
176 60 100       82 if (@_) {
177 4         8 my $seq = shift;
178 4 50       25 $self->throw('Need a valid Bio::PrimarySeqI, not [', ref($seq), ']')
179             unless $seq->isa('Bio::PrimarySeqI');
180            
181 4 50       31 $self->throw('Only nucleotide sequences are supported')
182             if $seq->alphabet eq 'protein';
183 4 50       8 $self->throw('No ambiquos nucleotides allowed in the input sequence')
184             if $seq->seq =~ m/[^acgt]/;
185              
186 4         6 $self->{'_seq'} = $seq;
187              
188             # unify the look of sequence strings and cache the information
189 4         6 $self->{'_ori_string'} = lc $seq->seq; # lower case
190 4         7 $self->{'_ori_string'} =~ s/u/t/; # simplyfy our life; modules should deal with the change anyway
191 4         8 $self->{'_seq_length'} = $seq->length;
192              
193 4         12 $self->reset_sequence_counter;
194             }
195 60         105 return $self->{'_seq'};
196             }
197              
198             =head2 set_mutated_seq
199              
200             Title : seq_mutated_seq
201             Usage : $obj->set_mutated_seq($newval)
202             Function: In case of mutating a sequence with multiple evolvers, this
203             Returns : set_mutated_seq
204             Args : newvalue (optional)
205              
206             =cut
207              
208             sub set_mutated_seq {
209 0     0 1 0 my $self = shift;
210              
211 0 0       0 if (@_) {
212 0         0 my $seq = shift;
213 0 0       0 $self->throw('Need a valid Bio::PrimarySeqI, not [', ref($seq), ']')
214             unless $seq->isa('Bio::PrimarySeqI');
215 0 0       0 $self->throw('Only nucleotide sequences are supported')
216             if $seq->alphabet eq 'protein';
217 0 0       0 $self->throw('No ambiquos nucleotides allowed in the input sequence')
218             if $seq->seq =~ m/[^acgt]/;
219              
220 0         0 $self->{'_seq_mutated'} = $seq;
221              
222             # unify the look of sequence strings and cache the information
223 0         0 $self->{'_mut_string'} = lc $seq->seq; # lower case
224 0         0 $self->{'_mut_string'} =~ s/u/t/; # simplyfy our life; modules should deal with the change anyway
225              
226              
227 0         0 $self->reset_sequence_counter;
228             }
229             #set returned sequence to be the last mutated string
230 0         0 $self->{'_seq'}->seq($self->{'_mut_string'});
231 0         0 return $self->{'_seq'};
232             }
233              
234              
235             =head2 rate
236              
237             Title : rate
238             Usage : $obj->rate($newval)
239             Function: Set the transition/transversion rate.
240             Returns : value of rate
241             Args : newvalue (optional)
242              
243             Transition/transversion ratio is an observed attribute of an sequence
244             comparison. We are dealing here with the transition/transversion rate
245             that we set for our model of sequence evolution.
246              
247             Note that we are using standard nucleotide alphabet and that there can
248             there is only one transition versus two possible transversions. Rate 2
249             is needed to have an observed transition/transversion ratio of 1.
250              
251             =cut
252              
253             sub rate{
254 18     18 1 17 my $self = shift;
255 18 100       24 if (@_) {
256 5         4 $self->{'_rate'} = shift @_;
257 5         10 $self->_init_mutation_engine;
258             }
259 18   100     44 return $self->{'_rate'} || 1;
260             }
261              
262             =head2 next_seq
263              
264             Title : next_seq
265             Usage : $obj->next_seq
266             Function: Evolve the reference sequence to desired level
267             Returns : A new sequence object mutated from the reference sequence
268             Args : -
269              
270             =cut
271              
272             sub next_seq {
273 5     5 1 4 my $self = shift;
274 5         7 $self->{'_mut_string'} = $self->{'_ori_string'};
275 5         12 $self->reset_mutation_counter;
276              
277 5         8 $self->{'_mutations'} = [];
278              
279 5         5 while (1) {
280             # find the location in the string to change
281 30         69 my $loc = int (rand length($self->{'_mut_string'})) + 1;
282              
283 30         35 $self->mutate($loc); # for modularity
284              
285             # stop evolving if any of the limit has been reached
286 30 100 100     46 last if $self->identity && $self->get_alignment_identity <= $self->identity;
287 29 100 100     34 last if $self->pam && 100*$self->get_mutation_counter/$self->{'_seq_length'} >= $self->pam;
288 26 100 100     34 last if $self->mutation_count && $self->get_mutation_counter >= $self->mutation_count;
289             }
290 5         11 $self->_increase_sequence_counter;
291              
292 5         8 my $type = $self->seq_type;
293             return $type->new(-id => $self->seq->id. "-". $self->get_sequence_counter,
294             -description => $self->seq->description,
295 5         10 -seq => $self->{'_mut_string'}
296             )
297             }
298              
299             =head2 mutate
300              
301             Title : mutate
302             Usage : $obj->mutate
303             Function: mutate the sequence at the given location according to the model
304             Returns : true
305             Args : integer, start location of the mutation, required argument
306              
307             Called from next_seq().
308              
309             =cut
310              
311             sub mutate {
312 30     30 1 19 my $self = shift;
313 30         17 my $loc = shift;
314 30 50       38 $self->throw('the first argument is the location of the mutation') unless $loc;
315              
316             # nucleotide to change
317 30         40 my $oldnuc = substr $self->{'_mut_string'}, $loc-1, 1;
318 30         16 my $newnuc;
319              
320              
321             # find the nucleotide it is changed to
322 30         23 my $choose = rand(100); # scale is 0-100
323 30 100       44 if ($choose < $self->{'_transition'} ) {
    100          
324 19         21 $newnuc = $self->{'_changes'}->{$oldnuc}[0];
325             } elsif ($choose < $self->{'_first_transversion'} ) {
326 4         6 $newnuc = $self->{'_changes'}->{$oldnuc}[1];
327             } else {
328 7         9 $newnuc = $self->{'_changes'}->{$oldnuc}[2];
329             }
330              
331             # do the change
332 30         37 substr $self->{'_mut_string'}, $loc-1, 1 , $newnuc;
333 30         39 $self->_increase_mutation_counter;
334 30         43 $self->{'_mut_locatableseq'}->seq($self->{'_mut_string'});
335              
336 30 50       44 print STDERR "$loc$oldnuc>$newnuc\n" if $self->verbose > 0;
337              
338 30         30 push @{$self->{'_mutations'}}, "$loc$oldnuc>$newnuc";
  30         71  
339             }
340              
341              
342             1;