File Coverage

Bio/SeqEvolution/DNAPoint.pm

Criterion	Covered	Total	%
statement	91	103	88.3
branch	23	36	63.8
condition	11	11	100.0
subroutine	14	15	93.3
pod	5	5	100.0
total	144	170	84.7

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# BioPerl module for Bio::SeqEvolution::DNAPoint
3							#
4							# Please direct questions and support issues to
5							#
6							# Cared for by Heikki Lehvaslaiho
7							#
8							# Copyright Heikki Lehvaslaiho
9							#
10							# You may distribute this module under the same terms as perl itself
11
12							# POD documentation - main docs before the code
13
14							=head1 NAME
15
16							Bio::SeqEvolution::DNAPoint - evolve a sequence by point mutations
17
18							=head1 SYNOPSIS
19
20							# $seq is a Bio::PrimarySeqI to mutate
21							$evolve = Bio::SeqEvolution::Factory->new (-rate => 5,
22							-seq => $seq,
23							-identity => 50
24							);
25							$newseq = $evolve->next_seq;
26
27
28							=head1 DESCRIPTION
29
30							Bio::SeqEvolution::DNAPoint implements the simplest evolution model:
31							nucleotides change by point mutations, only. Transition/transversion
32							rate of the change, rate(), can be set.
33
34							The new sequences are named with the id of the reference sequence
35							added with a running number. Placing a new sequence into a factory to
36							be evolved resets that counter. It can also be called directly with
37							L.
38
39							The default sequence type returned is Bio::PrimarySeq. This can be
40							changed to any Bio::PrimarySeqI compliant sequence class.
41
42							Internally the probability of the change of one nucleotide is mapped
43							to scale from 0 to 100. The probability of the transition occupies
44							range from 0 to some value. The remaining range is divided equally
45							among the two transversion nucleotides. A random number is then
46							generated to pick up one change.
47
48							Not that the default transition/transversion rate, 1:1, leads to
49							observed transition/transversion ratio of 1:2 simply because there is
50							only one transition nucleotide versus two transversion nucleotides.
51
52							=head1 FEEDBACK
53
54							=head2 Mailing Lists
55
56							User feedback is an integral part of the evolution of this and other
57							Bioperl modules. Send your comments and suggestions preferably to
58							the Bioperl mailing list. Your participation is much appreciated.
59
60							bioperl-l@bioperl.org - General discussion
61							http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62
63							=head2 Support
64
65							Please direct usage questions or support issues to the mailing list:
66
67							I
68
69							rather than to the module maintainer directly. Many experienced and
70							reponsive experts will be able look at the problem and quickly
71							address it. Please include a thorough description of the problem
72							with code and data examples if at all possible.
73
74							=head2 Reporting Bugs
75
76							Report bugs to the Bioperl bug tracking system to help us keep track
77							of the bugs and their resolution. Bug reports can be submitted via the
78							web:
79
80							https://github.com/bioperl/bioperl-live/issues
81
82							=head1 AUTHOR
83
84							Heikki Lehvaslaiho Eheikki at bioperl dot orgE
85
86							=head1 CONTRIBUTORS
87
88							Additional contributor's names and emails here
89
90							=head1 APPENDIX
91
92							The rest of the documentation details each of the object methods.
93							Internal methods are usually preceded with a _
94
95							=cut
96
97
98							# Let the code begin...
99
100
101							package Bio::SeqEvolution::DNAPoint;
102	1			1		3	use strict;
	1					1
	1					22
103	1			1		3	use Bio::Root::Root;
	1					1
	1					15
104	1			1		2	use Bio::SeqEvolution::Factory;
	1					1
	1					15
105
106	1			1		260	use Bio::Variation::DNAMutation;
	1					2
	1					27
107	1			1		276	use Bio::Variation::Allele;
	1					1
	1					25
108	1			1		497	use Bio::SimpleAlign;
	1					1
	1					26
109
110	1			1		4	use base qw(Bio::SeqEvolution::Factory);
	1					1
	1					737
111
112
113							sub _initialize {
114	7			7		11	my($self, @args) = @_;
115
116	7					15	$self->SUPER::_initialize(@args);
117	7					13	my %param = @args;
118	7					12	@param{ map { lc $_ } keys %param } = values %param; # lowercase keys
	25					28
119
120	7	100				17	exists $param{'-rate'} && $self->rate($param{'-rate'});
121
122	7					12	$self->_init_mutation_engine;
123							}
124
125
126							sub _init_mutation_engine {
127	12			12		10	my $self = shift;
128
129							# arrays of possible changes have transitions as first items
130	12					8	my %changes;
131	12					23	$self->{'_changes'}->{'a'} = ['t', 'c', 'g'];
132	12					16	$self->{'_changes'}->{'t'} = ['a', 'c', 'g'];
133	12					19	$self->{'_changes'}->{'c'} = ['g', 'a', 't'];
134	12					14	$self->{'_changes'}->{'g'} = ['c', 'a', 't'];
135
136
137							# given the desired rate, find out where cut off points need to be
138							# when random numbers are generated from 0 to 100
139							# we are ignoring identical mutations (e.g. A->A) to speed things up
140	12					13	my $bin_size = 100/($self->rate + 2);
141	12					18	$self->{'_transition'} = 100 - (2*$bin_size);
142	12					23	$self->{'_first_transversion'} = $self->{'_transition'} + $bin_size;
143
144	12					21	$self->_init_alignment;
145							}
146
147							sub _init_alignment {
148	17			17		14	my $self = shift;
149
150							# put the initial sequence into the alignment object
151	17					49	$self->{'_align'} = Bio::SimpleAlign->new(-verbose => -1);
152	17	100				30	return unless $self->seq;
153	14					24	$self->{'_ori_locatableseq'} = Bio::LocatableSeq->new(-id => 'ori',
154							-seq=> $self->seq->seq);
155	14					27	$self->{'_mut_locatableseq'} = Bio::LocatableSeq->new(-id => 'mut',
156							-seq=> $self->seq->seq);
157	14					27	$self->{'_align'}->add_seq($self->{'_ori_locatableseq'});
158	14					30	$self->{'_align'}->add_seq($self->{'_mut_locatableseq'});
159							}
160
161							=head2 seq
162
163							Title : seq
164							Usage : $obj->seq($newval)
165							Function: Set the sequence object for the original sequence
166							Returns : The sequence object
167							Args : newvalue (optional)
168
169							Setting this will reset mutation and generated mutation counters.
170
171							=cut
172
173							sub seq{
174	60			60	1	41	my $self = shift;
175
176	60	100				82	if (@_) {
177	4					8	my $seq = shift;
178	4	50				25	$self->throw('Need a valid Bio::PrimarySeqI, not [', ref($seq), ']')
179							unless $seq->isa('Bio::PrimarySeqI');
180
181	4	50				31	$self->throw('Only nucleotide sequences are supported')
182							if $seq->alphabet eq 'protein';
183	4	50				8	$self->throw('No ambiquos nucleotides allowed in the input sequence')
184							if $seq->seq =~ m/[^acgt]/;
185
186	4					6	$self->{'_seq'} = $seq;
187
188							# unify the look of sequence strings and cache the information
189	4					6	$self->{'_ori_string'} = lc $seq->seq; # lower case
190	4					7	$self->{'_ori_string'} =~ s/u/t/; # simplyfy our life; modules should deal with the change anyway
191	4					8	$self->{'_seq_length'} = $seq->length;
192
193	4					12	$self->reset_sequence_counter;
194							}
195	60					105	return $self->{'_seq'};
196							}
197
198							=head2 set_mutated_seq
199
200							Title : seq_mutated_seq
201							Usage : $obj->set_mutated_seq($newval)
202							Function: In case of mutating a sequence with multiple evolvers, this
203							Returns : set_mutated_seq
204							Args : newvalue (optional)
205
206							=cut
207
208							sub set_mutated_seq {
209	0			0	1	0	my $self = shift;
210
211	0	0				0	if (@_) {
212	0					0	my $seq = shift;
213	0	0				0	$self->throw('Need a valid Bio::PrimarySeqI, not [', ref($seq), ']')
214							unless $seq->isa('Bio::PrimarySeqI');
215	0	0				0	$self->throw('Only nucleotide sequences are supported')
216							if $seq->alphabet eq 'protein';
217	0	0				0	$self->throw('No ambiquos nucleotides allowed in the input sequence')
218							if $seq->seq =~ m/[^acgt]/;
219
220	0					0	$self->{'_seq_mutated'} = $seq;
221
222							# unify the look of sequence strings and cache the information
223	0					0	$self->{'_mut_string'} = lc $seq->seq; # lower case
224	0					0	$self->{'_mut_string'} =~ s/u/t/; # simplyfy our life; modules should deal with the change anyway
225
226
227	0					0	$self->reset_sequence_counter;
228							}
229							#set returned sequence to be the last mutated string
230	0					0	$self->{'_seq'}->seq($self->{'_mut_string'});
231	0					0	return $self->{'_seq'};
232							}
233
234
235							=head2 rate
236
237							Title : rate
238							Usage : $obj->rate($newval)
239							Function: Set the transition/transversion rate.
240							Returns : value of rate
241							Args : newvalue (optional)
242
243							Transition/transversion ratio is an observed attribute of an sequence
244							comparison. We are dealing here with the transition/transversion rate
245							that we set for our model of sequence evolution.
246
247							Note that we are using standard nucleotide alphabet and that there can
248							there is only one transition versus two possible transversions. Rate 2
249							is needed to have an observed transition/transversion ratio of 1.
250
251							=cut
252
253							sub rate{
254	18			18	1	17	my $self = shift;
255	18	100				24	if (@_) {
256	5					4	$self->{'_rate'} = shift @_;
257	5					10	$self->_init_mutation_engine;
258							}
259	18		100			44	return $self->{'_rate'} \|\| 1;
260							}
261
262							=head2 next_seq
263
264							Title : next_seq
265							Usage : $obj->next_seq
266							Function: Evolve the reference sequence to desired level
267							Returns : A new sequence object mutated from the reference sequence
268							Args : -
269
270							=cut
271
272							sub next_seq {
273	5			5	1	4	my $self = shift;
274	5					7	$self->{'_mut_string'} = $self->{'_ori_string'};
275	5					12	$self->reset_mutation_counter;
276
277	5					8	$self->{'_mutations'} = [];
278
279	5					5	while (1) {
280							# find the location in the string to change
281	30					69	my $loc = int (rand length($self->{'_mut_string'})) + 1;
282
283	30					35	$self->mutate($loc); # for modularity
284
285							# stop evolving if any of the limit has been reached
286	30	100	100			46	last if $self->identity && $self->get_alignment_identity <= $self->identity;
287	29	100	100			34	last if $self->pam && 100*$self->get_mutation_counter/$self->{'_seq_length'} >= $self->pam;
288	26	100	100			34	last if $self->mutation_count && $self->get_mutation_counter >= $self->mutation_count;
289							}
290	5					11	$self->_increase_sequence_counter;
291
292	5					8	my $type = $self->seq_type;
293							return $type->new(-id => $self->seq->id. "-". $self->get_sequence_counter,
294							-description => $self->seq->description,
295	5					10	-seq => $self->{'_mut_string'}
296							)
297							}
298
299							=head2 mutate
300
301							Title : mutate
302							Usage : $obj->mutate
303							Function: mutate the sequence at the given location according to the model
304							Returns : true
305							Args : integer, start location of the mutation, required argument
306
307							Called from next_seq().
308
309							=cut
310
311							sub mutate {
312	30			30	1	19	my $self = shift;
313	30					17	my $loc = shift;
314	30	50				38	$self->throw('the first argument is the location of the mutation') unless $loc;
315
316							# nucleotide to change
317	30					40	my $oldnuc = substr $self->{'_mut_string'}, $loc-1, 1;
318	30					16	my $newnuc;
319
320
321							# find the nucleotide it is changed to
322	30					23	my $choose = rand(100); # scale is 0-100
323	30	100				44	if ($choose < $self->{'_transition'} ) {
		100
324	19					21	$newnuc = $self->{'_changes'}->{$oldnuc}[0];
325							} elsif ($choose < $self->{'_first_transversion'} ) {
326	4					6	$newnuc = $self->{'_changes'}->{$oldnuc}[1];
327							} else {
328	7					9	$newnuc = $self->{'_changes'}->{$oldnuc}[2];
329							}
330
331							# do the change
332	30					37	substr $self->{'_mut_string'}, $loc-1, 1 , $newnuc;
333	30					39	$self->_increase_mutation_counter;
334	30					43	$self->{'_mut_locatableseq'}->seq($self->{'_mut_string'});
335
336	30	50				44	print STDERR "$loc$oldnuc>$newnuc\n" if $self->verbose > 0;
337
338	30					30	push @{$self->{'_mutations'}}, "$loc$oldnuc>$newnuc";
	30					71
339							}
340
341
342							1;