File Coverage

Bio/LiveSeq/Translation.pm
Criterion Covered Total %
statement 75 136 55.1
branch 15 38 39.4
condition 1 3 33.3
subroutine 13 23 56.5
pod 16 18 88.8
total 120 218 55.0


line stmt bran cond sub pod time code
1             #
2             # bioperl module for Bio::LiveSeq::Translation
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Joseph Insana
7             #
8             # Copyright Joseph Insana
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::LiveSeq::Translation - Translation class for LiveSeq
17              
18             =head1 SYNOPSIS
19              
20             #documentation needed
21              
22             =head1 DESCRIPTION
23              
24             This stores information about aminoacids translations of transcripts.
25             The implementation is that a Translation object is the translation of
26             a Transcript object, with different possibilities of manipulation,
27             different coordinate system and eventually its own ranges (protein domains).
28              
29             =head1 AUTHOR - Joseph A.L. Insana
30              
31             Email: Insana@ebi.ac.uk, jinsana@gmx.net
32              
33             =head1 APPENDIX
34              
35             The rest of the documentation details each of the object
36             methods. Internal methods are usually preceded with a _
37              
38             =cut
39              
40             # Let the code begin...
41              
42             package Bio::LiveSeq::Translation;
43              
44 2     2   10 use strict;
  2         2  
  2         75  
45             #use Carp qw(croak carp cluck);
46 2     2   9 use Bio::LiveSeq::SeqI; # uses SeqI, inherits from it
  2         3  
  2         45  
47 2     2   268 use Bio::PrimarySeq;
  2         4  
  2         47  
48 2     2   9 use base qw(Bio::LiveSeq::Transcript);
  2         2  
  2         1296  
49              
50              
51             =head2 new
52              
53             Title : new
54             Usage : $protein = Bio::LiveSeq::Translation->new(-transcript => $transcr);
55              
56             Function: generates a new Bio::LiveSeq::Translation
57             Returns : reference to a new object of class Translation
58             Errorcode -1
59             Args : reference to an object of class Transcript
60              
61             =cut
62              
63             sub new {
64 6     6 1 23 my ($thing, %args) = @_;
65 6   33     28 my $class = ref($thing) || $thing;
66 6         12 my ($obj,%translation);
67              
68 6         8 my $transcript=$args{-transcript};
69              
70 6         11 $obj = \%translation;
71 6         12 $obj = bless $obj, $class;
72              
73 6 50       17 unless ($transcript) {
74 0         0 $obj->throw("$class not initialised because no -transcript given");
75             }
76 6 50       19 unless (ref($transcript) eq "Bio::LiveSeq::Transcript") {
77 0         0 $obj->throw("$class not initialised because no object of class Transcript given");
78             }
79              
80             #my $startbase = $transcript->start;
81             #my $endbase = $transcript->end;
82 6         30 my $strand = $transcript->strand;
83 6         14 my $seq = $transcript->{'seq'};
84              
85 6         34 $obj->{'strand'}=$strand;
86 6         12 $obj->{'seq'}=$seq;
87 6         10 $obj->{'transcript'}=$transcript;
88 6         14 $obj->{'alphabet'}="protein";
89              
90 6         14 $transcript->{'translation'}=$obj;# set the Translation ref into its Transcript
91 6         16 return $obj;
92             }
93              
94             =head2 get_Transcript
95              
96             Title : valid
97             Usage : $transcript = $obj->get_Transcript()
98             Function: retrieves the reference to the object of class Transcript (if any)
99             attached to a LiveSeq object
100             Returns : object reference
101             Args : none
102              
103             =cut
104              
105             sub get_Transcript {
106 88     88 1 139 my $self=shift;
107 88         388 return ($self->{'transcript'});
108             }
109              
110             # These get redefined here, overriding the SeqI ones
111              
112             sub change {
113 0     0 1 0 my ($self)=@_;
114 0         0 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
115 0         0 return (-1);
116             }
117             sub positionchange {
118 0     0 1 0 my ($self)=@_;
119 0         0 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
120 0         0 return (-1);
121             }
122             sub labelchange {
123 0     0 1 0 my ($self)=@_;
124 0         0 $self->warn("Cannot change a Translation object!\nChanges have to be issued at the nucleotide level!");
125 0         0 return (-1);
126             }
127              
128             # this just returns the translation of the transcript, without checking for
129             # stop codons
130             sub transl_seq {
131 0     0 0 0 my $self=shift;
132 0         0 my $transcript=$self->get_Transcript;
133 0         0 my $translation=$transcript->translate(undef, undef, undef,
134             $self->translation_table)->seq;
135 0         0 return $translation;
136             }
137              
138             # version 1.74 -> now the "*" is printed
139             sub seq {
140 22     22 1 63 my $self=shift;
141 22         36 my $proteinseq;
142 22         82 my $transcript=$self->get_Transcript;
143 22         97 my $translation=$transcript->translate(undef, undef, undef,
144             $self->translation_table)->seq;
145 22         117 my $stop_pos=index($translation,"*");
146 22 50       91 if ($stop_pos == -1) { # no stop present, continue downstream
147 22         125 my $downstreamseq=$transcript->downstream_seq();
148             #carp "the downstream is: $downstreamseq"; # debug
149 22         111 my $cdnaseq=$transcript->seq();
150 22         298 my $extendedseq = Bio::PrimarySeq->new(-seq => "$cdnaseq$downstreamseq",
151             -alphabet => 'dna'
152             );
153              
154 22         128 $translation=$extendedseq->translate(undef, undef, undef,
155             $self->translation_table)->seq;
156             #carp "the new translation is: $translation"; # debug
157 22         51 $stop_pos=index($translation,"*");
158 22 50       83 if ($stop_pos == -1) { # still no stop present, return warning
159 0         0 $self->warn("Warning: no stop codon found in the retrieved sequence downstream of Transcript ",1);
160 0         0 undef $stop_pos;
161 0         0 $proteinseq=$translation;
162             } else {
163 22         97 $proteinseq=substr($translation,0,$stop_pos+1);
164             #carp "the new stopped translation is: $proteinseq, because the stop is at position $stop_pos"; # debug
165             }
166             } else {
167 0         0 $proteinseq=substr($translation,0,$stop_pos+1);
168             }
169 22         132 return $proteinseq;
170             }
171              
172             sub length {
173 0     0 1 0 my $self=shift;
174 0         0 my $seq=$self->seq;
175 0         0 my $length=length($seq);
176 0         0 return $length;
177             }
178              
179             sub all_labels {
180 0     0 1 0 my $self=shift;
181 0         0 return $self->get_Transcript->all_labels;
182             }
183              
184             # counts in triplet. Only a label matching the beginning of a triplet coding
185             # for an aminoacid is considered valid when setting coordinate_start
186             # (i.e. only in frame!)
187             sub valid {
188 0     0 1 0 my ($self,$label)=@_;
189 0         0 my $i;
190 0         0 my @labels=$self->get_Transcript->all_labels;
191 0         0 my $length=$#labels;
192 0         0 while ($i <= $length) {
193 0 0       0 if ($label == $labels[$i]) {
194 0         0 return (1); # found
195             }
196 0         0 $i=$i+3;
197             }
198 0         0 return (0); # not found
199             }
200              
201             # returns the label to the first nucleotide of the triplet coding for $position aminoacid
202             sub label {
203 0     0 1 0 my ($self,$position)=@_;
204 0         0 my $firstlabel=$self->coordinate_start; # this is in_frame checked
205 0 0       0 if ($position > 0) {
206 0         0 $position=$position*3-2;
207             } else { # if position = 0 this will be caught by Transcript, error thrown
208 0         0 $position=$position*3;
209             }
210 0         0 return $self->get_Transcript->label($position,$firstlabel);
211             # check for coord_start different
212             }
213              
214             # returns position (aminoacids numbering) of a particular label
215             # used to return 0 for not in frame labels
216             # now returns the position anyway (after version 1.66)
217             sub position {
218 10     10 1 32 my ($self,$label)=@_;
219 10         48 my $firstlabel=$self->coordinate_start; # this is in_frame checked
220 10         40 my $position=$self->get_Transcript->position($label,$firstlabel);
221 2     2   13 use integer;
  2         3  
  2         12  
222 10         32 my $modulus=$position % 3;
223 10 50       52 if ($position == 0) {
    50          
224 0         0 return (0);
225             } elsif ($position > 0) {
226 10 100       32 if ($modulus != 1) {
227 6 50       65 $self->warn("Attention! Label $label is not in frame ".
228             "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable
229 6 100       19 if ($modulus == 2) {
230 2         11 return ($position / 3 + 1);
231             } else { # i.e. modulus == 0
232 4         22 return ($position / 3);
233             }
234             }
235 4         22 return ($position / 3 + 1);
236             } else { # pos < 0
237 0 0       0 if ($modulus != 0) {
238 0 0       0 $self->warn("Attention! Label $label is not in frame ".
239             "(1st position of triplet) with protein",1) if $self->verbose > 0; # ignorable
240 0         0 return ($position / 3 - 1); # ok for both other positions
241             }
242 0         0 return ($position / 3);
243             }
244 0         0 $self->throw( "WEIRD: execution shouldn't have reached here");
245 0         0 return (0); # this should never happen, but just in case
246             }
247              
248             # note: it inherits subseq and labelsubseq from Transcript!
249              
250             sub start {
251 16     16 1 32 my $self=shift;
252 16         61 return ($self->{'transcript'}->start);
253             }
254              
255             sub end {
256 6     6 1 10 my $self=shift;
257 6         21 return ($self->{'transcript'}->end);
258             }
259              
260             =head2 aa_ranges
261              
262             Title : aa_ranges
263             Usage : @proteinfeatures = $translation->aa_ranges()
264             Function: to retrieve all the LiveSeq AARange objects attached to a
265             Translation, usually created out of a SwissProt database entry
266             crossreferenced from an EMBL CDS feature.
267             Returns : an array
268             Args : none
269              
270             =cut
271              
272             # returns an array of obj_ref of AARange objects attached to the Translation
273             sub aa_ranges {
274 0     0 1 0 my $self=shift;
275 0         0 return ($self->{'aa_ranges'});
276             }
277              
278             sub translation_table {
279 44     44 1 93 my $self=shift;
280 44         131 $self->get_Transcript->translation_table(@_);
281             }
282              
283             # returns all aminoacids "affected" i.e. all aminoacids coded by any codon
284             # "touched" by the range selected between the labels, even if only partially.
285              
286             # it's not optimized for performance but it's useful
287              
288             sub labelsubseq {
289 5     5 1 16 my ($self,$start,$length,$end)=@_;
290 5         10 my ($pos1,$pos2);
291 5         21 my $transcript=$self->get_Transcript;
292 5 50       16 if ($start) {
293 5 50       22 unless ($transcript->valid($start)) {
294 0         0 $self->warn("Start label not valid"); return (-1);
  0         0  
295             }
296 5         37 $pos1=$self->position($start);
297             }
298 5 50       18 if ($end) {
299 5 50       16 if ($end == $start) {
300 5         8 $length=1;
301             } else {
302 0 0       0 unless ($transcript->valid($end)) {
303 0         0 $self->warn("End label not valid"); return (-1);
  0         0  
304             }
305 0 0       0 unless ($transcript->follows($start,$end) == 1) {
306 0         0 $self->warn("End label does not follow Start label!"); return (-1);
  0         0  
307             }
308 0         0 $pos2=$self->position($end);
309 0         0 $length=$pos2-$pos1+1;
310             }
311             }
312 5         21 my $sequence=$self->seq;
313 5         31 return (substr($sequence,$pos1-1,$length));
314             }
315              
316             # return the offset in aminoacids from LiveSeq protein sequence and SwissProt
317             # sequence (usually as a result of an INIT_MET or a gap)
318             sub offset {
319 0     0 0   my $self=shift;
320 0           return ($self->{'offset'});
321             }
322              
323             1;