File Coverage

Bio/LiveSeq/AARange.pm
Criterion Covered Total %
statement 6 141 4.2
branch 0 42 0.0
condition 0 24 0.0
subroutine 2 21 9.5
pod 19 19 100.0
total 27 247 10.9


line stmt bran cond sub pod time code
1             #
2             # bioperl module for Bio::LiveSeq::AARange
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Joseph Insana
7             #
8             # Copyright Joseph Insana
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::LiveSeq::AARange - AARange abstract class for LiveSeq
17              
18             =head1 SYNOPSIS
19              
20             #documentation needed
21              
22             =head1 DESCRIPTION
23              
24             This is used as possible parent for aminoacid range object classes.
25             Or it can be used straight away to define aminoacid ranges. The idea
26             is that the ranges defined are attached to a Translation object and
27             they refer to its coordinate-system when they are first created (via
28             the new() method). When they are created they are anyway linked to
29             the underlying DNA LiveSeq by way of the LiveSeq labels. This allows
30             to preserve the ranges even if the numbering changes in the
31             Translation due to deletions or insertions.
32              
33             The protein sequence associated with the AARange can be accessed via
34             the usual seq() or subseq() methods.
35              
36             The start and end of the AARange in protein coordinate system can be
37             fetched with aa_start() and aa_end() methods. Note: the behaviour of
38             these methods would be influenced by the coordinate_start set in the
39             corresponding Translation object. This can be desirable but can also
40             lead to confusion if the coordinate_start had been changed and the
41             original position of the AARange was to be retrieved.
42              
43             start() and end() methods of the AARange will point to the labels
44             identifying the first nucleotide of the first and last triplet coding
45             for the start and end of the AminoAcidRange.
46              
47             The underlying nucleotide sequence of the AARange can be retrieved
48             with the labelsubseq() method. This would retrieve the whole DNA
49             sequence, including possible introns. This is called "DNA_sequence".
50              
51             To fetch the nucleotide sequence of the Transcript, without introns,
52             the labelsubseq() of the attached Transcript (the Transcript the
53             Translation comes from) has to be accessed. This is called
54             "cDNA_sequence".
55              
56             Here are the operations to retrieve these latter two kinds of
57             sequences:
58              
59             $startlabel=$AARange->start;
60             $endtripletlabel=$AARange->end;
61             $endlabel=$AARange->{'seq'}->label(3,$endtripletlabel,$AARange->strand);
62              
63             $dnaseq=$AARange->labelsubseq($startlabel,undef,$endlabel));
64              
65             $cdnaseq=$AARange->get_Transcript->labelsubseq($startlabel,undef,$endlabel);
66              
67             To simplify, these operations have been included in two additional
68             methods: dna_seq() and cdna_seq().
69              
70             These would return the whole sequence, as in the examples above. But
71             the above general scheme can be used by specifying different labels,
72             to retrieve hypothetical subsequences of interest.
73              
74             =head1 AUTHOR - Joseph A.L. Insana
75              
76             Email: Insana@ebi.ac.uk, jinsana@gmx.net
77              
78             =head1 APPENDIX
79              
80             The rest of the documentation details each of the object
81             methods. Internal methods are usually preceded with a _
82              
83             =cut
84              
85             # Let the code begin...
86              
87             package Bio::LiveSeq::AARange;
88              
89 2     2   8 use strict;
  2         2  
  2         52  
90 2     2   4 use base qw(Bio::LiveSeq::SeqI);
  2         2  
  2         1866  
91              
92             =head2 new
93              
94             Title : new
95             Usage : $aarange = Bio::LiveSeq::AARange->new(-translation => $obj_ref,
96             -start => $beginaa,
97             -end => $endaa,
98             -name => "ABCD",
99             -description => "DCBA",
100             -translength => $length);
101              
102             Function: generates a new AminoAcidRange LiveSeq object
103             Returns : reference to a new object of class AARange
104             Errorcode -1
105             Args : two positions in AminoAcid coordinate numbering
106             an object reference specifying to which translation the aminoacid
107             ranges refer to
108             a name and a description (optional)
109             an optional "translength" argument: this can be given when
110             a lot of AARanges are to be created at the same time for the same
111             Translation object, calculating it with $translation->length
112             This would increase the speed, avoiding the new() function to
113             calculate everytime the same length again and again for every obj.
114              
115             =cut
116              
117             sub new {
118 0     0 1   my ($thing, %args) = @_;
119 0   0       my $class = ref($thing) || $thing;
120 0           my ($obj,%range);
121              
122 0           $obj = \%range;
123 0           $obj = bless $obj, $class;
124 0           my $self=$obj;
125              
126 0           my ($translation,$start,$end,$name,$description,$translength)=($args{-translation},$args{-start},$args{-end},$args{-name},$args{-description},$args{-translength});
127              
128 0 0 0       unless (($translation)&&(ref($translation) eq "Bio::LiveSeq::Translation")) {
129 0           $self->warn("No -translation or wrong type given");
130 0           return (-1);
131             }
132 0 0         unless ($translength) { # if it's not given, fetch it
133 0           $translength=$translation->length;
134             }
135 0           my $seq=$translation->{'seq'};
136              
137 0 0 0       if (($start < 1)&&($start > $translength)) {
138 0           $self->warn("$class not initialised because start aminoacid position not valid");
139 0           return (-1);
140             }
141 0 0 0       if (($end < 1)&&($end > $translength)) {
142 0           $self->warn("$class not initialised because end aminoacid position not valid");
143 0           return (-1);
144             }
145 0 0         if ($start > $end) {
146 0           $self->warn("$class not initialised because start position > end position!");
147 0           return (-1);
148             }
149              
150 0           my ($starttripletlabel,$endtripletlabel);
151 0 0         if ($start == $end) { # trick to increase speed
152 0           $starttripletlabel=$endtripletlabel=$translation->label($start);
153             } else {
154 0           ($starttripletlabel,$endtripletlabel)=($translation->label($start),$translation->label($end));
155             }
156 0 0 0       unless (($starttripletlabel > 0)&&($endtripletlabel > 0)) {
157 0           $self->warn("$class not initialised because of problems in retrieving start or end label!");
158 0           return (-1);
159             }
160              
161             # unsure if needed:
162             #my $endlabel=$seq->label(3,$endtripletlabel); # to get the real end
163             #unless ($endlabel > 0) {
164             #carp "$class not initialised because of problems retrieving the last nucleotide of the triplet coding for the end aminoacid";
165             #return (-1);
166             #}
167 0           $self->{'seq'}=$seq;
168 0           $self->{'start'}=$starttripletlabel;
169 0           $self->{'end'}=$endtripletlabel;
170 0           $self->{'strand'}=$translation->strand;
171 0           $self->{'translation'}=$translation;
172 0           $self->{'name'}=$name;
173 0           $self->{'description'}=$description;
174 0           $self->{'alphabet'}="protein";
175              
176 0           return $obj;
177             }
178              
179             sub coordinate_start {
180 0     0 1   my $self=shift;
181 0           $self->warn("Cannot perform this operation in an AminoAcidRange object!");
182 0           return (-1);
183             }
184              
185             sub all_labels {
186 0     0 1   my $self=shift;
187 0           $self->warn("Cannot perform this operation in an AminoAcidRange object!");
188 0           return (-1);
189             }
190              
191             sub valid {
192 0     0 1   my $self=shift;
193 0           $self->warn("Cannot perform this operation in an AminoAcidRange object!");
194 0           return (-1);
195             }
196              
197             =head2 get_Transcript
198              
199             Title : valid
200             Usage : $transcript = $obj->get_Transcript()
201             Function: retrieves the reference to the object of class Transcript (if any)
202             attached to a LiveSeq object
203             Returns : object reference
204             Args : none
205              
206             =cut
207              
208             sub get_Transcript {
209 0     0 1   my $self=shift;
210 0           return ($self->get_Translation->get_Transcript);
211             }
212              
213             =head2 get_Translation
214              
215             Title : valid
216             Usage : $translation = $obj->get_Translation()
217             Function: retrieves the reference to the object of class Translation (if any)
218             attached to a LiveSeq object
219             Returns : object reference
220             Args : none
221              
222             =cut
223              
224             sub get_Translation {
225 0     0 1   my $self=shift;
226 0           return ($self->{'translation'});
227             }
228              
229             sub change {
230 0     0 1   my $self=shift;
231 0           $self->warn("Cannot change an AminoAcidRange object!");
232 0           return (-1);
233             }
234             sub positionchange {
235 0     0 1   my $self=shift;
236 0           $self->warn("Cannot change an AminoAcidRange object!");
237 0           return (-1);
238             }
239             sub labelchange {
240 0     0 1   my $self=shift;
241 0           $self->warn("Cannot change an AminoAcidRange object!");
242 0           return (-1);
243             }
244              
245             sub subseq {
246 0     0 1   my ($self,$pos1,$pos2,$length) = @_;
247 0 0         if (defined ($length)) {
248 0 0         if ($length < 1) {
249 0           $self->warn("No sense asking for a subseq of length < 1");
250 0           return (-1);
251             }
252             }
253 0 0         unless (defined ($pos1)) {
    0          
254 0           $pos1=1;
255             } elsif ($pos1 < 1) { # if position out of boundaries
256 0           $self->warn("Starting position for AARange cannot be < 1!"); return (-1);
  0            
257 0 0 0       if ((defined ($pos2))&&($pos1>$pos2)) {
258 0           $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
  0            
259             }
260             }
261 0           my $seq=$self->seq;
262 0           my $objlength=length($seq);
263 0 0         unless (defined ($length)) {
264 0           $length=$objlength-$pos1+1;
265             }
266 0 0         if (defined ($pos2)) {
267 0 0         if ($pos2 > $objlength) { # if position out of boundaries
268 0           $self->warn("Ending position for AARange cannot be > length of AARange!"); return (-1);
  0            
269             }
270 0           $length=$pos2-$pos1+1;
271 0 0 0       if ((defined ($pos1))&&($pos1>$pos2)) {
272 0           $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
  0            
273             }
274             }
275 0           my $str=substr($seq,$pos1-1,$length);
276 0 0         if (length($str) < $length) {
277 0           $self->warn("Attention, cannot return the length requested for subseq",1);
278             }
279 0           return $str;
280             }
281              
282             sub seq {
283 0     0 1   my $self=shift;
284 0           my ($aa_start,$aa_end)=($self->aa_start,$self->aa_end);
285 0 0 0       unless (($aa_start)&&($aa_end)) { # they must both exist
286 0           $self->warn("Not able to find start or end of the AminoAcid Range");
287 0           return (0);
288             }
289 0           my $translseq=$self->get_Translation->seq;
290 0           return substr($translseq,$aa_start-1,$aa_end-$aa_start+1);
291             # Note: it will return "undef" if the translation stops before the start
292             # of the aarange (because of upstream nonsense mutation creating STOP).
293             # For the same reason it would return uncomplete (up to the STOP) string
294             # if the stop happens in between aarange's start and stop
295             }
296              
297             sub length {
298 0     0 1   my $self=shift;
299 0           my $seq=$self->seq;
300 0           my $length=length($seq);
301 0           return $length;
302             }
303              
304             sub label {
305 0     0 1   my ($self,$position)=@_;
306 0           my $translation=$self->get_Translation;
307 0           my $origstart=$translation->coordinate_start; # preserve it
308 0           $translation->coordinate_start($self->start); # change it
309 0           my $label=$translation->label($position);
310 0           $translation->coordinate_start($origstart); # restore it
311 0           return ($label);
312             }
313              
314             sub position {
315 0     0 1   my ($self,$label)=@_;
316 0           my $translation=$self->get_Translation;
317 0           my $origstart=$translation->coordinate_start; # preserve it
318 0           $translation->coordinate_start($self->start); # change it
319 0           my $position=$translation->position($label);
320 0           $translation->coordinate_start($origstart); # restore it
321 0           return ($position);
322             }
323              
324             =head2 aa_start
325              
326             Title : aa_start
327             Usage : $end = $aarange->aa_start()
328             Returns : integer (position, according to Translation coordinate system) of
329             the start of an AminoAcidRange object
330             Args : none
331              
332             =cut
333              
334             sub aa_start {
335 0     0 1   my $self=shift;
336 0           my $aastart=$self->get_Translation->position($self->{'start'});
337             }
338              
339             =head2 aa_end
340              
341             Title : aa_end
342             Usage : $end = $aarange->aa_end()
343             Returns : integer (position, according to Translation coordinate system) of
344             the end of an AminoAcidRange object
345             Args : none
346              
347             =cut
348              
349             sub aa_end {
350 0     0 1   my $self=shift;
351 0           my $aastart=$self->get_Translation->position($self->{'end'});
352             }
353              
354             =head2 dna_seq
355              
356             Title : dna_seq
357             Usage : $end = $aarange->dna_seq()
358             Returns : the sequence at DNA level of the entire AminoAcidRange
359             this would include introns (if present)
360             Args : none
361              
362             =cut
363              
364             sub dna_seq {
365 0     0 1   my $self=shift;
366 0           my $startlabel=$self->start;
367 0           my $endtripletlabel=$self->end;
368 0           my $endlabel=$self->{'seq'}->label(3,$endtripletlabel,$self->strand);
369 0           return ($self->labelsubseq($startlabel,undef,$endlabel));
370             }
371              
372             =head2 cdna_seq
373              
374             Title : cdna_seq
375             Usage : $end = $aarange->cdna_seq()
376             Returns : the sequence at cDNA level of the entire AminoAcidRange
377             i.e. this is the part of the Transcript that codes for the
378             AminoAcidRange. It would be composed just of exonic DNA.
379             Args : none
380              
381             =cut
382              
383             sub cdna_seq {
384 0     0 1   my $self=shift;
385 0           my $startlabel=$self->start;
386 0           my $endtripletlabel=$self->end;
387 0           my $endlabel=$self->{'seq'}->label(3,$endtripletlabel,$self->strand);
388 0           return ($self->get_Transcript->labelsubseq($startlabel,undef,$endlabel));
389             }
390              
391             # this checks if the attached Transcript has a Gene object attached
392             sub gene {
393 0     0 1   my ($self,$value) = @_;
394 0 0         if (defined $value) {
395 0           $self->{'gene'} = $value;
396             }
397 0 0         unless (exists $self->{'gene'}) {
398 0 0         unless (exists $self->get_Transcript->{'gene'}) {
399 0           return (0);
400             } else {
401 0           return ($self->get_Transcript->{'gene'});
402             }
403             } else {
404 0           return $self->{'gene'};
405             }
406             }
407              
408             1;