File Coverage

Bio/LiveSeq/Transcript.pm
Criterion Covered Total %
statement 204 411 49.6
branch 69 204 33.8
condition 14 45 31.1
subroutine 21 25 84.0
pod 16 19 84.2
total 324 704 46.0


line stmt bran cond sub pod time code
1             #
2             # bioperl module for Bio::LiveSeq::Transcript
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Joseph Insana
7             #
8             # Copyright Joseph Insana
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::LiveSeq::Transcript - Transcript class for LiveSeq
17              
18             =head1 SYNOPSIS
19              
20             # documentation needed
21              
22             =head1 DESCRIPTION
23              
24             This stores information about coding sequences (CDS).
25             The implementation is that a Transcript object accesses a collection of
26             Exon objects, inferring from them the nucleotide structure and sequence.
27              
28             =head1 AUTHOR - Joseph A.L. Insana
29              
30             Email: Insana@ebi.ac.uk, jinsana@gmx.net
31              
32             =head1 APPENDIX
33              
34             The rest of the documentation details each of the object
35             methods. Internal methods are usually preceded with a _
36              
37             =cut
38              
39             # Let the code begin...
40              
41             package Bio::LiveSeq::Transcript;
42              
43 2     2   6 use strict;
  2         2  
  2         46  
44             # use Carp qw(carp cluck);
45 2     2   6 use Bio::LiveSeq::Exon; # uses Exon to create new exon in case of deletion
  2         2  
  2         30  
46 2     2   5 use base qw(Bio::LiveSeq::SeqI);
  2         2  
  2         4703  
47              
48             =head2 new
49              
50             Title : new
51             Usage : $transcript = Bio::LiveSeq::Transcript->new(-exons => \@obj_refs);
52              
53             Function: generates a new Bio::LiveSeq::Transcript
54             Returns : reference to a new object of class Transcript
55             Errorcode -1
56             Args : reference to an array of Exon object references
57              
58             =cut
59              
60             sub new {
61 6     6 1 14 my ($thing, %args) = @_;
62 6   33     27 my $class = ref($thing) || $thing;
63 6         8 my ($obj,%transcript);
64              
65 6         7 my @exons=@{$args{-exons}};
  6         20  
66              
67 6         10 $obj = \%transcript;
68 6         11 $obj = bless $obj, $class;
69              
70 6 50       13 unless (@exons) {
71 0         0 $obj->warn("$class not initialised because exons array empty");
72 0         0 return(-1);
73             }
74              
75             # now useless, after start and end methods have been overridden here
76 6         11 my $firstexon = $exons[0];
77             #my $lastexon = $exons[-1];
78             #my $start = $firstexon->start;
79             #my $end = $lastexon->end;
80 6         25 my $strand = $firstexon->strand;
81 6         12 my $seq = $firstexon->{'seq'};
82 6         21 $obj->alphabet('rna');
83              
84 6 50       20 unless (_checkexons(\@exons)) {
85 0         0 $obj->warn("$class not initialised because of problems in the exon structure");
86 0         0 return(-1);
87             }
88 6         15 $obj->{'strand'}=$strand;
89 6         18 $obj->{'exons'}=\@exons;
90 6         10 $obj->{'seq'}=$seq;
91              
92             # set Transcript into each Exon
93 6         7 my $exon;
94 6         13 foreach $exon (@exons) {
95 34         36 $exon->{'transcript'}=$obj;
96             }
97 6         26 return $obj;
98             }
99              
100              
101             =head2 all_Exons
102              
103             Title : all_Exons
104             Usage : $transcript_obj->all_Exons()
105             Function: returns references to all Exon objects the Transcript is composed of
106             Example : foreach $exon ($transcript->all_Exons()) { do_something }
107             Returns : array of object references
108             Args : none
109              
110             =cut
111              
112             sub all_Exons {
113 306     306 1 319 my $self=shift;
114 306         776 my $exonsref=$self->{'exons'};
115 306         304 my @exons=@{$exonsref};
  306         632  
116 306         360 my @newexons;
117             my $exon;
118 306         466 foreach $exon (@exons) {
119 1642 50       2358 unless ($exon->obj_valid) {
120 0         0 $self->warn("$exon no more valid, start or end label lost, skipping....",1); # ignorable
121             } else {
122 1642         1723 push(@newexons,$exon);
123             }
124             }
125 306 50       691 if ($#exons != $#newexons) {
126             # update exons field
127 0         0 $self->{'exons'}=\@newexons;
128             }
129 306         722 return (@newexons);
130             }
131              
132             =head2 downstream_seq
133              
134             Title : downstream_seq
135             Usage : $transcript_obj->downstream_seq()
136             : $transcript_obj->downstream_seq(64)
137             Function: returns a string of nucleotides downstream of the end of the
138             CDS. If there is some information of the real mRNA, from features in
139             an attached Gene object, it will return up to those boundaries.
140             Otherwise it will return 1000 nucleotides.
141             If an argument is given it will override the default 1000 number
142             and return instead /that/ requested number of nucleotides.
143             But if a Gene object is attached, this argument will be ignored.
144             Returns : string
145             Args : an optional integer number of nucleotides to be returned instead of
146             the default if no gene attached
147              
148             =cut
149              
150             sub downstream_seq {
151 23     23 1 60 my ($self,$howmany)=@_;
152 23         45 my $str;
153 23 50       91 if (defined ($howmany)) {
154 0 0       0 unless ($howmany > 0) {
155 0         0 $self->throw("No sense in asking less than 1 downstream nucleotides!");
156             }
157             } else {
158 23 100       127 unless ($self->{'seq'}->alphabet eq 'rna') { # if rna retrieve until the end
159             #$str=$DNAobj->labelsubseq($self->end,undef,undef,"unsecuremoderequested");
160             #return(substr($str,1)); # delete first nucleotide that is the last of Transcript
161 19 50       79 if ($self->gene) { # if there is Gene object attached fetch relevant info
162 19         94 $str=$self->{'seq'}->labelsubseq($self->end,undef,$self->gene->maxtranscript->end); # retrieve from end of this Transcript to end of the maxtranscript
163 19         77 $str=substr($str,1); # delete first nucleotide that is the last of Transcript
164 19 50       70 if (CORE::length($str) > 0) {
165 19         70 return($str);
166             } else { # if there was no downstream through the gene's maxtranscript, go the usual way
167 0         0 $howmany = 1000;
168             }
169             } else {
170 0         0 $howmany = 1000;
171             }
172             }
173             }
174 4         13 my @exons=$self->all_Exons;
175 4         12 my $strand=$self->strand();
176 4         7 my $lastexon=$exons[-1];
177 4         19 my $lastexonlength=$lastexon->length;
178             # $howmany nucs after end of last exon
179             #my $downstream_seq=$lastexon->subseq($lastexonlength+1,undef,$howmany);
180 4         8 my $downstream_seq;
181              
182 4 50       18 if ($howmany) {
183 0         0 $downstream_seq=substr($lastexon->labelsubseq($self->end,$howmany,undef,"unsecuremoderequested"),1);
184             } else {
185 4 50       14 if ($strand == 1) {
186 4         21 $downstream_seq=substr($lastexon->labelsubseq($self->end,undef,$self->{'seq'}->end,"unsecuremoderequested"),1);
187             } else {
188 0         0 $downstream_seq=substr($lastexon->labelsubseq($self->end,undef,$self->{'seq'}->start,"unsecuremoderequested"),1);
189             }
190             }
191 4         15 return $downstream_seq;
192             }
193              
194             =head2 upstream_seq
195              
196             Title : upstream_seq
197             Usage : $transcript_obj->upstream_seq()
198             : $transcript_obj->upstream_seq(64)
199             Function: just like downstream_seq but returns nucleotides before the ATG
200             Note : the default, if no Gene information present and no nucleotides
201             number given, is to return up to 400 nucleotides.
202              
203             =cut
204              
205             sub upstream_seq {
206 1     1 1 3 my ($self,$howmany)=@_;
207 1 50       3 if (defined ($howmany)) {
208 0 0       0 unless ($howmany > 0) {
209 0         0 $self->throw("No sense in asking less than 1 upstream nucleotides!");
210             }
211             } else {
212 1 50       5 unless ($self->{'seq'}->alphabet eq 'rna') { # if rna retrieve from the start
213 1 50       3 if ($self->gene) { # if there is Gene object attached fetch relevant info
214 1         4 my $str=$self->{'seq'}->labelsubseq($self->gene->maxtranscript->start,undef,$self->start); # retrieve from start of maxtranscript to start of this Transcript
215 1         4 chop $str; # delete last nucleotide that is the A of starting ATG
216 1 50       3 if (length($str) > 0) {
217 1         4 return($str);
218             } else { # if there was no upstream through the gene's maxtranscript, go the usual way
219 0         0 $howmany = 400;
220             }
221             } else {
222 0         0 $howmany = 400;
223             }
224             }
225             }
226 0         0 my @exons=$self->all_Exons;
227 0         0 my $firstexon=$exons[0];
228            
229 0         0 my $upstream_seq;
230 0         0 my $strand=$self->strand();
231              
232 0 0       0 if ($howmany) {# $howmany nucs before begin of first exon
233 0         0 my $labelbefore=$firstexon->label(-$howmany,$firstexon->start);
234 0 0       0 if ($labelbefore < 1) {
235 0 0       0 if ($strand == 1) {
236 0         0 $labelbefore=$self->{'seq'}->start;
237             } else {
238 0         0 $labelbefore=$self->{'seq'}->end;
239             }
240             }
241 0         0 $upstream_seq=$firstexon->labelsubseq($labelbefore,undef,$firstexon->start,"unsecuremoderequested");
242 0         0 chop $upstream_seq;
243             } else {
244 0 0       0 if ($strand == 1) {
245 0         0 $upstream_seq=$firstexon->labelsubseq($self->{'seq'}->start,undef,$self->start,"unsecuremoderequested");
246 0         0 chop $upstream_seq; # delete last nucleotide that is the A of starting ATG
247             } else {
248 0         0 $upstream_seq=$firstexon->labelsubseq($self->{'seq'}->end,undef,$self->start,"unsecuremoderequested");
249 0         0 chop $upstream_seq; # delete last nucleotide that is the A of starting ATG
250             }
251             }
252 0         0 return $upstream_seq;
253             }
254              
255             # These get redefined here, overriding the SeqI one because they draw their
256             # information from the Exons a Transcript is built of
257             # optional argument: firstlabel. If not given, it checks coordinate_start
258             # This is useful when called by Translation
259             # also used by _delete
260             sub label {
261 23     23 1 46 my ($self,$position,$firstlabel)=@_;
262 23 50       66 unless ($position) { # if position = 0 complain ?
263 0         0 $self->warn("Position not given or position 0");
264 0         0 return (-1);
265             }
266 23         91 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
267 23         36 my ($label,@labels,$length,$arraypos);
268 23 100       94 unless (defined ($firstlabel)) {
269 3         19 $firstlabel=$self->coordinate_start; # this is inside Transcript obj
270             }
271 23         65 my $coord_pos=$self->_inside_position($firstlabel);
272 23         113 $length=$self->length;
273             #if ($strand == 1) {
274 23 100       78 if ($position < 1) {
275 10         17 $position++; # to account for missing of 0 position
276             }
277 23         50 $arraypos=$position+$coord_pos-2;
278             #print "\n=-=-=-=-DEBUG: arraypos $arraypos, pos $position, coordpos: $coord_pos";
279 23 50       94 if ($arraypos < 0) {
    50          
280 0         0 $label=$self->{'seq'}->label($arraypos,$start,$strand); #?
281             } elsif ($arraypos >= $length) {
282 0         0 $label=$self->{'seq'}->label($arraypos-$length+2,$end,$strand); #?
283             } else { # inside the Transcript
284 23         78 @labels=$self->all_labels;
285 23         1824 $label=$labels[$arraypos];
286             }
287             #}
288             }
289              
290             # argument: label
291             # returns: position of label according to coord_start
292             # errorcode: 0 label not found
293             # optional argument: firstlabel. If not given, it checks coordinate_start
294             # This is useful when called by Translation
295             sub position {
296 16     16 1 28 my ($self,$label,$firstlabel)=@_;
297 16 50       88 unless ($self->{'seq'}->valid($label)) {
298 0         0 $self->warn("label is not valid");
299 0         0 return (0);
300             }
301 16 100       63 unless (defined ($firstlabel)) {
302 1         3 $firstlabel=$self->coordinate_start; # this is inside Transcript obj
303             }
304 16 50       61 if ($label == $firstlabel) {
305 0         0 return (1);
306             }
307 16         46 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
308 16         24 my ($position,$in_pos,$out_pos,$coord_pos);
309 16         37 my $length=$self->length;
310 16         75 $coord_pos=$self->_inside_position($firstlabel);
311 16 50       92 if ($self->valid($label)) { # if label is inside the Transcript
312 16         70 $in_pos=$self->_inside_position($label);
313 16         58 $position=$in_pos-$coord_pos+1;
314 16 50       51 if ($position <= 0) {
315 0         0 return ($position-1); # accounts for the missing of the 0 position
316             }
317             } else {
318 0 0       0 if ($self->follows($end,$label)) { # label after end of transcript
    0          
319 0         0 $out_pos=$self->{'seq'}->position($label,$end,$strand);
320             #print "\n+++++++++DEBUG label $label FOLLOWS end $end outpos $out_pos coordpos $coord_pos";
321 0         0 $position=$out_pos+$length-$coord_pos;
322             } elsif ($self->follows($label,$start)) { # label before begin of transcript
323             #print "\n+++++++++DEBUG label $label BEFORE start $start outpos $out_pos coordpos $coord_pos";
324 0         0 $out_pos=$self->{'seq'}->position($label,$start,$strand);
325 0         0 $position=$out_pos-$coord_pos+1;
326             } else { # label is in intron (not valid, not after, not before)!
327 0         0 $self->warn("Cannot give position of label pointing to intron according to CDS numbering!",1);
328 0         0 return (0);
329             }
330             }
331 16         99 return ($position);
332             }
333              
334             sub seq {
335 99     99 1 149 my $self=shift;
336 99         147 my ($exon,$str);
337 99         287 my @exons=$self->all_Exons();
338 99         146 foreach $exon (@exons) {
339 531         1501 $str .= $exon->seq();
340             }
341 99         540 return $str;
342             }
343              
344             sub length {
345 39     39 1 68 my $self=shift;
346 39         52 my ($exon,$length);
347 39         109 my @exons=$self->all_Exons();
348 39         80 foreach $exon (@exons) {
349 211         479 $length += $exon->length();
350             }
351 39         97 return $length;
352             }
353              
354             sub all_labels {
355 130     130 1 252 my $self=shift;
356 130         130 my ($exon,@labels);
357 130         368 my @exons=$self->all_Exons();
358 130         194 foreach $exon (@exons) {
359 710         1573 push (@labels,$exon->all_labels());
360             }
361 130         26015 return @labels;
362             }
363              
364             # redefined here so that it will retrieve effective subseq without introns
365             # otherwise it would have retrieved an underlying DNA (possibly with introns)
366             # subsequence
367             # Drawback: this is really bulky, label->position and then a call to
368             # subseq that will do the opposite position-> label
369             #
370             # one day this can be rewritten as the main one so that the normal subseq
371             # will rely on this one and hence avoid this double (useless and lengthy)
372             # conversion between labels and positions
373             sub old_labelsubseq {
374 0     0 0 0 my ($self,$start,$length,$end)=@_;
375 0         0 my ($pos1,$pos2);
376 0 0       0 if ($start) {
377 0 0       0 unless ($self->valid($start)) {
378 0         0 $self->warn("Start label not valid"); return (-1);
  0         0  
379             }
380 0         0 $pos1=$self->position($start);
381             }
382 0 0       0 if ($end) {
383 0 0       0 if ($end == $start) {
384 0         0 $length=1;
385             } else {
386 0 0       0 unless ($self->valid($end)) {
387 0         0 $self->warn("End label not valid"); return (-1);
  0         0  
388             }
389 0 0       0 unless ($self->follows($start,$end) == 1) {
390 0         0 $self->warn("End label does not follow Start label!"); return (-1);
  0         0  
391             }
392 0         0 $pos2=$self->position($end);
393 0         0 undef $length;
394             }
395             }
396 0         0 return ($self->subseq($pos1,$pos2,$length));
397             }
398              
399             # rewritten, eventually
400              
401             sub labelsubseq {
402 12     12 1 27 my ($self,$start,$length,$end,$unsecuremode)=@_;
403 12 50 33     49 unless (defined $unsecuremode &&
404             $unsecuremode eq "unsecuremoderequested")
405             { # to skip security checks (faster)
406 12 50       28 if ($start) {
407 12 50       42 unless ($self->valid($start)) {
408 0         0 $self->warn("Start label not valid"); return (-1);
  0         0  
409             }
410             } else {
411 0         0 $start=$self->start;
412             }
413 12 100       38 if ($end) {
414 6 50       19 if ($end == $start) {
415 0         0 $length=1;
416 0         0 undef $end;
417             } else {
418 6         16 undef $length; # end argument overrides length argument
419 6 50       25 unless ($self->valid($end)) {
420 0         0 $self->warn("End label not valid"); return (-1);
  0         0  
421             }
422 6 50       52 unless ($self->follows($start,$end) == 1) {
423 0         0 $self->warn("End label does not follow Start label!"); return (-1);
  0         0  
424             }
425             }
426             } else {
427 6         30 $end=$self->end;
428             }
429             }
430 12         28 my ($seq,$exon,$startexon,$endexon); my @exonlabels;
  0         0  
431 12         38 my @exons=$self->all_Exons;
432             EXONCHECK:
433 12         27 foreach $exon (@exons) {
434 56 100 100     193 if ((!(defined($startexon)))&&($exon->valid($start))) { # checks only if not yet found
435 12         22 $startexon=$exon;
436             }
437 56 100       142 if ($exon->valid($end)) {
438 10         19 $endexon=$exon;
439             }
440 56 100 100     230 if ((!(defined($seq)) && (defined($startexon)))) { # initializes only once
441 12 100 66     69 if ((defined($endexon)) && ($endexon eq $startexon)) { # then perfect, we are finished
442 5 100       15 if ($length) {
443 1         6 $seq = $startexon->labelsubseq($start,$length,undef,"unsecuremoderequested");
444              
445              
446 1         4 last EXONCHECK;
447             } else {
448 4         22 $seq = $startexon->labelsubseq($start,undef,$end,"unsecuremoderequested");
449             }
450 4         13 last EXONCHECK;
451             } else { # get up to the end of the exon
452 7         46 $seq = $startexon->labelsubseq($start,undef,undef,"unsecuremoderequested");
453             }
454             }
455 51 100 100     169 if (($startexon)&&($exon ne $startexon)) {
456 7 100       24 if (defined($endexon)) { # we arrived to the last exon
    50          
457 5         17 $seq .= $endexon->labelsubseq(undef,undef,$end,"unsecuremoderequested"); # get from the start of the exon
458 5         19 last EXONCHECK;
459              
460             } elsif (defined($startexon)) { # we are in a whole-exon-in-the-middle case
461 2         16 $seq .= $exon->seq; # we add it completely to the seq
462             } # else, we still have to reach the start point, exon useless, we move on
463 2 50       12 if ($length) { # if length argument specified
464 2 50 33     15 if (($seq && (CORE::length($seq) >= $length))) {
465 2         6 last EXONCHECK;
466             }
467             }
468             }
469             }
470 12 100       32 if ($length) {
471 6         45 return (substr($seq,0,$length));
472             } else {
473 6         39 return ($seq);
474             }
475             }
476              
477              
478             # argument: label
479             # returns: the objref and progressive number of the Exon containing that label
480             # errorcode: -1
481             sub in_which_Exon {
482 1     1 0 2 my ($self,$label)=@_;
483 1         2 my ($count,$exon);
484 1         119 my @exons=$self->all_Exons;
485 1         4 foreach $exon (@exons) {
486 7         8 $count++; # 1st exon is numbered "1"
487 7 100       13 if ($exon->valid($label)) {
488 1         4 return ($exon,$count)
489             }
490             }
491 0         0 return (-1); # if nothing found
492             }
493              
494             # recoded to exploit the new fast labelsubseq()
495             # valid only inside Transcript
496             sub subseq {
497 0     0 1 0 my ($self,$pos1,$pos2,$length) = @_;
498 0         0 my ($str,$startlabel,$endlabel);
499 0 0       0 if (defined ($pos1)) {
500 0 0       0 if ($pos1 == 0) { # if position = 0 complain
501 0         0 $self->warn("Position cannot be 0!"); return (-1);
  0         0  
502             }
503 0 0 0     0 if ((defined ($pos2))&&($pos1>$pos2)) {
504 0         0 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
  0         0  
505             }
506 0         0 $startlabel=$self->label($pos1);
507 0 0       0 unless ($self->valid($startlabel)) {
508 0         0 $self->warn("Start label not valid"); return (-1);
  0         0  
509             }
510 0 0       0 if ($startlabel < 1) {
511 0         0 $self->warn("position $pos1 not valid as start of subseq!"); return (-1);
  0         0  
512             }
513             } else {
514 0         0 $startlabel=$self->start;
515             }
516 0 0       0 if (defined ($pos2)) {
517 0 0       0 if ($pos2 == 0) { # if position = 0 complain
518 0         0 $self->warn("Position cannot be 0!"); return (-1);
  0         0  
519             }
520 0         0 undef $length;
521 0 0 0     0 if ((defined ($pos1))&&($pos1>$pos2)) {
522 0         0 $self->warn("1st position($pos1) cannot be > 2nd position($pos2)!"); return (-1);
  0         0  
523             }
524 0         0 $endlabel=$self->label($pos2);
525 0 0       0 unless ($self->valid($endlabel)) {
526 0         0 $self->warn("End label not valid"); return (-1);
  0         0  
527             }
528 0 0       0 if ($endlabel < 1) {
529 0         0 $self->warn("position $pos2 not valid as end of subseq!"); return (-1);
  0         0  
530             }
531             } else {
532 0 0       0 unless (defined ($length)) {
533 0         0 $endlabel=$self->end;
534             }
535             }
536 0         0 return ($self->labelsubseq($startlabel,$length,$endlabel,"unsecuremoderequested"));
537             }
538              
539             # works only inside the transcript, complains if asked outside
540             sub old_subseq {
541 0     0 0 0 my ($self,$pos1,$pos2,$length) = @_;
542 0         0 my ($str,$startcount,$endcount,$seq,$seqlength);
543 0 0       0 if (defined ($length)) {
544 0 0       0 if ($length < 1) {
545 0         0 $self->warn("No sense asking for a subseq of length < 1");
546 0         0 return (-1);
547             }
548             }
549 0         0 my $firstlabel=$self->coordinate_start; # this is inside Transcript obj
550 0         0 my $coord_pos=$self->_inside_position($firstlabel); # TESTME old
551 0         0 $seq=$self->seq;
552 0         0 $seqlength=CORE::length($seq);
553 0 0       0 unless (defined ($pos1)) {
554 0         0 $startcount=1+$coord_pos-1; # i.e. coord_pos
555             } else {
556 0 0       0 if ($pos1 == 0) { # if position = 0 complain
    0          
557 0         0 $self->warn("Position cannot be 0!"); return (-1);
  0         0  
558             } elsif ($pos1 < 0) {
559 0         0 $pos1++;
560             }
561 0 0 0     0 if ((defined ($pos2))&&($pos1>$pos2)) {
562 0         0 $self->warn("1st position ($pos1) cannot be > 2nd position ($pos2)!");
563 0         0 return (-1);
564             }
565 0         0 $startcount=$pos1+$coord_pos-1;
566             }
567 0 0       0 unless (defined ($pos2)) {
568             ;
569             } else {
570 0 0       0 if ($pos2 == 0) { # if position = 0 complain
    0          
571 0         0 $self->warn("Position cannot be 0!"); return (-1);
  0         0  
572             } elsif ($pos2 < 0) {
573 0         0 $pos2++;
574             }
575 0 0 0     0 if ((defined ($pos1))&&($pos1>$pos2)) {
576 0         0 $self->warn("1st position ($pos1) cannot be > 2nd position ($pos2)!");
577 0         0 return (-1);
578             }
579 0         0 $endcount=$pos2+$coord_pos-1;
580 0 0       0 if ($endcount > $seqlength) {
581             #print "\n###DEBUG###: pos1 $pos1 pos2 $pos2 coordpos $coord_pos endcount $endcount seqln $seqlength\n";
582 0         0 $self->warn("Cannot access end position after the end of Transcript");
583 0         0 return (-1);
584             }
585 0         0 $length=$endcount-$startcount+1;
586             }
587             #print "\n###DEBUG pos1 $pos1 pos2 $pos2 start $startcount end $endcount length $length coordpos $coord_pos\n";
588 0         0 my $offset=$startcount-1;
589 0 0       0 if ($offset < 0) {
    0          
590 0         0 $self->warn("Cannot access startposition before the beginning of Transcript, returning from start",1); # ignorable
591 0         0 return (substr($seq,0,$length));
592             } elsif ($offset >= $seqlength) {
593 0         0 $self->warn("Cannot access startposition after the end of Transcript");
594 0         0 return (-1);
595             } else {
596 0         0 $str=substr($seq,$offset,$length);
597 0 0       0 if (CORE::length($str) < $length) {
598 0 0       0 $self->warn("Attention, cannot return the length requested ".
599             "for subseq",1) if $self->verbose > 0; # ignorable
600             }
601 0         0 return $str;
602             }
603             }
604              
605             # redefined so that it doesn't require other methods (after deletions) to
606             # reset it.
607             sub start {
608 131     131 1 145 my $self = shift;
609 131         185 my $exonsref=$self->{'exons'};
610 131         117 my @exons=@{$exonsref};
  131         270  
611 131         357 return ($exons[0]->start);
612             }
613              
614             sub end {
615 140     140 1 201 my $self = shift;
616 140         160 my $exonsref=$self->{'exons'};
617 140         134 my @exons=@{$exonsref};
  140         292  
618 140         278 return ($exons[-1]->end);
619             }
620              
621              
622             # internal methods begin here
623              
624             # returns: position of label in transcript's all_labels
625             # with STARTlabel == 1
626             # errorcode 0 -> label not found
627             # argument: label
628             sub _inside_position {
629 55     55   100 my ($self,$label)=@_;
630 55         137 my ($start,$end,$strand)=($self->start(),$self->end(),$self->strand());
631 55         63 my ($position,$checkme);
632 55         161 my @labels=$self->all_labels;
633 55         2064 foreach $checkme (@labels) {
634 59926         29761 $position++;
635 59926 100       64359 if ($label == $checkme) {
636 55         2179 return ($position);
637             }
638             }
639 0         0 return (0);
640             }
641              
642             # returns 1 OK or 0 ERROR
643             # arguments: reference to array of Exon object references
644             sub _checkexons {
645 6     6   11 my ($exon,$thisstart);
646 6         8 my $self=$exon;
647 6         8 my $exonsref=$_[0];
648 6         7 my @exons=@{$exonsref};
  6         13  
649              
650 6         9 my $firstexon = $exons[0];
651              
652 6 50       19 unless (ref($firstexon) eq "Bio::LiveSeq::Exon") {
653 0         0 $self->warn("Object not of class Exon");
654 0         0 return (0);
655             }
656 6         12 my $strand = $firstexon->strand;
657              
658 6         24 my $prevend = $firstexon->end;
659 6         6 shift @exons; # skip first one
660 6         14 foreach $exon (@exons) {
661 28 50       65 unless (ref($exon) eq "Bio::LiveSeq::Exon") { # object class check
662 0         0 $self->warn("Object not of class Exon");
663 0         0 return (0);
664             }
665 28 50       44 if ($exon->strand != $strand) { # strand consistency check
666 0         0 $self->warn("Exons' strands not consistent when trying to create Transcript");
667 0         0 return (0);
668             }
669 28         51 $thisstart = $exon->start;
670 28 50       57 unless ($exon->{'seq'}->follows($prevend,$thisstart,$strand)) {
671 0         0 $self->warn("Exons not in correct order when trying to create Transcript");
672 0         0 return (0);
673             }
674 28         70 $prevend = $exon->end;
675             }
676 6         28 return (1);
677             }
678              
679             =head2 get_Translation
680              
681             Title : valid
682             Usage : $translation = $obj->get_Translation()
683             Function: retrieves the reference to the object of class Translation (if any)
684             attached to a LiveSeq object
685             Returns : object reference
686             Args : none
687              
688             =cut
689              
690             sub get_Translation {
691 21     21 1 37 my $self=shift;
692 21         94 return ($self->{'translation'}); # this is set when Translation->new is called
693             }
694              
695             # this checks so that deletion spanning multiple exons is
696             # handled accordingly and correctly
697             # arguments: begin and end label of a deletion
698             # this is called BEFORE any deletion in the chain
699             sub _deletecheck {
700 0     0   0 my ($self,$startlabel,$endlabel)=@_;
701 0         0 my $exonsref=$self->{'exons'};
702 0         0 my @exons=@{$exonsref};
  0         0  
703 0         0 my ($startexon,$endexon,$exon);
704 0         0 $startexon=$endexon=0;
705 0         0 foreach $exon (@exons) {
706 0 0 0     0 if (($startexon == 0)&&($exon->valid($startlabel))) {
707 0         0 $startexon=$exon; # exon containing start of deletion
708             }
709 0 0 0     0 if (($endexon == 0)&&($exon->valid($endlabel))) {
710 0         0 $endexon=$exon; # exon containing end of deletion
711             }
712 0 0 0     0 if (($startexon)&&($endexon)) {
713 0         0 last; # don't check further
714             }
715             }
716 0         0 my $nextend=$self->label(2,$endlabel); # retrieve the next label
717 0         0 my $prevstart=$self->label(-1,$startlabel); # retrieve the prev label
718              
719 0 0       0 if ($startexon eq $endexon) { # intra-exon deletion
720 0 0 0     0 if (($startexon->start eq $startlabel) && ($startexon->end eq $endlabel)) {
    0          
    0          
721             # let's delete the entire exon
722 0         0 my @newexons;
723 0         0 foreach $exon (@exons) {
724 0 0       0 unless ($exon eq $startexon) {
725 0         0 push(@newexons,$exon);
726             }
727             }
728 0         0 $self->{'exons'}=\@newexons;
729             } elsif ($startexon->start eq $startlabel) { # special cases
730 0         0 $startexon->{'start'}=$nextend; # set a new start of exon
731             } elsif ($startexon->end eq $endlabel) {
732 0         0 $startexon->{'end'}=$prevstart; # set a new end of exon
733             } else {
734 0         0 return; # no problem
735             }
736             } else { # two new exons to be created, inter-exons deletion
737 0         0 my @newexons;
738             my $exonobj;
739 0         0 my $dna=$self->{'seq'};
740 0         0 my $strand=$self->strand;
741 0         0 my $notmiddle=1; # flag for skipping exons in the middle of deletion
742 0         0 foreach $exon (@exons) {
743 0 0       0 if ($exon eq $startexon) {
    0          
744 0         0 $exonobj=Bio::LiveSeq::Exon->new('-seq'=>$dna,'-start'=>$exon->start,'-end'=>$prevstart,'-strand'=>$strand); # new partial exon
745 0         0 push(@newexons,$exonobj);
746 0         0 $notmiddle=0; # now we enter totally deleted exons
747             } elsif ($exon eq $endexon) {
748 0         0 $exonobj=Bio::LiveSeq::Exon->new('-seq'=>$dna,'-start'=>$nextend,'-end'=>$exon->end,'-strand'=>$strand); # new partial exon
749 0         0 push(@newexons,$exonobj);
750 0         0 $notmiddle=1; # exiting totally deleted exons
751             } else {
752 0 0       0 if ($notmiddle) { # if before or after exons with deletion
753 0         0 push(@newexons,$exon);
754             }# else skip them
755             }
756             }
757 0         0 $self->{'exons'}=\@newexons;
758             }
759             }
760              
761             =head2 translation_table
762              
763             Title : translation_table
764             Usage : $name = $obj->translation_table;
765             : $name = $obj->translation_table(11);
766             Function: Returns or sets the translation_table used for translating the
767             transcript.
768             If it has never been set, it will return undef.
769             Returns : an integer
770              
771             =cut
772              
773             sub translation_table {
774 46     46 1 85 my ($self,$value) = @_;
775 46 100       122 if (defined $value) {
776 1         5 $self->{'translation_table'} = $value;
777             }
778 46 100       126 unless (exists $self->{'translation_table'}) {
779 42         256 return;
780             } else {
781 4         20 return $self->{'translation_table'};
782             }
783             }
784              
785             =head2 frame
786              
787             Title : frame
788             Usage : $frame = $transcript->frame($label);
789             Function: Returns the frame of a particular nucleotide.
790             Frame can be 0 1 or 2 and means the position in the codon triplet
791             of the particulat nucleotide. 0 is the first codon_position.
792             Codon_position (1 2 3) is simply frame+1.
793             If the label asked for is not inside the Transcript, -1 will be
794             returned.
795             Args : a label
796             Returns : 0 1 or 2
797             Errorcode -1
798              
799             =cut
800              
801             # args: label
802             # returns: frame of nucleotide (0 1 2)
803             # errorcode: -1
804             sub frame {
805 7     7 1 445 my ($self,$inputlabel)=@_;
806 7         21 my @labels=$self->all_labels;
807 7         240 my ($label,$frame,$count);
808 7         17 foreach $label (@labels) {
809 9447 100       9140 if ($inputlabel == $label) {
810 7         330 return ($count % 3);
811             }
812 9440         5667 $count++; # 0 1 2 3 4....
813             }
814 0           return (-1); # label not found amid Transcript labels
815             }
816              
817             1;