File Coverage

Bio/LiveSeq/Gene.pm
Criterion Covered Total %
statement 148 220 67.2
branch 53 86 61.6
condition 32 96 33.3
subroutine 17 23 73.9
pod 2 19 10.5
total 252 444 56.7


line stmt bran cond sub pod time code
1             #
2             # bioperl module for Bio::LiveSeq::Gene
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Joseph Insana
7             #
8             # Copyright Joseph Insana
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::LiveSeq::Gene - Range abstract class for LiveSeq
17              
18             =head1 SYNOPSIS
19              
20             # documentation needed
21              
22             =head1 DESCRIPTION
23              
24             This is used as storage for all object references concerning a particular gene.
25              
26             =head1 AUTHOR - Joseph A.L. Insana
27              
28             Email: Insana@ebi.ac.uk, jinsana@gmx.net
29              
30             =head1 APPENDIX
31              
32             The rest of the documentation details each of the object
33             methods. Internal methods are usually preceded with a _
34              
35             =cut
36              
37             # Let the code begin...
38              
39             package Bio::LiveSeq::Gene;
40 2     2   6 use strict;
  2         3  
  2         49  
41 2     2   6 use Carp;
  2         1  
  2         90  
42 2     2   472 use Bio::LiveSeq::Prim_Transcript; # needed to create maxtranscript obj
  2         3  
  2         2739  
43              
44             =head2 new
45              
46             Title : new
47             Usage : $gene = Bio::LiveSeq::Gene->new(-name => "name",
48             -features => $hashref
49             -upbound => $min
50             -downbound => $max);
51              
52             Function: generates a new Bio::LiveSeq::Gene
53             Returns : reference to a new object of class Gene
54             Errorcode -1
55             Args : one string and one hashreference containing all features defined
56             for the Gene and the references to the LiveSeq objects for those
57             features.
58             Two labels for defining boundaries of the gene. Usually the
59             boundaries will reflect max span of transcript, exon... features,
60             while the DNA sequence will be created with some flanking regions
61             (e.g. with the EMBL_SRS::gene2liveseq routine).
62             If these two labels are not given, they will default to the start
63             and end of the DNA object.
64             Note : the format of the hash has to be like
65             DNA => reference to LiveSeq::DNA object
66             Transcripts => reference to array of transcripts objrefs
67             Transclations => reference to array of transcripts objrefs
68             Exons => ....
69             Introns => ....
70             Prim_Transcripts => ....
71             Repeat_Units => ....
72             Repeat_Regions => ....
73             Only DNA and Transcripts are mandatory
74              
75             =cut
76              
77             sub new {
78 6     6 1 30 my ($thing, %args) = @_;
79 6   33     33 my $class = ref($thing) || $thing;
80 6         9 my ($i,$self,%gene);
81              
82 6         26 my ($name,$inputfeatures,$upbound,$downbound)=($args{-name},$args{-features},$args{-upbound},$args{-downbound});
83              
84 6 50       21 unless (ref($inputfeatures) eq "HASH") {
85 0         0 carp "$class not initialised because features hash not given";
86 0         0 return (-1);
87             }
88              
89 6         6 my %features=%{$inputfeatures}; # this is done to make our own hash&ref, not
  6         41  
90 6         11 my $features=\%features; # the ones input'ed, that could get destroyed
91            
92 6         10 my $DNA=$features->{'DNA'};
93 6 50       21 unless (ref($DNA) eq "Bio::LiveSeq::DNA") {
94 0         0 carp "$class not initialised because DNA feature not found";
95 0         0 return (-1);
96             }
97              
98 6         10 my ($minstart,$maxend);# used to calculate Gene->maxtranscript from Exon, Transcript (CDS) and Prim_Transcript features
99              
100 0         0 my ($start,$end);
101              
102 6         6 my @Transcripts=@{$features->{'Transcripts'}};
  6         13  
103              
104 6         9 my $strand;
105 6 50       20 unless (ref($Transcripts[0]) eq "Bio::LiveSeq::Transcript") {
106 0         0 $self->warn("$class not initialised: first Transcript not a LiveSeq object");
107 0         0 return (-1);
108             } else {
109 6         24 $strand=$Transcripts[0]->strand; # for maxtranscript consistency check
110             }
111              
112 6         12 for $i (@Transcripts) {
113 6         28 ($start,$end)=($i->start,$i->end);
114 6 50 33     45 unless ((ref($i) eq "Bio::LiveSeq::Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) {
115 0         0 $self->warn("$class not initialised because of problems in Transcripts feature");
116 0         0 return (-1);
117             } else {
118             }
119 6 50       15 unless($minstart) { $minstart=$start; } # initialize
  6         12  
120 6 50       13 unless($maxend) { $maxend=$end; } # initialize
  6         9  
121 6 50       14 if ($i->strand != $strand) {
122 0         0 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
123 0         0 return (-1);
124             }
125 6 50 33     67 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
  0   33     0  
      33        
126 6 50 33     47 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
  0   33     0  
      33        
127             }
128 6         8 my @Translations; my @Introns; my @Repeat_Units; my @Repeat_Regions;
  0         0  
  0         0  
129 0         0 my @Prim_Transcripts; my @Exons;
  0         0  
130 6 50       13 if (defined($features->{'Translations'})) {
131 6         10 @Translations=@{$features->{'Translations'}}; }
  6         16  
132 6 100       15 if (defined($features->{'Exons'})) {
133 1         1 @Exons=@{$features->{'Exons'}}; }
  1         4  
134 6 100       16 if (defined($features->{'Introns'})) {
135 1         3 @Introns=@{$features->{'Introns'}}; }
  1         3  
136 6 50       16 if (defined($features->{'Repeat_Units'})) {
137 0         0 @Repeat_Units=@{$features->{'Repeat_Units'}}; }
  0         0  
138 6 50       18 if (defined($features->{'Repeat_Regions'})) {
139 0         0 @Repeat_Regions=@{$features->{'Repeat_Regions'}}; }
  0         0  
140 6 100       13 if (defined($features->{'Prim_Transcripts'})) {
141 5         10 @Prim_Transcripts=@{$features->{'Prim_Transcripts'}}; }
  5         11  
142              
143            
144 6 50       12 if (@Translations) {
145 6         10 for $i (@Translations) {
146 6         22 ($start,$end)=($i->start,$i->end);
147 6 50 33     27 unless ((ref($i) eq "Bio::LiveSeq::Translation")&&($DNA->valid($start))&&($DNA->valid($end))) {
      33        
148 0         0 $self->warn("$class not initialised because of problems in Translations feature");
149 0         0 return (-1);
150             }
151             }
152             }
153 6 100       14 if (@Exons) {
154 1         1 for $i (@Exons) {
155 9         14 ($start,$end)=($i->start,$i->end);
156 9 50 33     22 unless ((ref($i) eq "Bio::LiveSeq::Exon")&&($DNA->valid($start))&&($DNA->valid($end))) {
      33        
157 0         0 $self->warn("$class not initialised because of problems in Exons feature");
158 0         0 return (-1);
159             }
160 9 50       12 if ($i->strand != $strand) {
161 0         0 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
162 0         0 return (-1);
163             }
164 9 50 33     41 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
  0   33     0  
      33        
165 9 50 33     41 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
  0   33     0  
      33        
166             }
167             }
168 6 100       16 if (@Introns) {
169 1         2 for $i (@Introns) {
170 8         22 ($start,$end)=($i->start,$i->end);
171 8 50 33     20 unless ((ref($i) eq "Bio::LiveSeq::Intron")&&($DNA->valid($start))&&($DNA->valid($end))) {
      33        
172 0         0 $self->warn("$class not initialised because of problems in Introns feature");
173 0         0 return (-1);
174             }
175             }
176             }
177 6 50       15 if (@Repeat_Units) {
178 0         0 for $i (@Repeat_Units) {
179 0         0 ($start,$end)=($i->start,$i->end);
180 0 0 0     0 unless ((ref($i) eq "Bio::LiveSeq::Repeat_Unit")&&($DNA->valid($start))&&($DNA->valid($end))) {
      0        
181 0         0 $self->warn("$class not initialised because of problems in Repeat_Units feature");
182 0         0 return (-1);
183             }
184             }
185             }
186 6 50       15 if (@Repeat_Regions) {
187 0         0 for $i (@Repeat_Regions) {
188 0         0 ($start,$end)=($i->start,$i->end);
189 0 0 0     0 unless ((ref($i) eq "Bio::LiveSeq::Repeat_Region")&&($DNA->valid($start))&&($DNA->valid($end))) {
      0        
190 0         0 $self->warn("$class not initialised because of problems in Repeat_Regions feature");
191 0         0 return (-1);
192             }
193             }
194             }
195 6 100       13 if (@Prim_Transcripts) {
196 5         12 for $i (@Prim_Transcripts) {
197 7         30 ($start,$end)=($i->start,$i->end);
198 7 50 33     37 unless ((ref($i) eq "Bio::LiveSeq::Prim_Transcript")&&($DNA->valid($start))&&($DNA->valid($end))) {
      33        
199 0         0 $self->warn("$class not initialised because of problems in Prim_Transcripts feature");
200 0         0 return (-1);
201             }
202 7 50       23 if ($i->strand != $strand) {
203 0         0 $self->warn("$class not initialised because exon-CDS-prim_transcript features do not share the same strand!");
204 0         0 return (-1);
205             }
206 7 100 66     71 if (($strand == 1)&&($start < $minstart)||($strand == -1)&&($start > $minstart)) { $minstart=$start; }
  5   33     7  
      66        
207 7 100 66     44 if (($strand == 1)&&($end > $maxend)||($strand == -1)&&($end < $maxend)) { $maxend=$end; }
  5   33     10  
      66        
208             }
209             }
210              
211             # create an array containing all obj references for all Gene Features
212             # useful for _set_Gene_in_all
213 6         10 my @allfeatures;
214 6         20 push (@allfeatures,$DNA,@Transcripts,@Translations,@Exons,@Introns,@Repeat_Units,@Repeat_Regions,@Prim_Transcripts);
215              
216             # create hash holding numbers for Gene Features
217 6         8 my %multiplicity;
218 0         0 my $key; my @array;
219 6         16 foreach $key (keys(%features)) {
220 25 100       43 unless ($key eq "DNA") {
221 19         14 @array=@{$features{$key}};
  19         31  
222 19         27 $multiplicity{$key}=scalar(@array);
223             }
224             }
225 6         12 $multiplicity{DNA}=1;
226              
227             # create maxtranscript object. It's a Prim_Transcript with start as the
228             # minimum start and end as the maximum end.
229             # usually these start and end will be the same as the gene->upbound and
230             # gene->downbound, but maybe there could be cases when this will be false
231             # (e.g. with repeat_units just before the prim_transcript or first exon,
232             # but still labelled with the same /gene qualifier)
233              
234 6         35 my $maxtranscript=Bio::LiveSeq::Prim_Transcript->new(-start => $minstart, -end => $maxend, -strand => $strand, -seq => $DNA);
235              
236              
237             # check the upbound downbound parameters
238 6 50       25 if (defined($upbound)) {
239 6 50       28 unless ($DNA->valid($upbound)) {
240 0         0 $self->warn("$class not initialised because upbound label not valid");
241 0         0 return (-1);
242             }
243             } else {
244 0         0 $upbound=$DNA->start;
245             }
246 6 50       14 if (defined($downbound)) {
247 6 50       14 unless ($DNA->valid($downbound)) {
248 0         0 $self->warn("$class not initialised because downbound label not valid");
249 0         0 return (-1);
250             }
251             } else {
252 0         0 $downbound=$DNA->end;
253             }
254              
255 6         55 %gene = (name => $name, features => $features,multiplicity => \%multiplicity,
256             upbound => $upbound, downbound => $downbound, allfeatures => \@allfeatures, maxtranscript => $maxtranscript);
257 6         13 $self = \%gene;
258 6         19 $self = bless $self, $class;
259 6         23 _set_Gene_in_all($self,@allfeatures);
260 6         193 return $self;
261             }
262              
263             # this sets the "gene" objref in all the objects "belonging" to the Gene,
264             # i.e. in all its Features.
265             sub _set_Gene_in_all {
266 6     6   10 my $Gene=shift;
267 6         10 my $self;
268 6         15 foreach $self (@_) {
269 42         101 $self->gene($Gene);
270             }
271             }
272              
273             # you can get or set the name of the gene
274             sub name {
275 1     1 0 854 my ($self,$value) = @_;
276 1 50       5 if (defined $value) {
277 0         0 $self->{'name'} = $value;
278             }
279 1 50       4 unless (exists $self->{'name'}) {
280 0         0 return "unknown";
281             } else {
282 1         4 return $self->{'name'};
283             }
284             }
285              
286             # gets the features hash
287             sub features {
288 0     0 0 0 my $self=shift;
289 0         0 return ($self->{'features'});
290             }
291             sub get_DNA {
292 21     21 0 28 my $self=shift;
293 21         81 return ($self->{'features'}->{'DNA'});
294             }
295             sub get_Transcripts {
296 6     6 0 9 my $self=shift;
297 6         19 return ($self->{'features'}->{'Transcripts'});
298             }
299             sub get_Translations {
300 1     1 0 3 my $self=shift;
301 1         3 return ($self->{'features'}->{'Translations'});
302             }
303             sub get_Prim_Transcripts {
304 0     0 0 0 my $self=shift;
305 0         0 return ($self->{'features'}->{'Prim_Transcripts'});
306             }
307             sub get_Repeat_Units {
308 1     1 0 2 my $self=shift;
309 1         5 return ($self->{'features'}->{'Repeat_Units'});
310             }
311             sub get_Repeat_Regions {
312 0     0 0 0 my $self=shift;
313 0         0 return ($self->{'features'}->{'Repeat_Regions'});
314             }
315             sub get_Introns {
316 1     1 0 5 my $self=shift;
317 1         3 return ($self->{'features'}->{'Introns'});
318             }
319             sub get_Exons {
320 1     1 0 2 my $self=shift;
321 1         24 return ($self->{'features'}->{'Exons'});
322             }
323             sub featuresnum {
324 0     0 0 0 my $self=shift;
325 0         0 return ($self->{'multiplicity'});
326             }
327             sub upbound {
328 1     1 0 2 my $self=shift;
329 1         4 return ($self->{'upbound'});
330             }
331             sub downbound {
332 1     1 0 1 my $self=shift;
333 1         4 return ($self->{'downbound'});
334             }
335             sub printfeaturesnum {
336 0     0 0 0 my $self=shift;
337 0         0 my ($key,$value);
338 0         0 my %hash=%{$self->featuresnum};
  0         0  
339 0         0 foreach $key (keys(%hash)) {
340 0         0 $value=$hash{$key};
341 0         0 print "\t$key => $value\n";
342             }
343             }
344             sub maxtranscript {
345 22     22 0 51 my $self=shift;
346 22         102 return ($self->{'maxtranscript'});
347             }
348              
349             sub delete_Obj {
350 24     24 0 18 my $self = shift;
351 24         17 my @values= values %{$self};
  24         32  
352 24         16 my @keys= keys %{$self};
  24         25  
353              
354 24         21 foreach my $key ( @keys ) {
355 7         8 delete $self->{$key};
356             }
357 24         21 foreach my $value ( @values ) {
358 7 100       31 if (index(ref($value),"LiveSeq") != -1) { # object case
    100          
    100          
359 1         2 eval {
360             # delete $self->{$value};
361 1         3 $value->delete_Obj;
362             };
363             } elsif (index(ref($value),"ARRAY") != -1) { # array case
364 1         3 my @array=@{$value};
  1         10  
365 1         2 my $element;
366 1         3 foreach $element (@array) {
367 23         15 eval {
368 23         50 $element->delete_Obj;
369             };
370             }
371             } elsif (index(ref($value),"HASH") != -1) { # object case
372 2         4 my %hash=%{$value};
  2         11  
373 2         2 my $element;
374 2         5 foreach $element (%hash) {
375 24         20 eval {
376 24         128 $element->delete_Obj;
377             };
378             }
379             }
380             }
381 24         43 return(1);
382             }
383              
384              
385             =head2 verbose
386              
387             Title : verbose
388             Usage : $self->verbose(0)
389             Function: Sets verbose level for how ->warn behaves
390             -1 = silent: no warning
391             0 = reduced: minimal warnings
392             1 = default: all warnings
393             2 = extended: all warnings + stack trace dump
394             3 = paranoid: a warning becomes a throw and the program dies
395              
396             Note: a quick way to set all LiveSeq objects at the same verbosity
397             level is to change the DNA level object, since they all look to
398             that one if their verbosity_level attribute is not set.
399             But the method offers fine tuning possibility by changing the
400             verbose level of each object in a different way.
401              
402             So for example, after $loader= and $gene= have been retrieved
403             by a program, the command $gene->verbose(0); would
404             set the default verbosity level to 0 for all objects.
405              
406             Returns : the current verbosity level
407             Args : -1,0,1,2 or 3
408              
409             =cut
410              
411              
412             sub verbose {
413 1     1 1 2 my $self=shift;
414 1         2 my $value = shift;
415 1         11 return $self->{'features'}->{'DNA'}->verbose($value);
416             }
417              
418             sub warn {
419 0     0 0   my $self=shift;
420 0           my $value = shift;
421 0           return $self->{'features'}->{'DNA'}->warn($value);
422             }
423              
424              
425              
426             1;