File Coverage

Bio/Seq/PrimaryQual.pm
Criterion Covered Total %
statement 79 106 74.5
branch 44 56 78.5
condition 14 16 87.5
subroutine 14 17 82.3
pod 14 15 93.3
total 165 210 78.5


line stmt bran cond sub pod time code
1             #
2             # bioperl module for Bio::PrimaryQual
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Chad Matsalla
7             #
8             # Copyright Chad Matsalla
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Seq::PrimaryQual - Bioperl lightweight Quality Object
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Seq::PrimaryQual;
21              
22             # you can use either a space-delimited string for quality
23              
24             my $string_quals = "10 20 30 40 50 40 30 20 10";
25             my $qualobj = Bio::Seq::PrimaryQual->new(
26             -qual => $string_quals,
27             -id => 'QualityFragment-12',
28             -accession_number => 'X78121',
29             );
30              
31             # _or_ you can use an array of quality values
32              
33             my @q2 = split/ /,$string_quals;
34             $qualobj = Bio::Seq::PrimaryQual->new(
35             -qual => \@q2,
36             -primary_id => 'chads primary_id',
37             -desc => 'chads desc',
38             -accession_number => 'chads accession_number',
39             -id => 'chads id'
40             );
41              
42             # to get the quality values out:
43              
44             my @quals = @{$qualobj->qual()};
45              
46             # to give _new_ quality values
47              
48             my $newqualstring = "50 90 1000 20 12 0 0";
49             $qualobj->qual($newqualstring);
50              
51              
52             =head1 DESCRIPTION
53              
54             This module provides a mechanism for storing quality
55             values. Much more useful as part of
56             Bio::Seq::SeqWithQuality where these quality values
57             are associated with the sequence information.
58              
59             =head1 FEEDBACK
60              
61             =head2 Mailing Lists
62              
63             User feedback is an integral part of the evolution of this and other
64             Bioperl modules. Send your comments and suggestions preferably to one
65             of the Bioperl mailing lists. Your participation is much appreciated.
66              
67             bioperl-l@bioperl.org - General discussion
68             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
69              
70             =head2 Support
71              
72             Please direct usage questions or support issues to the mailing list:
73              
74             I
75              
76             rather than to the module maintainer directly. Many experienced and
77             reponsive experts will be able look at the problem and quickly
78             address it. Please include a thorough description of the problem
79             with code and data examples if at all possible.
80              
81             =head2 Reporting Bugs
82              
83             Report bugs to the Bioperl bug tracking system to help us keep track
84             the bugs and their resolution. Bug reports can be submitted via the
85             web:
86              
87             https://github.com/bioperl/bioperl-live/issues
88              
89             =head1 AUTHOR - Chad Matsalla
90              
91             Email bioinformatics@dieselwurks.com
92              
93             =head1 APPENDIX
94              
95             The rest of the documentation details each of the object methods. Internal methods are usually preceded with a _
96              
97             =cut
98              
99              
100             package Bio::Seq::PrimaryQual;
101              
102 5     5   2244 use strict;
  5         10  
  5         188  
103              
104 5     5   25 use base qw(Bio::Root::Root Bio::Seq::QualI);
  5         37  
  5         2540  
105              
106             our $MATCHPATTERN = '0-9eE\.\s+-';
107              
108              
109             =head2 new()
110              
111             Title : new()
112             Usage : $qual = Bio::Seq::PrimaryQual->new(
113             -qual => '10 20 30 40 50 50 20 10',
114             -id => 'human_id',
115             -accession_number => 'AL000012',
116             );
117              
118             Function: Returns a new Bio::Seq::PrimaryQual object from basic
119             constructors, being a string _or_ a reference to an array for the
120             sequence and strings for id and accession_number. Note that you
121             can provide an empty quality string.
122             Returns : a new Bio::Seq::PrimaryQual object
123              
124             =cut
125              
126             sub new {
127 28     28 1 140 my ($class, @args) = @_;
128 28         81 my $self = $class->SUPER::new(@args);
129              
130             # default: turn ON the warnings (duh)
131 28         107 my($qual,$id,$acc,$pid,$desc,$given_id,$header) =
132             $self->_rearrange([qw(QUAL
133             DISPLAY_ID
134             ACCESSION_NUMBER
135             PRIMARY_ID
136             DESC
137             ID
138             HEADER
139             )],
140             @args);
141 28 100 100     137 if( defined $id && defined $given_id ) {
142 17 50       43 if( $id ne $given_id ) {
143 0         0 $self->throw("Provided both id and display_id constructor functions. [$id] [$given_id]");
144             }
145             }
146 28 100       47 if( defined $given_id ) { $id = $given_id; }
  20         27  
147            
148             # note: the sequence string may be empty
149 28 50       88 $self->qual(defined($qual) ? $qual : []);
150 28 100       51 $header && $self->header($header);
151 28 100       71 $id && $self->display_id($id);
152 28 100       57 $acc && $self->accession_number($acc);
153 28 100       61 $pid && $self->primary_id($pid);
154 28 100       40 $desc && $self->desc($desc);
155              
156 28         159 return $self;
157             }
158              
159              
160             =head2 qual()
161              
162             Title : qual()
163             Usage : @quality_values = @{$obj->qual()};
164             Function: Get or set the quality as a reference to an array containing the
165             quality values. An error is generated if the quality scores are
166             invalid, see validate_qual().
167             Returns : A reference to an array.
168              
169             =cut
170              
171             sub qual {
172 71     71 1 2577 my ($self,$value) = @_;
173              
174 71 100 100     256 if( ! defined $value || length($value) == 0 ) {
    100          
175 35   100     75 $self->{'qual'} ||= [];
176             } elsif( ref($value) =~ /ARRAY/i ) {
177             # if the user passed in a reference to an array
178 3         8 $self->{'qual'} = $value;
179             } else {
180 33         71 $self->validate_qual($value, 1);
181 32         59 $value =~ s/^\s+//;
182 32         3835 $self->{'qual'} = [split(/\s+/,$value)];
183             }
184            
185 70         1135 return $self->{'qual'};
186             }
187              
188              
189             =head2 seq()
190              
191             Title : seq()
192             Usager : $sequence = $obj->seq();
193             Function : Returns the quality numbers as a space-separated string.
194             Returns : Single string.
195             Args : None.
196              
197             =cut
198              
199             sub seq {
200 0     0 1 0 return join ' ', @{ shift->qual };
  0         0  
201             }
202              
203              
204             =head2 validate_qual($qualstring)
205              
206             Title : validate_qual($qualstring)
207             Usage : print("Valid.") if { &validate_qual($self, $quality_string); }
208             Function: Test that the given quality string is valid. It is expected to
209             contain space-delimited numbers that can be parsed using split /\d+/.
210             However, this validation takes shortcuts and only tests that the
211             string contains characters valid in numbers: 0-9 . eE +-
212             Note that empty quality strings are valid too.
213             Returns : 1 for a valid sequence, 0 otherwise
214             Args : - Scalar containing the quality string to validate.
215             - Boolean to optionally throw an error if validation failed
216              
217             =cut
218              
219             sub validate_qual {
220 46     46 1 118 my ($self, $qualstr, $throw) = @_;
221 46 100 100     624 if ( (defined $qualstr ) &&
222             ($qualstr !~ /^[$MATCHPATTERN]*$/) ) {
223 4 100       9 if ($throw) {
224 2   50     6 $self->throw("Failed validation of quality score from '".
225             (defined($self->id)||'[unidentified sequence]')."'. No numeric ".
226             "value found.\n");
227             }
228 2         9 return 0;
229             }
230 42         99 return 1;
231             }
232              
233              
234             =head2 subqual($start,$end)
235              
236             Title : subqual($start,$end)
237             Usage : @subset_of_quality_values = @{$obj->subqual(10,40)};
238             Function: returns the quality values from $start to $end, where the
239             first value is 1 and the number is inclusive, ie 1-2 are the
240             first two bases of the sequence. Start cannot be larger than
241             end but can be equal.
242             Returns : A reference to an array.
243             Args : a start position and an end position
244              
245             =cut
246              
247             sub subqual {
248 240     240 1 418 my ($self,$start,$end) = @_;
249              
250 240 100       356 if( $start > $end ){
251 1         9 $self->throw("in subqual, start [$start] has to be greater than end [$end]");
252             }
253              
254 239 100 66     422 if( $start <= 0 || $end > $self->length ) {
255 1         5 $self->throw("You have to have start positive and length less than the total length of sequence [$start:$end] Total ".$self->length."");
256             }
257              
258             # remove one from start, and then length is end-start
259              
260 238         239 $start--;
261 238         185 $end--;
262 238         299 my @sub_qual_array = @{$self->{qual}}[$start..$end];
  238         1010  
263              
264             # return substr $self->seq(), $start, ($end-$start);
265 238         1015 return \@sub_qual_array;
266              
267             }
268              
269              
270             =head2 display_id()
271              
272             Title : display_id()
273             Usage : $id_string = $obj->display_id();
274             Function: returns the display id, aka the common name of the Quality
275             object.
276             The semantics of this is that it is the most likely string to be
277             used as an identifier of the quality sequence, and likely to have
278             "human" readability. The id is equivalent to the ID field of the
279             GenBank/EMBL databanks and the id field of the Swissprot/sptrembl
280             database. In fasta format, the >(\S+) is presumed to be the id,
281             though some people overload the id to embed other information.
282             Bioperl does not use any embedded information in the ID field,
283             and people are encouraged to use other mechanisms (accession
284             field for example, or extending the sequence object) to solve
285             this. Notice that $seq->id() maps to this function, mainly for
286             legacy/convience issues
287             Returns : A string
288             Args : None
289              
290             =cut
291              
292             sub display_id {
293 55     55 1 81 my ($obj,$value) = @_;
294 55 100       85 if( defined $value) {
295 24         30 $obj->{'display_id'} = $value;
296             }
297 55         144 return $obj->{'display_id'};
298             }
299              
300              
301             =head2 header()
302              
303             Title : header()
304             Usage : $header = $obj->header();
305             Function: Get/set the header that the user wants printed for this
306             quality object.
307             Returns : A string
308             Args : None
309              
310             =cut
311              
312             sub header {
313 19     19 1 39 my ($obj,$value) = @_;
314 19 100       36 if( defined $value) {
315 3         7 $obj->{'header'} = $value;
316             }
317 19         78 return $obj->{'header'};
318              
319             }
320              
321              
322             =head2 accession_number()
323              
324             Title : accession_number()
325             Usage : $unique_biological_key = $obj->accession_number();
326             Function: Returns the unique biological id for a sequence, commonly
327             called the accession_number. For sequences from established
328             databases, the implementors should try to use the correct
329             accession number. Notice that primary_id() provides the unique id
330             for the implementation, allowing multiple objects to have the same
331             accession number in a particular implementation. For sequences
332             with no accession number, this method should return "unknown".
333             Returns : A string
334             Args : None
335              
336             =cut
337              
338             sub accession_number {
339 8     8 1 16 my( $obj, $acc ) = @_;
340              
341 8 100       19 if (defined $acc) {
342 4         7 $obj->{'accession_number'} = $acc;
343             } else {
344 4         8 $acc = $obj->{'accession_number'};
345 4 100       12 $acc = 'unknown' unless defined $acc;
346             }
347 8         25 return $acc;
348             }
349              
350              
351             =head2 primary_id()
352              
353             Title : primary_id()
354             Usage : $unique_implementation_key = $obj->primary_id();
355             Function: Returns the unique id for this object in this implementation.
356             This allows implementations to manage their own object ids in a
357             way the implementation can control clients can expect one id to
358             map to one object. For sequences with no accession number, this
359             method should return a stringified memory location.
360             Returns : A string
361             Args : None
362              
363             =cut
364              
365             sub primary_id {
366 22     22 1 31 my ($obj,$value) = @_;
367 22 100       38 if( defined $value) {
368 19         26 $obj->{'primary_id'} = $value;
369             }
370 22         35 return $obj->{'primary_id'};
371             }
372              
373              
374             =head2 desc()
375              
376             Title : desc()
377             Usage : $qual->desc($newval);
378             $description = $qual->desc();
379             Function: Get/set description text for a qual object
380             Example :
381             Returns : Value of desc
382             Args : newvalue (optional)
383              
384             =cut
385              
386             sub desc {
387 15     15 1 29 my ($obj,$value) = @_;
388 15 100       29 if( defined $value) {
389 2         3 $obj->{'desc'} = $value;
390             }
391 15         29 return $obj->{'desc'};
392             }
393              
394              
395             =head2 id()
396              
397             Title : id()
398             Usage : $id = $qual->id();
399             Function: Return the ID of the quality. This should normally be (and
400             actually is in the implementation provided here) just a synonym
401             for display_id().
402             Returns : A string.
403             Args : None.
404              
405             =cut
406              
407             sub id {
408 25     25 1 42 my ($self,$value) = @_;
409 25 100       41 if( defined $value ) {
410 1         4 return $self->display_id($value);
411             }
412 24         35 return $self->display_id();
413             }
414              
415              
416             =head2 length()
417              
418             Title : length()
419             Usage : $length = $qual->length();
420             Function: Return the length of the array holding the quality values.
421             Under most circumstances, this should match the number of quality
422             values but no validation is done when the PrimaryQual object is
423             constructed and non-digits could be put into this array. Is this
424             a bug? Just enough rope...
425             Returns : A scalar (the number of elements in the quality array).
426             Args : None.
427              
428             =cut
429              
430             sub length {
431 272     272 1 260 my $self = shift;
432 272 50       447 if (ref($self->{qual}) ne "ARRAY") {
433 0         0 $self->warn("{qual} is not an array here. Why? It appears to be ".ref($self->{qual})."(".$self->{qual}."). Good thing this can _never_ happen.");
434             }
435 272         251 return scalar(@{$self->{qual}});
  272         612  
436             }
437              
438              
439             =head2 qualat()
440              
441             Title : qualat
442             Usage : $quality = $obj->qualat(10);
443             Function: Return the quality value at the given location, where the
444             first value is 1 and the number is inclusive, ie 1-2 are the first
445             two bases of the sequence. Start cannot be larger than end but can
446             be equal.
447             Returns : A scalar.
448             Args : A position.
449              
450             =cut
451              
452             sub qualat {
453 1     1 1 2 my ($self,$val) = @_;
454 1         2 my @qualat = @{$self->subqual($val,$val)};
  1         3  
455 1 50       4 if (scalar(@qualat) == 1) {
456 1         4 return $qualat[0];
457             } else {
458 0           $self->throw("qualat() provided more than one quality.");
459             }
460             }
461              
462             =head2 to_string()
463              
464             Title : to_string()
465             Usage : $quality = $obj->to_string();
466             Function: Return a textual representation of what the object contains.
467             For this module, this function will return:
468             qual
469             display_id
470             accession_number
471             primary_id
472             desc
473             id
474             length
475             Returns : A scalar.
476             Args : None.
477              
478             =cut
479              
480             sub to_string {
481 0     0 1   my ($self,$out,$result) = shift;
482 0           $out = "qual: ".join(',',@{$self->qual()});
  0            
483 0           foreach (qw(display_id accession_number primary_id desc id length)) {
484 0           $result = $self->$_();
485 0 0         if (!$result) { $result = ""; }
  0            
486 0           $out .= "$_: $result\n";
487             }
488 0           return $out;
489             }
490              
491              
492             sub to_string_automatic {
493 0     0 0   my ($self,$sub_result,$out) = shift;
494 0           foreach (sort keys %$self) {
495 0           print("Working on $_\n");
496 0           eval { $self->$_(); };
  0            
497 0 0         if ($@) { $sub_result = ref($_); }
  0 0          
498             elsif (!($sub_result = $self->$_())) {
499 0           $sub_result = "";
500             }
501 0 0         if (ref($sub_result) eq "ARRAY") {
502 0           print("This thing ($_) is an array!\n");
503 0           $sub_result = join(',',@$sub_result);
504             }
505 0           $out .= "$_: ".$sub_result."\n";
506             }
507 0           return $out;
508             }
509              
510             1;