File Coverage

Bio/Seq/Meta/Array.pm
Criterion Covered Total %
statement 129 150 86.0
branch 49 74 66.2
condition 13 34 38.2
subroutine 19 20 95.0
pod 16 16 100.0
total 226 294 76.8


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Seq::Meta::Array
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Heikki Lehvaslaiho
7             #
8             # Copyright Heikki Lehvaslaiho
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Seq::Meta::Array - array-based generic implementation of a
17             sequence class with residue-based meta information
18              
19             =head1 SYNOPSIS
20              
21             use Bio::LocatableSeq;
22             use Bio::Seq::Meta::Array;
23              
24             my $seq = Bio::Seq::Meta::Array->new(-id=>'test',
25             -seq=>'ACTGCTAGCT',
26             -start=>2434,
27             -start=>2443,
28             -strand=>1,
29             -verbose=>1, # to see warnings
30             );
31              
32             # to test this is a meta seq object
33             $seq->isa("Bio::Seq::Meta::Array")
34             || $seq->throw("$seq is not a Bio::Seq::Meta::Array");
35              
36             $seq->meta('1 2 3 4 5 6 7 8 9 10');
37              
38             # or you could create the Meta object directly
39             $seq = Bio::Seq::Meta::Array->new(-id=>'test',
40             -seq=>'ACTGCTAGCT',
41             -start=>2434,
42             -start=>2443,
43             -strand=>1,
44             -meta=>'1 2 3 4 5 6 7 8 9 10',
45             -verbose=>1, # to see warnings
46             );
47              
48              
49             # accessors
50             $arrayref = $seq->meta();
51             $string = $seq->meta_text();
52             $substring = $seq->submeta_text(2,5);
53             $unique_key = $seq->accession_number();
54              
55             =head1 DESCRIPTION
56              
57             This class implements generic methods for sequences with residue-based
58             meta information. Meta sequences with meta data are Bio::LocatableSeq
59             objects with additional methods to store that meta information. See
60             L and L.
61              
62             The meta information in this class can be a string of variable length
63             and can be a complex structure. Blank values are undef or zero.
64              
65             Application specific implementations should inherit from this class to
66             override and add to these methods.
67              
68             This class can be used for storing sequence quality values but
69             Bio::Seq::Quality has named methods that make it easier.
70              
71             =head1 SEE ALSO
72              
73             L,
74             L,
75             L,
76             L
77              
78             =head1 NOTE
79              
80             This Bio::Seq::MetaI implementation inherits from Bio::LocatableSeq, which
81             itself inherits from Bio::PrimarySeq. It is not a Bio::SeqI, so bless-ing
82             objects of this class into a Bio::SeqI or vice versa and will not work as
83             expected (see bug 2262). This may be addressed in a future refactor of
84             Bio::LocatableSeq.
85              
86             =head1 FEEDBACK
87              
88             =head2 Mailing Lists
89              
90             User feedback is an integral part of the evolution of this and other
91             Bioperl modules. Send your comments and suggestions preferably to one
92             of the Bioperl mailing lists. Your participation is much appreciated.
93              
94             bioperl-l@bioperl.org - General discussion
95             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
96              
97             =head2 Support
98              
99             Please direct usage questions or support issues to the mailing list:
100              
101             I
102              
103             rather than to the module maintainer directly. Many experienced and
104             reponsive experts will be able look at the problem and quickly
105             address it. Please include a thorough description of the problem
106             with code and data examples if at all possible.
107              
108             =head2 Reporting Bugs
109              
110             Report bugs to the Bioperl bug tracking system to help us keep track
111             the bugs and their resolution. Bug reports can be submitted via the
112             web:
113              
114             https://github.com/bioperl/bioperl-live/issues
115              
116             =head1 AUTHOR - Heikki Lehvaslaiho
117              
118             Email heikki-at-bioperl-dot-org
119              
120             =head1 CONTRIBUTORS
121              
122             Chad Matsalla, bioinformatics@dieselwurks.com
123             Aaron Mackey, amackey@virginia.edu
124              
125             =head1 APPENDIX
126              
127             The rest of the documentation details each of the object methods.
128             Internal methods are usually preceded with a _
129              
130             =cut
131              
132              
133             # Let the code begin...
134              
135              
136             package Bio::Seq::Meta::Array;
137 9     9   1093 use strict;
  9         11  
  9         231  
138              
139 9     9   982 use base qw(Bio::LocatableSeq Bio::Seq Bio::Seq::MetaI);
  9         8  
  9         2993  
140              
141             our $DEFAULT_NAME = 'DEFAULT';
142             our $GAP = '-';
143             our $META_GAP = 0;
144              
145             =head2 new
146              
147             Title : new
148             Usage : $metaseq = Bio::Seq::Meta::Array->new
149             ( -meta => 'aaaaaaaabbbbbbbb',
150             -seq => 'TKLMILVSHIVILSRM'
151             -id => 'human_id',
152             -accession_number => 'S000012',
153             );
154             Function: Constructor for Bio::Seq::Meta::Array class, meta data being in a
155             string. Note that you can provide an empty quality string.
156             Returns : a new Bio::Seq::Meta::Array object
157              
158             =cut
159              
160             sub new {
161 866     866 1 2248 my ($class, %args) = @_;
162            
163             # run-time modification of @ISA is extremely evil (you should't pick your
164             # interface on the fly); this has no obvious effect on any tests so
165             # commenting out - cjfields 2011-4-6
166            
167             #defined inheritance according to stated baseclass,
168             #if undefined then will be PrimarySeq
169             #if (defined($args{'-baseclass'})) {
170             # @ISA = ($args{'-baseclass'},"Bio::Seq::MetaI");
171             # }
172             #else {
173             # @ISA = qw( Bio::LocatableSeq Bio::Seq Bio::Seq::MetaI );
174             # }
175              
176 866         2555 my $self = $class->SUPER::new(%args);
177              
178 866         2616 my($meta, $forceflush) =
179             $self->_rearrange([qw(META
180             FORCE_FLUSH
181             )],
182             %args);
183              
184 866         2377 $self->{'_meta'}->{$DEFAULT_NAME} = [];
185              
186 866 100       1269 $meta && $self->meta($meta);
187 866 100       1084 $forceflush && $self->force_flush($forceflush);
188              
189 866         1876 return $self;
190             }
191              
192              
193             =head2 meta
194              
195             Title : meta
196             Usage : $meta_values = $obj->meta($values_string);
197             Function:
198              
199             Get and set method for the meta data starting from residue
200             position one. Since it is dependent on the length of the
201             sequence, it needs to be manipulated after the sequence.
202              
203             The length of the returned value always matches the length
204             of the sequence.
205              
206             Returns : reference to an array of meta data
207             Args : new value, string or array ref, optional
208              
209             =cut
210              
211             sub meta {
212 37     37 1 115 shift->named_meta($DEFAULT_NAME, shift);
213             }
214              
215             =head2 meta_text
216              
217             Title : meta_text
218             Usage : $meta_values = $obj->meta_text($values_arrayref);
219             Function: Variant of meta() guarantied to return a string
220             representation of meta data. For details, see L.
221             Returns : a string
222             Args : new value, string or array ref, optional
223              
224             =cut
225              
226             sub meta_text {
227 20 100   20 1 26 return join ' ', map {0 unless $_} @{shift->meta(shift)};
  143         315  
  20         39  
228             }
229              
230             =head2 named_meta
231              
232             Title : named_meta()
233             Usage : $meta_values = $obj->named_meta($name, $values_arrayref);
234             Function: A more general version of meta(). Each meta data set needs
235             to be named. See also L.
236             Returns : reference to an array of meta data
237             Args : scalar, name of the meta data set
238             new value, string or array ref, optional
239              
240             =cut
241              
242             sub named_meta {
243 1564     1564 1 1518 my ($self, $name, $value) = @_;
244              
245 1564   33     2268 $name ||= $DEFAULT_NAME;
246              
247 1564 100       2123 if (defined $value) {
248 851         558 my ($arrayref);
249              
250 851 100       1267 if (ref $value eq 'ARRAY' ) { # array ref
    50          
251 721         630 $arrayref = $value;
252             }
253             elsif (not ref($value)) { # scalar
254 130         30161 $arrayref = [split /\s+/, $value];
255             } else {
256 0         0 $self->throw("I need a scalar or array ref, not [". ref($value). "]");
257             }
258              
259             # test for length
260 851         4961 my $diff = $self->length - @{$arrayref};
  851         1056  
261 851 100       1281 if ($diff > 0) {
262 5         11 foreach (1..$diff) { push @{$arrayref}, 0;}
  22         15  
  22         25  
263             }
264              
265 851         1054 $self->{'_meta'}->{$name} = $arrayref;
266              
267             #$self->_test_gap_positions($name) if $self->verbose > 0;
268             }
269              
270 1564 100       2343 $self->_do_flush if $self->force_flush;
271              
272 1564   33     3953 return $self->{'_meta'}->{$name} || (" " x $self->length);
273              
274             }
275              
276             =head2 _test_gap_positions
277              
278             Title : _test_gap_positions
279             Usage : $meta_values = $obj->_test_gap_positions($name);
280             Function: Internal test for correct position of gap characters.
281             Gap being only '-' this time.
282              
283             This method is called from named_meta() when setting meta
284             data but only if verbose is positive as this can be an
285             expensive process on very long sequences. Set verbose(1) to
286             see warnings when gaps do not align in sequence and meta
287             data and turn them into errors by setting verbose(2).
288              
289             Returns : true on success, prints warnings
290             Args : none
291              
292             =cut
293              
294             sub _test_gap_positions {
295 0     0   0 my $self = shift;
296 0         0 my $name = shift;
297 0         0 my $success = 1;
298              
299 0 0       0 $self->seq || return $success;
300 0         0 my $len = CORE::length($self->seq);
301 0         0 for (my $i=0; $i < $len; $i++) {
302 0         0 my $s = substr $self->{seq}, $i, 1;
303 0         0 my $m = substr $self->{_meta}->{$name}, $i, 1;
304 0 0 0     0 $self->warn("Gap mismatch in column [". ($i+1). "] of [$name] meta data in seq [". $self->id. "]")
      0        
305             and $success = 0
306             if ($m eq '-') && $s ne $m;
307             }
308 0         0 return $success;
309             }
310              
311             =head2 named_meta_text
312              
313             Title : named_meta_text()
314             Usage : $meta_values = $obj->named_meta_text($name, $values_arrayref);
315             Function: Variant of named_meta() guarantied to return a textual
316             representation of the named meta data.
317             For details, see L.
318             Returns : a string
319             Args : scalar, name of the meta data set
320             new value, string or array ref, optional
321              
322             =cut
323              
324             sub named_meta_text {
325 4     4 1 5 return join ' ', @{shift->named_meta(@_)};
  4         8  
326              
327             }
328              
329             =head2 submeta
330              
331             Title : submeta
332             Usage : $subset_of_meta_values = $obj->submeta(10, 20, $value_string);
333             $subset_of_meta_values = $obj->submeta(10, undef, $value_string);
334             Function:
335              
336             Get and set method for meta data for subsequences.
337              
338             Numbering starts from 1 and the number is inclusive, ie 1-2
339             are the first two residue of the sequence. Start cannot be
340             larger than end but can be equal.
341              
342             If the second argument is missing the returned values
343             should extend to the end of the sequence.
344              
345             The return value may be a string or an array reference,
346             depending on the implentation. If in doubt, use
347             submeta_text() which is a variant guarantied to return a
348             string. See L.
349              
350             Returns : A reference to an array or a string
351             Args : integer, start position
352             integer, end position, optional when a third argument present
353             new value, string or array ref, optional
354              
355             =cut
356              
357             sub submeta {
358 1034     1034 1 1392 shift->named_submeta($DEFAULT_NAME, @_);
359             }
360              
361             =head2 submeta_text
362              
363             Title : submeta_text
364             Usage : $meta_values = $obj->submeta_text(20, $value_string);
365             Function: Variant of submeta() guarantied to return a textual
366             representation of meta data. For details, see L.
367             Returns : a string
368             Args : new value, string or array ref, optional
369              
370              
371             =cut
372              
373             sub submeta_text {
374 6     6 1 8 return join ' ', @{shift->named_submeta($DEFAULT_NAME, @_)};
  6         13  
375             }
376              
377             =head2 named_submeta
378              
379             Title : named_submeta
380             Usage : $subset_of_meta_values = $obj->named_submeta($name, 10, 20, $value_string);
381             $subset_of_meta_values = $obj->named_submeta($name, 10);
382             Function: Variant of submeta() guarantied to return a textual
383             representation of meta data. For details, see L.
384             Returns : A reference to an array or a string
385             Args : scalar, name of the meta data set
386             integer, start position
387             integer, end position, optional when a third argument present (can be undef)
388             new value, string or array ref, optional
389              
390             =cut
391              
392              
393             sub named_submeta {
394 2144     2144 1 1833 my ($self, $name, $start, $end, $value) = @_;
395              
396 2144   33     2890 $name ||= $DEFAULT_NAME;
397 2144   100     2681 $start ||=1;
398 2144 50 33     9398 $start =~ /^[+]?\d+$/ and $start > 0 or
399             $self->throw("Need at least a positive integer start value");
400 2144         1795 $start--;
401 2144         1375 my $meta_len = scalar(@{$self->{_meta}->{$name}});
  2144         2785  
402 2144 100       2538 if (defined $value) {
403 12         11 my $arrayref;
404              
405 12 50       29 if (ref $value eq 'ARRAY' ) { # array ref
    50          
406 0         0 $arrayref = $value;
407             }
408             elsif (not ref($value)) { # scalar
409 12         49 $arrayref = [split /\s+/, $value];
410             } else {
411 0         0 $self->throw("I need a space separated scalar or array ref, not [". ref($value). "]");
412             }
413              
414             $self->warn("You are setting meta values beyond the length of the sequence\n".
415             "[$start > ". length($self->seq)."] in sequence ". $self->id)
416 12 50       13 if $start + scalar @{$arrayref} -1 > $self->length;
  12         31  
417              
418              
419 12 100       20 $end or $end = @{$arrayref} + $start;
  2         4  
420 12         11 $end--;
421              
422             # test for length; pad if needed
423 12         12 my $diff = $end - $start - scalar @{$arrayref};
  12         11  
424 12 50       22 if ($diff > 0) {
425 0         0 foreach (1..$diff) { push @{$arrayref}, $META_GAP}
  0         0  
  0         0  
426             }
427              
428 12         7 @{$self->{_meta}->{$name}}[$start..$end] = @{$arrayref};
  12         28  
  12         20  
429              
430 12 100       21 $self->_do_flush if $self->force_flush;
431              
432 12         53 return $arrayref;
433              
434             } else {
435             # don't set by seq length; use meta array length instead; bug 2478
436 2132   66     2529 $end ||= $meta_len;
437 2132 50       2639 if ($end > $meta_len) {
438 0         0 $self->warn("End is longer than meta sequence $name length; resetting to $meta_len");
439 0         0 $end = $meta_len;
440             }
441             # warn but don't reset (push use of trunc() instead)
442 2132 50       2855 $self->warn("End is longer than sequence length; use trunc() \n".
443             "if you want a fully truncated object") if $end > $self->length;
444 2132         1477 $end--;
445 2132         1946 return [@{$self->{_meta}->{$name}}[$start..$end]];
  2132         8265  
446             }
447             }
448              
449              
450             =head2 named_submeta_text
451              
452             Title : named_submeta_text
453             Usage : $meta_values = $obj->named_submeta_text($name, 20, $value_string);
454             Function: Variant of submeta() guarantied to return a textual
455             representation of meta data. For details, see L.
456             Returns : a string
457             Args : scalar, name of the meta data
458             Args : integer, start position, optional
459             integer, end position, optional
460             new value, string or array ref, optional
461              
462             =cut
463              
464             sub named_submeta_text {
465 5     5 1 7 return join ' ', @{shift->named_submeta(@_)};
  5         10  
466             }
467              
468             =head2 meta_names
469              
470             Title : meta_names
471             Usage : @meta_names = $obj->meta_names()
472             Function: Retrives an array of meta data set names. The default
473             (unnamed) set name is guarantied to be the first name if it
474             contains any data.
475             Returns : an array of names
476             Args : none
477              
478             =cut
479              
480             sub meta_names {
481 36     36 1 30 my ($self) = @_;
482              
483 36         31 my @r;
484 36         21 foreach ( sort keys %{$self->{'_meta'}} ) {
  36         130  
485 55 100       120 push (@r, $_) unless $_ eq $DEFAULT_NAME;
486             }
487 36 50       102 unshift @r, $DEFAULT_NAME if $self->{'_meta'}->{$DEFAULT_NAME};
488 36         66 return @r;
489             }
490              
491              
492             =head2 meta_length
493              
494             Title : meta_length()
495             Usage : $meta_len = $obj->meta_length();
496             Function: return the number of elements in the meta set
497             Returns : integer
498             Args : -
499              
500             =cut
501              
502             sub meta_length {
503 4     4 1 5 my ($self) = @_;
504 4         9 return $self->named_meta_length($DEFAULT_NAME);
505             }
506              
507              
508             =head2 named_meta_length
509              
510             Title : named_meta_length()
511             Usage : $meeta_len = $obj->named_meta_length($name);
512             Function: return the number of elements in the named meta set
513             Returns : integer
514             Args : -
515              
516             =cut
517              
518             sub named_meta_length {
519 104     104 1 85 my ($self, $name) = @_;
520 104   33     124 $name ||= $DEFAULT_NAME;
521 104         59 return scalar @{$self->{'_meta'}->{$name}};
  104         253  
522             }
523              
524              
525              
526             =head2 force_flush
527              
528             Title : force_flush()
529             Usage : $force_flush = $obj->force_flush(1);
530             Function: Automatically pad with empty values or truncate meta values
531             to sequence length. Not done by default.
532             Returns : boolean 1 or 0
533             Args : optional boolean value
534              
535             Note that if you turn this forced padding off, the previously padded
536             values are not changed.
537              
538             =cut
539              
540             sub force_flush {
541 1597     1597 1 1192 my ($self, $value) = @_;
542              
543 1597 100       2135 if (defined $value) {
544 7 100       9 if ($value) {
545 5         8 $self->{force_flush} = 1;
546 5         11 $self->_do_flush;
547             } else {
548 2         3 $self->{force_flush} = 0;
549             }
550             }
551              
552 1597         2516 return $self->{force_flush};
553             }
554              
555              
556             =head2 _do_flush
557              
558             Title : _do_flush
559             Usage :
560             Function: internal method to do the force that meta values are same
561             length as sequence . Called from L
562             Returns :
563             Args :
564              
565             =cut
566              
567              
568             sub _do_flush {
569 26     26   20 my ($self) = @_;
570              
571 26         40 foreach my $name ($self->meta_names) {
572             #print "seq: ", $self->length , " ", $name, ": ", $self->named_meta_length($name), "======\n";
573            
574             # elongnation
575 36 100       60 if ($self->length > $self->named_meta_length($name)) {
    100          
576 10         14 my $diff = $self->length - $self->named_meta_length($name);
577 10         18 foreach (1..$diff) { push @{$self->{'_meta'}->{$name}}, $META_GAP}
  64         36  
  64         92  
578             }
579             # truncation
580             elsif ( $self->length < $self->named_meta_length($name) ) {
581 4         8 $self->{_meta}->{$name} = [@{$self->{_meta}->{$name}}[0..($self->length-1)]]
  4         17  
582             }
583             }
584             }
585              
586              
587             =head2 is_flush
588              
589             Title : is_flush
590             Usage : $is_flush = $obj->is_flush()
591             or $is_flush = $obj->is_flush($my_meta_name)
592             Function: Boolean to tell if all meta values are in
593             flush with the sequence length.
594             Returns true if force_flush() is set
595             Set verbosity to a positive value to see failed meta sets
596             Returns : boolean 1 or 0
597             Args : optional name of the meta set
598              
599             =cut
600              
601             sub is_flush {
602              
603 14     14 1 23 my ($self, $name) = shift;
604              
605 14 100       23 return 1 if $self->force_flush;
606              
607 9         9 my $sticky = '';
608              
609              
610 9 50       15 if ($name) {
611 0 0       0 $sticky .= "$name " if $self->length != $self->named_meta_length($name);
612             } else {
613 9         24 foreach my $m ($self->meta_names) {
614 16 50 66     47 $sticky .= "$m " if ($self->named_meta_length($m) > 0) && ($self->length != $self->named_meta_length($m)) ;
615             }
616             }
617              
618 9 50       15 if ($sticky) {
619 0 0       0 print "These meta set are not flush: $sticky\n" if $self->verbose;
620 0         0 return 0;
621             }
622              
623 9         18 return 1;
624             }
625              
626              
627             =head1 Bio::PrimarySeqI methods
628              
629             =head2 revcom
630              
631             Title : revcom
632             Usage : $newseq = $seq->revcom();
633             Function: Produces a new Bio::Seq::MetaI implementing object where
634             the order of residues and their meta information is reversed.
635             Returns : A new (fresh) Bio::Seq::Meta object
636             Args : none
637             Throws : if the object returns false on is_flush()
638              
639             Note: The method does nothing to meta values, it reorders them, only.
640              
641             =cut
642              
643             sub revcom {
644 9     9 1 18 my $self = shift;
645              
646 9 50       19 $self->throw("Can not get a reverse complement. The object is not flush.")
647             unless $self->is_flush;
648              
649 9         23 my $new = $self->SUPER::revcom;
650 9         16 my $end = $self->length - 1;
651             map {
652 16         21 $new->{_meta}->{$_} = [ reverse @{$self->{_meta}->{$_}}[0..$end]]
  16         73  
653 9         7 } keys %{$self->{_meta}};
  9         23  
654              
655 9         22 return $new;
656             }
657              
658             =head2 trunc
659              
660             Title : trunc
661             Usage : $subseq = $seq->trunc(10,100);
662             Function: Provides a truncation of a sequence together with meta data
663             Returns : a fresh Bio::Seq::Meta implementing object
664             Args : Two integers denoting first and last residue of the sub-sequence.
665              
666             =cut
667              
668             sub trunc {
669 54     54 1 6486 my ($self, $start, $end) = @_;
670              
671             # test arguments
672 54 50 33     367 $start =~ /^[+]?\d+$/ and $start > 0 or
673             $self->throw("Need at least a positive integer start value as start; got [$start]");
674 54 50 33     224 $end =~ /^[+]?\d+$/ and $end > 0 or
675             $self->throw("Need at least a positive integer start value as end; got [$end]");
676 54 50       85 $end >= $start or
677             $self->throw("End position has to be larger or equal to start; got [$start..$end]");
678 54 50       108 $end <= $self->length or
679             $self->throw("End position can not be larger than sequence length; got [$end]");
680              
681 54         124 my $new = $self->SUPER::trunc($start, $end);
682 54         59 $start--;
683 54         55 $end--;
684             map {
685 107         120 $new->{_meta}->{$_} = [@{$self->{_meta}->{$_}}[$start..$end]]
  107         446  
686 54         38 } keys %{$self->{_meta}};
  54         140  
687 54         172 return $new;
688             }
689              
690             1;