File Coverage

Bio/Seq/LargeLocatableSeq.pm
Criterion Covered Total %
statement 59 94 62.7
branch 25 56 44.6
condition 6 17 35.2
subroutine 10 11 90.9
pod 6 6 100.0
total 106 184 57.6


line stmt bran cond sub pod time code
1             # BioPerl module for Bio::Seq::LargeLocatableSeq
2             #
3             # Please direct questions and support issues to
4             #
5             # Cared for by Albert Vilella
6             #
7             # based on the Bio::LargePrimarySeq module
8             # by Ewan Birney
9             #
10             # and the Bio::LocatableSeq module
11             # by Ewan Birney
12             #
13             # Copyright Albert Vilella
14             #
15             # You may distribute this module under the same terms as perl itself
16              
17             # POD documentation - main docs before the code
18              
19             =head1 NAME
20              
21             Bio::Seq::LargeLocatableSeq - LocatableSeq object that stores sequence as
22             files in the tempdir
23              
24             =head1 SYNOPSIS
25              
26             # normal primary seq usage
27             use Bio::Seq::LargeLocatableSeq;
28             my $seq = Bio::Seq::LargeLocatableSeq->new(-seq => "CAGT-GGT",
29             -id => "seq1",
30             -start => 1,
31             -end => 7);
32              
33             =head1 DESCRIPTION
34              
35             Bio::Seq::LargeLocatableSeq - object with start/end points on it that
36             can be projected into a MSA or have coordinates relative to another
37             seq.
38              
39             This object, unlike Bio::LocatableSeq, stores a sequence as a series
40             of files in a temporary directory. The aim is to allow someone the
41             ability to store very large sequences (eg, E 100MBases) in a file
42             system without running out of memory (eg, on a 64 MB real memory
43             machine!).
44              
45             Of course, to actually make use of this functionality, the programs
46             which use this object B not call $primary_seq-Eseq otherwise
47             the entire sequence will come out into memory and probably crash your
48             machine. However, calls like $primary_seq-Esubseq(10,100) will cause
49             only 90 characters to be brought into real memory.
50              
51             =head1 FEEDBACK
52              
53             =head2 Mailing Lists
54              
55             User feedback is an integral part of the evolution of this and other
56             Bioperl modules. Send your comments and suggestions preferably to
57             the Bioperl mailing list. Your participation is much appreciated.
58              
59             bioperl-l@bioperl.org - General discussion
60             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
61              
62             =head2 Support
63              
64             Please direct usage questions or support issues to the mailing list:
65              
66             I
67              
68             rather than to the module maintainer directly. Many experienced and
69             reponsive experts will be able look at the problem and quickly
70             address it. Please include a thorough description of the problem
71             with code and data examples if at all possible.
72              
73             =head2 Reporting Bugs
74              
75             Report bugs to the Bioperl bug tracking system to help us keep track
76             of the bugs and their resolution. Bug reports can be submitted via
77             the web:
78              
79             https://github.com/bioperl/bioperl-live/issues
80              
81             =head1 AUTHOR - Albert Vilella
82              
83             Email avilella-AT-gmail-DOT-com
84              
85             =head1 APPENDIX
86              
87             The rest of the documentation details each of the object methods.
88             Internal methods are usually preceded with a _
89              
90             =cut
91              
92              
93             # Let the code begin...
94              
95             package Bio::Seq::LargeLocatableSeq;
96 2     2   404 use vars qw($AUTOLOAD);
  2         4  
  2         67  
97 2     2   9 use strict;
  2         3  
  2         37  
98              
99              
100 2     2   6 use base qw(Bio::Seq::LargePrimarySeq Bio::LocatableSeq Bio::Root::IO);
  2         4  
  2         483  
101              
102              
103             =head2 new
104              
105             Title : new
106             Usage : my $obj = Bio::Seq::LargeLocatableSeq->new();
107             Function: Builds a new Bio::Seq::LargeLocatableSeq object
108             Returns : an instance of Bio::Seq::LargeLocatableSeq
109             Args :
110              
111              
112             =cut
113              
114             sub new {
115 8     8 1 162 my ($class, %params) = @_;
116              
117             # don't let PrimarySeq set seq until we have
118             # opened filehandle
119              
120 8   66     30 my $seq = $params{'-seq'} || $params{'-SEQ'};
121 8 100       15 if($seq ) {
122 1         2 delete $params{'-seq'};
123 1         2 delete $params{'-SEQ'};
124             }
125 8         43 my $self = $class->SUPER::new(%params);
126 8 50       20 my $mapping = exists $params{'-mapping'} ? $params{'-mapping'} : [1,1];
127 8         38 $self->mapping($mapping);
128 8         25 $self->_initialize_io(%params);
129 8         18 my $tempdir = $self->tempdir( CLEANUP => 1);
130 8         2156 my ($tfh,$file) = $self->tempfile( DIR => $tempdir );
131              
132 8 50       34 $tfh && $self->_fh($tfh);
133 8 50       27 $file && $self->_filename($file);
134 8         17 $self->length(0);
135 8 100       20 $seq && $self->seq($seq);
136              
137 8         34 return $self;
138             }
139              
140              
141             =head2 length
142              
143             Title : length
144             Usage :
145             Function:
146             Example :
147             Returns :
148             Args :
149              
150              
151             =cut
152              
153             sub length {
154 146     146 1 176 my ($obj,$value) = @_;
155 146 100       186 if( defined $value) {
156 24         31 $obj->{'length'} = $value;
157             }
158              
159 146 50       335 return (defined $obj->{'length'}) ? $obj->{'length'} : 0;
160             }
161              
162             =head2 seq
163              
164             Title : seq
165             Usage :
166             Function:
167             Example :
168             Returns :
169             Args :
170              
171              
172             =cut
173              
174             sub seq {
175 50     50 1 63 my ($self, $data) = @_;
176 50 100       68 if( defined $data ) {
177 1 50       2 if( $self->length() == 0) {
178 1         2 $self->add_sequence_as_string($data);
179             } else {
180 0         0 $self->warn("Trying to reset the seq string, cannot do this with a LargeLocatableSeq - must allocate a new object");
181             }
182             }
183 50         69 return $self->subseq(1,$self->length);
184             }
185              
186              
187             =head2 subseq
188              
189             Title : subseq
190             Usage :
191             Function:
192             Example :
193             Returns :
194             Args :
195              
196              
197             =cut
198              
199             sub subseq{
200 50     50 1 60 my ($self,$start,$end) = @_;
201 50         48 my $string;
202 50         82 my $fh = $self->_fh();
203              
204 50 50 33     84 if( ref($start) && $start->isa('Bio::LocationI') ) {
205 0         0 my $loc = $start;
206 0 0       0 if( $loc->length == 0 ) {
    0          
207 0         0 $self->warn("Expect location lengths to be > 0");
208 0         0 return '';
209             } elsif( $loc->end < $loc->start ) {
210             # what about circular seqs
211 0         0 $self->warn("Expect location start to come before location end");
212             }
213 0         0 my $seq = '';
214 0 0       0 if( $loc->isa('Bio::Location::SplitLocationI') ) {
215 0         0 foreach my $subloc ( $loc->sub_Location ) {
216 0 0       0 if(! seek($fh,$subloc->start() - 1,0)) {
217 0         0 $self->throw("Unable to seek on file $start:$end $!");
218             }
219 0         0 my $ret = read($fh, $string, $subloc->length());
220 0 0       0 if( !defined $ret ) {
221 0         0 $self->throw("Unable to read $start:$end $!");
222             }
223 0 0       0 if( $subloc->strand < 0 ) {
224             # $string = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
225 0         0 $string = Bio::Seq::LargePrimarySeq->new(-seq => $string)->revcom()->seq();
226             }
227 0         0 $seq .= $string;
228             }
229             } else {
230 0 0       0 if(! seek($fh,$loc->start()-1,0)) {
231 0         0 $self->throw("Unable to seek on file ".$loc->start.":".
232             $loc->end ." $!");
233             }
234 0         0 my $ret = read($fh, $string, $loc->length());
235 0 0       0 if( !defined $ret ) {
236 0         0 $self->throw("Unable to read ".$loc->start.":".
237             $loc->end ." $!");
238             }
239 0         0 $seq = $string;
240             }
241 0 0 0     0 if( defined $loc->strand &&
242             $loc->strand < 0 ) {
243             # $seq = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
244 0         0 $seq = Bio::Seq::LargePrimarySeq->new(-seq => $seq)->revcom()->seq();
245             }
246 0         0 return $seq;
247             }
248 50 50 33     101 if( $start <= 0 || $end > $self->length ) {
249 0         0 $self->throw("Attempting to get a subseq out of range $start:$end vs ".
250             $self->length);
251             }
252 50 50       80 if( $end < $start ) {
253 0         0 $self->throw("Attempting to subseq with end ($end) less than start ($start). To revcom use the revcom function with trunc");
254             }
255              
256 50 50       312 if(! seek($fh,$start-1,0)) {
257 0         0 $self->throw("Unable to seek on file $start:$end $!");
258             }
259 50         206 my $ret = read($fh, $string, $end-$start+1);
260 50 50       86 if( !defined $ret ) {
261 0         0 $self->throw("Unable to read $start:$end $!");
262             }
263 50         195 return $string;
264             }
265              
266              
267             =head2 add_sequence_as_string
268              
269             Title : add_sequence_as_string
270             Usage : $seq->add_sequence_as_string("CATGAT");
271             Function: Appends additional residues to an existing LargeLocatableSeq object.
272             This allows one to build up a large sequence without storing
273             entire object in memory.
274             Returns : Current length of sequence
275             Args : string to append
276              
277             =cut
278              
279             sub add_sequence_as_string{
280 8     8 1 15 my ($self,$str) = @_;
281 8         13 my $len = $self->length + CORE::length($str);
282 8         16 my $fh = $self->_fh();
283 8 50       42 if(! seek($fh,0,2)) {
284 0         0 $self->throw("Unable to seek end of file: $!");
285             }
286 8         35 $self->_print($str);
287 8         15 $self->length($len);
288             }
289              
290              
291             =head2 _filename
292              
293             Title : _filename
294             Usage : $obj->_filename($newval)
295             Function:
296             Example :
297             Returns : value of _filename
298             Args : newvalue (optional)
299              
300              
301             =cut
302              
303             sub _filename{
304 60     60   104 my ($obj,$value) = @_;
305 60 100       82 if( defined $value) {
306 16         26 $obj->{'_filename'} = $value;
307             }
308 60         522 return $obj->{'_filename'};
309              
310             }
311              
312              
313             =head2 alphabet
314              
315             Title : alphabet
316             Usage : $obj->alphabet($newval)
317             Function:
318             Example :
319             Returns : value of alphabet
320             Args : newvalue (optional)
321              
322              
323             =cut
324              
325             sub alphabet{
326 0     0 1 0 my ($self,$value) = @_;
327 0 0       0 if( defined $value) {
328 0         0 $self->SUPER::alphabet($value);
329             }
330 0   0     0 return $self->SUPER::alphabet() || 'dna';
331              
332             }
333              
334             sub DESTROY {
335 10     10   425 my $self = shift;
336 10         24 my $fh = $self->_fh();
337 10 100       62 close($fh) if( defined $fh );
338             # this should be handled by Tempfile removal, but we'll unlink anyways.
339 10 100 66     27 unlink $self->_filename() if defined $self->_filename() && -e $self->_filename;
340 10         74 $self->SUPER::DESTROY();
341             }
342              
343             1;