File Coverage

Bio/Seq/LargePrimarySeq.pm
Criterion Covered Total %
statement 80 94 85.1
branch 40 56 71.4
condition 14 20 70.0
subroutine 11 11 100.0
pod 6 6 100.0
total 151 187 80.7


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Seq::LargePrimarySeq
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Ewan Birney
7             #
8             # Copyright Ewan Birney
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # updated to utilize File::Temp - jason 2000-12-12
13             # POD documentation - main docs before the code
14              
15             =head1 NAME
16              
17             Bio::Seq::LargePrimarySeq - PrimarySeq object that stores sequence as
18             files in the tempdir (as found by File::Temp) or the default method in
19             Bio::Root::Root
20              
21             =head1 SYNOPSIS
22              
23             # normal primary seq usage
24              
25             =head1 DESCRIPTION
26              
27             This object stores a sequence as a series of files in a temporary
28             directory. The aim is to allow someone the ability to store very large
29             sequences (eg, E 100MBases) in a file system without running out
30             of memory (eg, on a 64 MB real memory machine!).
31              
32             Of course, to actually make use of this functionality, the programs
33             which use this object B not call $primary_seq-Eseq otherwise
34             the entire sequence will come out into memory and probably paste your
35             machine. However, calls $primary_seq-Esubseq(10,100) will cause
36             only 90 characters to be brought into real memory.
37              
38             =head1 FEEDBACK
39              
40             =head2 Mailing Lists
41              
42             User feedback is an integral part of the evolution of this and other
43             Bioperl modules. Send your comments and suggestions preferably to one
44             of the Bioperl mailing lists. Your participation is much appreciated.
45              
46             bioperl-l@bioperl.org - General discussion
47             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48              
49             =head2 Support
50              
51             Please direct usage questions or support issues to the mailing list:
52              
53             I
54              
55             rather than to the module maintainer directly. Many experienced and
56             reponsive experts will be able look at the problem and quickly
57             address it. Please include a thorough description of the problem
58             with code and data examples if at all possible.
59              
60             =head2 Reporting Bugs
61              
62             Report bugs to the Bioperl bug tracking system to help us keep track
63             the bugs and their resolution. Bug reports can be submitted via the
64             web:
65              
66             https://github.com/bioperl/bioperl-live/issues
67              
68             =head1 AUTHOR - Ewan Birney, Jason Stajich
69              
70             Email birney@ebi.ac.uk
71             Email jason@bioperl.org
72              
73             =head1 APPENDIX
74              
75             The rest of the documentation details each of the object
76             methods. Internal methods are usually preceded with a _
77              
78             =cut
79              
80              
81             # Let the code begin...
82              
83              
84             package Bio::Seq::LargePrimarySeq;
85 4     4   414 use vars qw($AUTOLOAD);
  4         5  
  4         140  
86 4     4   16 use strict;
  4         5  
  4         92  
87              
88              
89 4     4   15 use base qw(Bio::PrimarySeq Bio::Root::IO Bio::Seq::LargeSeqI);
  4         4  
  4         1148  
90              
91             sub new {
92 22     22 1 231 my ($class, %params) = @_;
93            
94             # don't let PrimarySeq set seq until we have
95             # opened filehandle
96              
97 22   66     73 my $seq = $params{'-seq'} || $params{'-SEQ'};
98 22 100       46 if($seq ) {
99 10         17 delete $params{'-seq'};
100 10         12 delete $params{'-SEQ'};
101             }
102 22         96 my $self = $class->SUPER::new(%params);
103 22         105 $self->_initialize_io(%params);
104 22         72 my $tempdir = $self->tempdir( CLEANUP => 1);
105 22         7187 my ($tfh,$file) = $self->tempfile( DIR => $tempdir );
106 22         47 $self->{tempdir} = $tempdir;
107 22 50       100 $tfh && $self->_fh($tfh);
108 22 50       130 $file && $self->_filename($file);
109 22         47 $self->length(0);
110 22 100       50 $seq && $self->seq($seq);
111              
112 22         79 return $self;
113             }
114              
115              
116             =head2 length
117              
118             Title : length
119             Usage :
120             Function:
121             Example :
122             Returns :
123             Args :
124              
125              
126             =cut
127              
128             sub length {
129 14185     14185 1 14998 my ($obj,$value) = @_;
130 14185 100       17502 if( defined $value) {
131 5649         5724 $obj->{'length'} = $value;
132             }
133 14185 50       25386 return (defined $obj->{'length'}) ? $obj->{'length'} : 0;
134             }
135              
136             =head2 seq
137              
138             Title : seq
139             Usage :
140             Function:
141             Example :
142             Returns :
143             Args :
144              
145              
146             =cut
147              
148             sub seq {
149 29     29 1 44 my ($self, $data) = @_;
150 29 100       44 if( defined $data ) {
151 11 50       17 if( $self->length() == 0) {
152 11         22 $self->add_sequence_as_string($data);
153             } else {
154 0         0 $self->warn("Trying to reset the seq string, cannot do this with a LargePrimarySeq - must allocate a new object");
155             }
156             }
157 29         46 return $self->subseq(1,$self->length);
158             }
159              
160             =head2 subseq
161              
162             Title : subseq
163             Usage :
164             Function:
165             Example :
166             Returns :
167             Args :
168              
169              
170             =cut
171              
172             sub subseq{
173 2859     2859 1 4326 my ($self,$start,$end) = @_;
174 2859         2315 my $string;
175 2859         3426 my $fh = $self->_fh();
176              
177 2859 100 66     3950 if( ref($start) && $start->isa('Bio::LocationI') ) {
178 3         3 my $loc = $start;
179 3 50       11 if( $loc->length == 0 ) {
    50          
180 0         0 $self->warn("Expect location lengths to be > 0");
181 0         0 return '';
182             } elsif( $loc->end < $loc->start ) {
183             # what about circular seqs
184 0         0 $self->warn("Expect location start to come before location end");
185             }
186 3         5 my $seq = '';
187 3 100       13 if( $loc->isa('Bio::Location::SplitLocationI') ) {
188 1         2 foreach my $subloc ( $loc->sub_Location ) {
189 2 50       5 if(! seek($fh,$subloc->start() - 1,0)) {
190 0         0 $self->throw("Unable to seek on file $start:$end $!");
191             }
192 2         4 my $ret = read($fh, $string, $subloc->length());
193 2 50       4 if( !defined $ret ) {
194 0         0 $self->throw("Unable to read $start:$end $!");
195             }
196 2 100       5 if( $subloc->strand < 0 ) {
197 1         4 $string = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
198             }
199 2         6 $seq .= $string;
200             }
201             } else {
202 2 50       5 if(! seek($fh,$loc->start()-1,0)) {
203 0         0 $self->throw("Unable to seek on file ".$loc->start.":".
204             $loc->end ." $!");
205             }
206 2         5 my $ret = read($fh, $string, $loc->length());
207 2 50       5 if( !defined $ret ) {
208 0         0 $self->throw("Unable to read ".$loc->start.":".
209             $loc->end ." $!");
210             }
211 2         4 $seq = $string;
212             }
213 3 50 66     8 if( defined $loc->strand &&
214             $loc->strand < 0 ) {
215 0         0 $seq = Bio::PrimarySeq->new(-seq => $seq)->revcom()->seq();
216             }
217 3         14 return $seq;
218             }
219 2856 50 33     4571 if( $start <= 0 || $end > $self->length ) {
220 0         0 $self->throw("Attempting to get a subseq out of range $start:$end vs ".
221             $self->length);
222             }
223 2856 50       3602 if( $end < $start ) {
224 0         0 $self->throw("Attempting to subseq with end ($end) less than start ($start). To revcom use the revcom function with trunc");
225             }
226              
227 2856 50       7250 if(! seek($fh,$start-1,0)) {
228 0         0 $self->throw("Unable to seek on file $start:$end $!");
229             }
230 2856         9271 my $ret = read($fh, $string, $end-$start+1);
231 2856 50       4079 if( !defined $ret ) {
232 0         0 $self->throw("Unable to read $start:$end $!");
233             }
234 2856         5832 return $string;
235             }
236              
237             =head2 add_sequence_as_string
238              
239             Title : add_sequence_as_string
240             Usage : $seq->add_sequence_as_string("CATGAT");
241             Function: Appends additional residues to an existing LargePrimarySeq object.
242             This allows one to build up a large sequence without storing
243             entire object in memory.
244             Returns : Current length of sequence
245             Args : string to append
246              
247             =cut
248              
249             sub add_sequence_as_string{
250 5635     5635 1 7560 my ($self,$str) = @_;
251 5635         6218 my $len = $self->length + CORE::length($str);
252 5635         7872 my $fh = $self->_fh();
253 5635 50       33400 if(! seek($fh,0,2)) {
254 0         0 $self->throw("Unable to seek end of file: $!");
255             }
256 5635         10898 $self->_print($str);
257 5635         6651 $self->length($len);
258             }
259              
260              
261             =head2 _filename
262              
263             Title : _filename
264             Usage : $obj->_filename($newval)
265             Function:
266             Example :
267             Returns : value of _filename
268             Args : newvalue (optional)
269              
270              
271             =cut
272              
273             sub _filename{
274 59     59   75 my ($obj,$value) = @_;
275 59 100       88 if( defined $value) {
276 14         20 $obj->{'_filename'} = $value;
277             }
278 59         804 return $obj->{'_filename'};
279              
280             }
281              
282              
283             =head2 alphabet
284              
285             Title : alphabet
286             Usage : $obj->alphabet($newval)
287             Function:
288             Example :
289             Returns : value of alphabet
290             Args : newvalue (optional)
291              
292              
293             =cut
294              
295             sub alphabet{
296 29     29 1 49 my ($self,$value) = @_;
297 29 100       47 if( defined $value) {
298 14         42 $self->SUPER::alphabet($value);
299             }
300 29   100     57 return $self->SUPER::alphabet() || 'dna';
301              
302             }
303              
304             sub DESTROY {
305 27     27   941 my $self = shift;
306 27         69 my $fh = $self->_fh();
307 27 100       172 close($fh) if( defined $fh );
308             # this should be handled by Tempfile removal, but we'll unlink anyways.
309 27 100 100     60 unlink $self->_filename() if defined $self->_filename() && -e $self->_filename;
310             # remove tempdirs as well
311 27 100 66     606 rmdir $self->{tempdir} if defined $self->{tempdir} && -e $self->{tempdir};
312 27         96 $self->SUPER::DESTROY();
313             }
314              
315             1;