File Coverage

Bio/Seq/LargePrimarySeq.pm
Criterion Covered Total %
statement 80 94 85.1
branch 40 56 71.4
condition 14 20 70.0
subroutine 11 11 100.0
pod 6 6 100.0
total 151 187 80.7


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Seq::LargePrimarySeq
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Ewan Birney
7             #
8             # Copyright Ewan Birney
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # updated to utilize File::Temp - jason 2000-12-12
13             # POD documentation - main docs before the code
14              
15             =head1 NAME
16              
17             Bio::Seq::LargePrimarySeq - PrimarySeq object that stores sequence as
18             files in the tempdir (as found by File::Temp) or the default method in
19             Bio::Root::Root
20              
21             =head1 SYNOPSIS
22              
23             # normal primary seq usage
24              
25             =head1 DESCRIPTION
26              
27             This object stores a sequence as a series of files in a temporary
28             directory. The aim is to allow someone the ability to store very large
29             sequences (eg, E 100MBases) in a file system without running out
30             of memory (eg, on a 64 MB real memory machine!).
31              
32             Of course, to actually make use of this functionality, the programs
33             which use this object B not call $primary_seq-Eseq otherwise
34             the entire sequence will come out into memory and probably paste your
35             machine. However, calls $primary_seq-Esubseq(10,100) will cause
36             only 90 characters to be brought into real memory.
37              
38             =head1 FEEDBACK
39              
40             =head2 Mailing Lists
41              
42             User feedback is an integral part of the evolution of this and other
43             Bioperl modules. Send your comments and suggestions preferably to one
44             of the Bioperl mailing lists. Your participation is much appreciated.
45              
46             bioperl-l@bioperl.org - General discussion
47             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48              
49             =head2 Support
50              
51             Please direct usage questions or support issues to the mailing list:
52              
53             I
54              
55             rather than to the module maintainer directly. Many experienced and
56             reponsive experts will be able look at the problem and quickly
57             address it. Please include a thorough description of the problem
58             with code and data examples if at all possible.
59              
60             =head2 Reporting Bugs
61              
62             Report bugs to the Bioperl bug tracking system to help us keep track
63             the bugs and their resolution. Bug reports can be submitted via the
64             web:
65              
66             https://github.com/bioperl/bioperl-live/issues
67              
68             =head1 AUTHOR - Ewan Birney, Jason Stajich
69              
70             Email birney@ebi.ac.uk
71             Email jason@bioperl.org
72              
73             =head1 APPENDIX
74              
75             The rest of the documentation details each of the object
76             methods. Internal methods are usually preceded with a _
77              
78             =cut
79              
80              
81             # Let the code begin...
82              
83              
84             package Bio::Seq::LargePrimarySeq;
85 4     4   450 use vars qw($AUTOLOAD);
  4         4  
  4         136  
86 4     4   11 use strict;
  4         4  
  4         79  
87              
88              
89 4     4   12 use base qw(Bio::PrimarySeq Bio::Root::IO Bio::Seq::LargeSeqI);
  4         2  
  4         1345  
90              
91             sub new {
92 22     22 1 182 my ($class, %params) = @_;
93            
94             # don't let PrimarySeq set seq until we have
95             # opened filehandle
96              
97 22   66     55 my $seq = $params{'-seq'} || $params{'-SEQ'};
98 22 100       44 if($seq ) {
99 10         12 delete $params{'-seq'};
100 10         10 delete $params{'-SEQ'};
101             }
102 22         78 my $self = $class->SUPER::new(%params);
103 22         78 $self->_initialize_io(%params);
104 22         58 my $tempdir = $self->tempdir( CLEANUP => 1);
105 22         6440 my ($tfh,$file) = $self->tempfile( DIR => $tempdir );
106 22         42 $self->{tempdir} = $tempdir;
107 22 50       82 $tfh && $self->_fh($tfh);
108 22 50       60 $file && $self->_filename($file);
109 22         41 $self->length(0);
110 22 100       41 $seq && $self->seq($seq);
111              
112 22         74 return $self;
113             }
114              
115              
116             =head2 length
117              
118             Title : length
119             Usage :
120             Function:
121             Example :
122             Returns :
123             Args :
124              
125              
126             =cut
127              
128             sub length {
129 14185     14185 1 9713 my ($obj,$value) = @_;
130 14185 100       16232 if( defined $value) {
131 5649         4617 $obj->{'length'} = $value;
132             }
133 14185 50       24416 return (defined $obj->{'length'}) ? $obj->{'length'} : 0;
134             }
135              
136             =head2 seq
137              
138             Title : seq
139             Usage :
140             Function:
141             Example :
142             Returns :
143             Args :
144              
145              
146             =cut
147              
148             sub seq {
149 29     29 1 26 my ($self, $data) = @_;
150 29 100       42 if( defined $data ) {
151 11 50       13 if( $self->length() == 0) {
152 11         16 $self->add_sequence_as_string($data);
153             } else {
154 0         0 $self->warn("Trying to reset the seq string, cannot do this with a LargePrimarySeq - must allocate a new object");
155             }
156             }
157 29         45 return $self->subseq(1,$self->length);
158             }
159              
160             =head2 subseq
161              
162             Title : subseq
163             Usage :
164             Function:
165             Example :
166             Returns :
167             Args :
168              
169              
170             =cut
171              
172             sub subseq{
173 2859     2859 1 2986 my ($self,$start,$end) = @_;
174 2859         1568 my $string;
175 2859         3025 my $fh = $self->_fh();
176              
177 2859 100 66     3860 if( ref($start) && $start->isa('Bio::LocationI') ) {
178 3         4 my $loc = $start;
179 3 50       9 if( $loc->length == 0 ) {
    50          
180 0         0 $self->warn("Expect location lengths to be > 0");
181 0         0 return '';
182             } elsif( $loc->end < $loc->start ) {
183             # what about circular seqs
184 0         0 $self->warn("Expect location start to come before location end");
185             }
186 3         4 my $seq = '';
187 3 100       11 if( $loc->isa('Bio::Location::SplitLocationI') ) {
188 1         3 foreach my $subloc ( $loc->sub_Location ) {
189 2 50       4 if(! seek($fh,$subloc->start() - 1,0)) {
190 0         0 $self->throw("Unable to seek on file $start:$end $!");
191             }
192 2         4 my $ret = read($fh, $string, $subloc->length());
193 2 50       6 if( !defined $ret ) {
194 0         0 $self->throw("Unable to read $start:$end $!");
195             }
196 2 100       4 if( $subloc->strand < 0 ) {
197 1         5 $string = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
198             }
199 2         7 $seq .= $string;
200             }
201             } else {
202 2 50       4 if(! seek($fh,$loc->start()-1,0)) {
203 0         0 $self->throw("Unable to seek on file ".$loc->start.":".
204             $loc->end ." $!");
205             }
206 2         5 my $ret = read($fh, $string, $loc->length());
207 2 50       6 if( !defined $ret ) {
208 0         0 $self->throw("Unable to read ".$loc->start.":".
209             $loc->end ." $!");
210             }
211 2         2 $seq = $string;
212             }
213 3 50 66     7 if( defined $loc->strand &&
214             $loc->strand < 0 ) {
215 0         0 $seq = Bio::PrimarySeq->new(-seq => $seq)->revcom()->seq();
216             }
217 3         11 return $seq;
218             }
219 2856 50 33     4448 if( $start <= 0 || $end > $self->length ) {
220 0         0 $self->throw("Attempting to get a subseq out of range $start:$end vs ".
221             $self->length);
222             }
223 2856 50       3148 if( $end < $start ) {
224 0         0 $self->throw("Attempting to subseq with end ($end) less than start ($start). To revcom use the revcom function with trunc");
225             }
226              
227 2856 50       6230 if(! seek($fh,$start-1,0)) {
228 0         0 $self->throw("Unable to seek on file $start:$end $!");
229             }
230 2856         9026 my $ret = read($fh, $string, $end-$start+1);
231 2856 50       3350 if( !defined $ret ) {
232 0         0 $self->throw("Unable to read $start:$end $!");
233             }
234 2856         4839 return $string;
235             }
236              
237             =head2 add_sequence_as_string
238              
239             Title : add_sequence_as_string
240             Usage : $seq->add_sequence_as_string("CATGAT");
241             Function: Appends additional residues to an existing LargePrimarySeq object.
242             This allows one to build up a large sequence without storing
243             entire object in memory.
244             Returns : Current length of sequence
245             Args : string to append
246              
247             =cut
248              
249             sub add_sequence_as_string{
250 5635     5635 1 5452 my ($self,$str) = @_;
251 5635         5150 my $len = $self->length + CORE::length($str);
252 5635         6712 my $fh = $self->_fh();
253 5635 50       40459 if(! seek($fh,0,2)) {
254 0         0 $self->throw("Unable to seek end of file: $!");
255             }
256 5635         7817 $self->_print($str);
257 5635         5572 $self->length($len);
258             }
259              
260              
261             =head2 _filename
262              
263             Title : _filename
264             Usage : $obj->_filename($newval)
265             Function:
266             Example :
267             Returns : value of _filename
268             Args : newvalue (optional)
269              
270              
271             =cut
272              
273             sub _filename{
274 59     59   57 my ($obj,$value) = @_;
275 59 100       94 if( defined $value) {
276 14         17 $obj->{'_filename'} = $value;
277             }
278 59         1089 return $obj->{'_filename'};
279              
280             }
281              
282              
283             =head2 alphabet
284              
285             Title : alphabet
286             Usage : $obj->alphabet($newval)
287             Function:
288             Example :
289             Returns : value of alphabet
290             Args : newvalue (optional)
291              
292              
293             =cut
294              
295             sub alphabet{
296 29     29 1 30 my ($self,$value) = @_;
297 29 100       42 if( defined $value) {
298 14         33 $self->SUPER::alphabet($value);
299             }
300 29   100     42 return $self->SUPER::alphabet() || 'dna';
301              
302             }
303              
304             sub DESTROY {
305 27     27   783 my $self = shift;
306 27         60 my $fh = $self->_fh();
307 27 100       264 close($fh) if( defined $fh );
308             # this should be handled by Tempfile removal, but we'll unlink anyways.
309 27 100 100     57 unlink $self->_filename() if defined $self->_filename() && -e $self->_filename;
310             # remove tempdirs as well
311 27 100 66     1121 rmdir $self->{tempdir} if defined $self->{tempdir} && -e $self->{tempdir};
312 27         94 $self->SUPER::DESTROY();
313             }
314              
315             1;