File Coverage

Bio/Seq/LargePrimarySeq.pm
Criterion Covered Total %
statement 80 94 85.1
branch 40 56 71.4
condition 14 20 70.0
subroutine 11 11 100.0
pod 6 6 100.0
total 151 187 80.7


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Seq::LargePrimarySeq
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Ewan Birney
7             #
8             # Copyright Ewan Birney
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # updated to utilize File::Temp - jason 2000-12-12
13             # POD documentation - main docs before the code
14              
15             =head1 NAME
16              
17             Bio::Seq::LargePrimarySeq - PrimarySeq object that stores sequence as
18             files in the tempdir (as found by File::Temp) or the default method in
19             Bio::Root::Root
20              
21             =head1 SYNOPSIS
22              
23             # normal primary seq usage
24              
25             =head1 DESCRIPTION
26              
27             This object stores a sequence as a series of files in a temporary
28             directory. The aim is to allow someone the ability to store very large
29             sequences (eg, E 100MBases) in a file system without running out
30             of memory (eg, on a 64 MB real memory machine!).
31              
32             Of course, to actually make use of this functionality, the programs
33             which use this object B not call $primary_seq-Eseq otherwise
34             the entire sequence will come out into memory and probably paste your
35             machine. However, calls $primary_seq-Esubseq(10,100) will cause
36             only 90 characters to be brought into real memory.
37              
38             =head1 FEEDBACK
39              
40             =head2 Mailing Lists
41              
42             User feedback is an integral part of the evolution of this and other
43             Bioperl modules. Send your comments and suggestions preferably to one
44             of the Bioperl mailing lists. Your participation is much appreciated.
45              
46             bioperl-l@bioperl.org - General discussion
47             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
48              
49             =head2 Support
50              
51             Please direct usage questions or support issues to the mailing list:
52              
53             I
54              
55             rather than to the module maintainer directly. Many experienced and
56             reponsive experts will be able look at the problem and quickly
57             address it. Please include a thorough description of the problem
58             with code and data examples if at all possible.
59              
60             =head2 Reporting Bugs
61              
62             Report bugs to the Bioperl bug tracking system to help us keep track
63             the bugs and their resolution. Bug reports can be submitted via the
64             web:
65              
66             https://github.com/bioperl/bioperl-live/issues
67              
68             =head1 AUTHOR - Ewan Birney, Jason Stajich
69              
70             Email birney@ebi.ac.uk
71             Email jason@bioperl.org
72              
73             =head1 APPENDIX
74              
75             The rest of the documentation details each of the object
76             methods. Internal methods are usually preceded with a _
77              
78             =cut
79              
80              
81             # Let the code begin...
82              
83              
84             package Bio::Seq::LargePrimarySeq;
85 4     4   390 use vars qw($AUTOLOAD);
  4         3  
  4         154  
86 4     4   12 use strict;
  4         4  
  4         78  
87              
88              
89 4     4   11 use base qw(Bio::PrimarySeq Bio::Root::IO Bio::Seq::LargeSeqI);
  4         3  
  4         1401  
90              
91             sub new {
92 22     22 1 170 my ($class, %params) = @_;
93            
94             # don't let PrimarySeq set seq until we have
95             # opened filehandle
96              
97 22   66     49 my $seq = $params{'-seq'} || $params{'-SEQ'};
98 22 100       46 if($seq ) {
99 10         12 delete $params{'-seq'};
100 10         12 delete $params{'-SEQ'};
101             }
102 22         70 my $self = $class->SUPER::new(%params);
103 22         82 $self->_initialize_io(%params);
104 22         55 my $tempdir = $self->tempdir( CLEANUP => 1);
105 22         6185 my ($tfh,$file) = $self->tempfile( DIR => $tempdir );
106 22         42 $self->{tempdir} = $tempdir;
107 22 50       84 $tfh && $self->_fh($tfh);
108 22 50       61 $file && $self->_filename($file);
109 22         36 $self->length(0);
110 22 100       45 $seq && $self->seq($seq);
111              
112 22         66 return $self;
113             }
114              
115              
116             =head2 length
117              
118             Title : length
119             Usage :
120             Function:
121             Example :
122             Returns :
123             Args :
124              
125              
126             =cut
127              
128             sub length {
129 14185     14185 1 9461 my ($obj,$value) = @_;
130 14185 100       17923 if( defined $value) {
131 5649         4587 $obj->{'length'} = $value;
132             }
133 14185 50       26073 return (defined $obj->{'length'}) ? $obj->{'length'} : 0;
134             }
135              
136             =head2 seq
137              
138             Title : seq
139             Usage :
140             Function:
141             Example :
142             Returns :
143             Args :
144              
145              
146             =cut
147              
148             sub seq {
149 29     29 1 28 my ($self, $data) = @_;
150 29 100       42 if( defined $data ) {
151 11 50       15 if( $self->length() == 0) {
152 11         14 $self->add_sequence_as_string($data);
153             } else {
154 0         0 $self->warn("Trying to reset the seq string, cannot do this with a LargePrimarySeq - must allocate a new object");
155             }
156             }
157 29         43 return $self->subseq(1,$self->length);
158             }
159              
160             =head2 subseq
161              
162             Title : subseq
163             Usage :
164             Function:
165             Example :
166             Returns :
167             Args :
168              
169              
170             =cut
171              
172             sub subseq{
173 2859     2859 1 3578 my ($self,$start,$end) = @_;
174 2859         1648 my $string;
175 2859         2859 my $fh = $self->_fh();
176              
177 2859 100 66     3944 if( ref($start) && $start->isa('Bio::LocationI') ) {
178 3         2 my $loc = $start;
179 3 50       10 if( $loc->length == 0 ) {
    50          
180 0         0 $self->warn("Expect location lengths to be > 0");
181 0         0 return '';
182             } elsif( $loc->end < $loc->start ) {
183             # what about circular seqs
184 0         0 $self->warn("Expect location start to come before location end");
185             }
186 3         5 my $seq = '';
187 3 100       12 if( $loc->isa('Bio::Location::SplitLocationI') ) {
188 1         2 foreach my $subloc ( $loc->sub_Location ) {
189 2 50       5 if(! seek($fh,$subloc->start() - 1,0)) {
190 0         0 $self->throw("Unable to seek on file $start:$end $!");
191             }
192 2         4 my $ret = read($fh, $string, $subloc->length());
193 2 50       5 if( !defined $ret ) {
194 0         0 $self->throw("Unable to read $start:$end $!");
195             }
196 2 100       5 if( $subloc->strand < 0 ) {
197 1         5 $string = Bio::PrimarySeq->new(-seq => $string)->revcom()->seq();
198             }
199 2         7 $seq .= $string;
200             }
201             } else {
202 2 50       4 if(! seek($fh,$loc->start()-1,0)) {
203 0         0 $self->throw("Unable to seek on file ".$loc->start.":".
204             $loc->end ." $!");
205             }
206 2         4 my $ret = read($fh, $string, $loc->length());
207 2 50       4 if( !defined $ret ) {
208 0         0 $self->throw("Unable to read ".$loc->start.":".
209             $loc->end ." $!");
210             }
211 2         5 $seq = $string;
212             }
213 3 50 66     6 if( defined $loc->strand &&
214             $loc->strand < 0 ) {
215 0         0 $seq = Bio::PrimarySeq->new(-seq => $seq)->revcom()->seq();
216             }
217 3         11 return $seq;
218             }
219 2856 50 33     4466 if( $start <= 0 || $end > $self->length ) {
220 0         0 $self->throw("Attempting to get a subseq out of range $start:$end vs ".
221             $self->length);
222             }
223 2856 50       3308 if( $end < $start ) {
224 0         0 $self->throw("Attempting to subseq with end ($end) less than start ($start). To revcom use the revcom function with trunc");
225             }
226              
227 2856 50       5738 if(! seek($fh,$start-1,0)) {
228 0         0 $self->throw("Unable to seek on file $start:$end $!");
229             }
230 2856         9084 my $ret = read($fh, $string, $end-$start+1);
231 2856 50       3610 if( !defined $ret ) {
232 0         0 $self->throw("Unable to read $start:$end $!");
233             }
234 2856         4788 return $string;
235             }
236              
237             =head2 add_sequence_as_string
238              
239             Title : add_sequence_as_string
240             Usage : $seq->add_sequence_as_string("CATGAT");
241             Function: Appends additional residues to an existing LargePrimarySeq object.
242             This allows one to build up a large sequence without storing
243             entire object in memory.
244             Returns : Current length of sequence
245             Args : string to append
246              
247             =cut
248              
249             sub add_sequence_as_string{
250 5635     5635 1 5334 my ($self,$str) = @_;
251 5635         5130 my $len = $self->length + CORE::length($str);
252 5635         6524 my $fh = $self->_fh();
253 5635 50       39262 if(! seek($fh,0,2)) {
254 0         0 $self->throw("Unable to seek end of file: $!");
255             }
256 5635         7331 $self->_print($str);
257 5635         5917 $self->length($len);
258             }
259              
260              
261             =head2 _filename
262              
263             Title : _filename
264             Usage : $obj->_filename($newval)
265             Function:
266             Example :
267             Returns : value of _filename
268             Args : newvalue (optional)
269              
270              
271             =cut
272              
273             sub _filename{
274 59     59   61 my ($obj,$value) = @_;
275 59 100       82 if( defined $value) {
276 14         15 $obj->{'_filename'} = $value;
277             }
278 59         83045 return $obj->{'_filename'};
279              
280             }
281              
282              
283             =head2 alphabet
284              
285             Title : alphabet
286             Usage : $obj->alphabet($newval)
287             Function:
288             Example :
289             Returns : value of alphabet
290             Args : newvalue (optional)
291              
292              
293             =cut
294              
295             sub alphabet{
296 29     29 1 31 my ($self,$value) = @_;
297 29 100       37 if( defined $value) {
298 14         40 $self->SUPER::alphabet($value);
299             }
300 29   100     46 return $self->SUPER::alphabet() || 'dna';
301              
302             }
303              
304             sub DESTROY {
305 27     27   879 my $self = shift;
306 27         57 my $fh = $self->_fh();
307 27 100       263 close($fh) if( defined $fh );
308             # this should be handled by Tempfile removal, but we'll unlink anyways.
309 27 100 100     50 unlink $self->_filename() if defined $self->_filename() && -e $self->_filename;
310             # remove tempdirs as well
311 27 100 66     1219 rmdir $self->{tempdir} if defined $self->{tempdir} && -e $self->{tempdir};
312 27         92 $self->SUPER::DESTROY();
313             }
314              
315             1;