File Coverage

Bio/Variation/IO.pm
Criterion Covered Total %
statement 24 36 66.6
branch 4 10 40.0
condition 1 2 50.0
subroutine 5 9 55.5
pod 5 5 100.0
total 39 62 62.9


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Variation::IO
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Heikki Lehvaslaiho
7             #
8             # Copyright Heikki Lehvaslaiho
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Variation::IO - Handler for sequence variation IO Formats
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Variation::IO;
21              
22             $in = Bio::Variation::IO->new(-file => "inputfilename" ,
23             -format => 'flat');
24             $out = Bio::Variation::IO->new(-file => ">outputfilename" ,
25             -format => 'xml');
26              
27             while ( my $seq = $in->next() ) {
28             $out->write($seq);
29             }
30              
31             # or
32              
33             use Bio::Variation::IO;
34              
35             #input file format can be read from the file extension (dat|xml)
36             $in = Bio::Variation::IO->newFh(-file => "inputfilename");
37             $out = Bio::Variation::IO->newFh(-format => 'xml');
38              
39             # World's shortest flat<->xml format converter:
40             print $out $_ while <$in>;
41              
42             =head1 DESCRIPTION
43              
44             Bio::Variation::IO is a handler module for the formats in the
45             Variation IO set (eg, Bio::Variation::IO::flat). It is the officially
46             sanctioned way of getting at the format objects, which most people
47             should use.
48              
49             The structure, conventions and most of the code are inherited from
50             the L<Bio::SeqIO> module. The main difference is that instead of using
51             methods next_seq and write_seq, you drop '_seq' from the method names.
52              
53             The idea is that you request a stream object for a particular format.
54             All the stream objects have a notion of an internal file that is read
55             from or written to. A particular SeqIO object instance is configured
56             for either input or output. A specific example of a stream object is
57             the Bio::Variation::IO::flat object.
58              
59             Each stream object has functions
60              
61             $stream->next();
62              
63             and
64              
65             $stream->write($seqDiff);
66              
67             also
68              
69             $stream->type() # returns 'INPUT' or 'OUTPUT'
70              
71             As an added bonus, you can recover a filehandle that is tied to the
72             SeqIO object, allowing you to use the standard E<lt>E<gt> and print
73             operations to read and write sequence objects:
74              
75             use Bio::Variation::IO;
76              
77             $stream = Bio::Variation::IO->newFh(-format => 'flat');
78             # read from standard input
79              
80             while ( $seq = <$stream> ) {
81             # do something with $seq
82             }
83              
84             and
85              
86             print $stream $seq; # when stream is in output mode
87              
88             This makes the simplest ever reformatter
89              
90             #!/usr/local/bin/perl
91              
92             $format1 = shift;
93             $format2 = shift;
94              
95             use Bio::Variation::IO;
96              
97             $in = Bio::Variation::IO->newFh(-format => $format1 );
98             $out = Bio::Variation::IO->newFh(-format => $format2 );
99              
100             print $out $_ while <$in>;
101              
102              
103             =head1 CONSTRUCTORS
104              
105             =head2 Bio::Variation::IO-E<gt>new()
106              
107             $seqIO = Bio::Variation::IO->new(-file => 'filename', -format=>$format);
108             $seqIO = Bio::Variation::IO->new(-fh => \*FILEHANDLE, -format=>$format);
109             $seqIO = Bio::Variation::IO->new(-format => $format);
110              
111             The new() class method constructs a new Bio::Variation::IO object. The
112             returned object can be used to retrieve or print Bio::Variation::SeqDiff
113             objects. new() accepts the following parameters:
114              
115             =over 4
116              
117             =item -file
118              
119             A file path to be opened for reading or writing. The usual Perl
120             conventions apply:
121              
122             'file' # open file for reading
123             '>file' # open file for writing
124             '>>file' # open file for appending
125             '+<file' # open file for reading and writing
126             'command |' # open a pipe from the command
127             '| command' # open a pipe to the command
128              
129             =item -fh
130              
131             You may provide new() with a previously-opened filehandle. For
132             example, to read from STDIN:
133              
134             $seqIO = Bio::Variation::IO->new(-fh => \*STDIN);
135              
136             Note that you must pass filehandles as references to globs.
137              
138             If neither a filehandle nor a filename is specified, then the module
139             will read from the @ARGV array or STDIN, using the familiar E<lt>E<gt>
140             semantics.
141              
142             =item -format
143              
144             Specify the format of the file. Supported formats include:
145              
146             flat pseudo EMBL format
147             xml seqvar xml format
148              
149             If no format is specified and a filename is given, then the module
150             will attempt to deduce it from the filename. If this is unsuccessful,
151             the 'flat' format is assumed.
152              
153             The format name is case insensitive. 'FLAT', 'Flat' and 'flat' are
154             all supported.
155              
156             =back
157              
158             =head2 Bio::Variation::IO-E<gt>newFh()
159              
160             $fh = Bio::Variation::IO->newFh(-fh => \*FILEHANDLE, -format=>$format);
161             $fh = Bio::Variation::IO->newFh(-format => $format);
162             # etc.
163              
164             #e.g.
165             $out = Bio::Variation::IO->newFh( '-FORMAT' => 'flat');
166             print $out $seqDiff;
167              
168             This constructor behaves like new(), but returns a tied filehandle
169             rather than a Bio::Variation::IO object. You can read sequences from this
170             object using the familiar E<lt>E<gt> operator, and write to it using print().
171             The usual array and $_ semantics work. For example, you can read all
172             sequence objects into an array like this:
173              
174             @mutations = <$fh>;
175              
176             Other operations, such as read(), sysread(), write(), close(), and printf()
177             are not supported.
178              
179             =head1 OBJECT METHODS
180              
181             See below for more detailed summaries. The main methods are:
182              
183             =head2 $sequence = $seqIO-E<gt>next()
184              
185             Fetch the next sequence from the stream.
186              
187             =head2 $seqIO-E<gt>write($sequence [,$another_sequence,...])
188              
189             Write the specified sequence(s) to the stream.
190              
191             =head2 TIEHANDLE(), READLINE(), PRINT()
192              
193             These provide the tie interface. See L<perltie> for more details.
194              
195             =head1 FEEDBACK
196              
197             =head2 Mailing Lists
198              
199             User feedback is an integral part of the evolution of this and other
200             Bioperl modules. Send your comments and suggestions preferably to the
201             Bioperl mailing lists. Your participation is much appreciated.
202              
203             bioperl-l@bioperl.org - General discussion
204             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
205              
206             =head2 Support
207              
208             Please direct usage questions or support issues to the mailing list:
209              
210             I<bioperl-l@bioperl.org>
211              
212             rather than to the module maintainer directly. Many experienced and
213             responsive experts will be able to look at the problem and quickly
214             address it. Please include a thorough description of the problem
215             with code and data examples if at all possible.
216              
217             =head2 Reporting Bugs
218              
219             Report bugs to the Bioperl bug tracking system to help us keep track
220             of the bugs and their resolution. Bug reports can be submitted via the
221             web:
222              
223             https://github.com/bioperl/bioperl-live/issues
224              
225             =head1 AUTHOR - Heikki Lehvaslaiho
226              
227             Email: heikki-at-bioperl-dot-org
228              
229             =head1 APPENDIX
230              
231             The rest of the documentation details each of the object
232             methods. Internal methods are usually preceded with a _
233              
234             =cut
235              
236             # Let the code begin...
237              
238             package Bio::Variation::IO;
239              
240 2     2   1476 use strict;
  2         4  
  2         61  
241              
242              
243 2     2   8 use base qw(Bio::SeqIO Bio::Root::IO);
  2         2  
  2         679  
244              
245             =head2 new
246              
247             Title : new
248             Usage : $stream = Bio::Variation::IO->new(-file => $filename, -format => 'Format')
249             Function: Returns a new seqstream
250             Returns : A Bio::Variation::IO::Handler initialised with the appropriate format
251             Args : -file => $filename
252             -format => format
253             -fh => filehandle to attach to
254              
255             =cut
256              
257              
258             sub new {
259 5     5 1 2155 my ($class, %param) = @_;
260 5         7 my ($format);
261              
262 5         21 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
  8         25  
263             $format = $param{'-format'}
264 5   50     32 || $class->_guess_format( $param{-file} || $ARGV[0] )
265             || 'flat';
266 5         12 $format = "\L$format"; # normalize capitalization to lower case
267              
268 5 50       16 return unless $class->_load_format_module($format);
269 5         35 return "Bio::Variation::IO::$format"->new(%param);
270             }
271              
272              
273             =head2 format
274              
275             Title : format
276             Usage : $format = $stream->format()
277             Function: Get the variation format
278             Returns : variation format
279             Args : none
280              
281             =cut
282              
283             # format() method inherited from Bio::Root::IO
284              
285              
286             sub _load_format_module {
287 5     5   12 my ($class, $format) = @_;
288 5         12 my $module = "Bio::Variation::IO::" . $format;
289 5         6 my $ok;
290 5         9 eval {
291 5         32 $ok = $class->_load_module($module);
292             };
293 5 50       15 if ( $@ ) {
294 0         0 print STDERR <<END
295             $class: $format cannot be found
296             Exception $@
297             For more information about the IO system please see the IO docs.
298             This includes ways of checking for formats at compile time, not run time
299             END
300             ;
301             }
302 5         16 return $ok;
303             }
304              
305             =head2 next
306              
307             Title : next
308             Usage : $seqDiff = $stream->next
309             Function: reads the next $seqDiff object from the stream
310             Returns : a Bio::Variation::SeqDiff object
311             Args :
312              
313             =cut
314              
315             sub next {
316 0     0 1 0 my ($self, $seq) = @_;
317 0         0 $self->throw("Sorry, you cannot read from a generic Bio::Variation::IO object.");
318             }
319              
320             sub next_seq {
321 0     0 1 0 my ($self, $seq) = @_;
322 0         0 $self->throw("These are not sequence objects. Use method 'next' instead of 'next_seq'.");
323 0         0 $self->next($seq);
324             }
325              
326             =head2 write
327              
328             Title : write
329             Usage : $stream->write($seq)
330             Function: writes the $seq object into the stream
331             Returns : 1 for success and 0 for error
332             Args : Bio::Variation::SeqDiff object
333              
334             =cut
335              
336             sub write {
337 0     0 1 0 my ($self, $seq) = @_;
338 0         0 $self->throw("Sorry, you cannot write to a generic Bio::Variation::IO object.");
339             }
340              
341             sub write_seq {
342 0     0 1 0 my ($self, $seq) = @_;
343 0         0 $self->warn("These are not sequence objects. Use method 'write' instead of 'write_seq'.");
344 0         0 $self->write($seq);
345             }
346              
347             =head2 _guess_format
348              
349             Title : _guess_format
350             Usage : $obj->_guess_format($filename)
351             Function:
352             Example :
353             Returns : guessed format of filename (lower case)
354             Args :
355              
356             =cut
357              
358             sub _guess_format {
359 2     2   4 my $class = shift;
360 2 50       6 return unless $_ = shift;
361 2 50       17 return 'flat' if /\.dat$/i;
362 0 0         return 'xml' if /\.xml$/i;
363             }
364              
365              
366             1;