File Coverage

Bio/ClusterIO.pm
Criterion Covered Total %
statement 33 50 66.0
branch 6 16 37.5
condition 2 8 25.0
subroutine 7 13 53.8
pod 4 4 100.0
total 52 91 57.1


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::ClusterIO.pm
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Andrew Macgregor
7             #
8             # Copyright Andrew Macgregor, Jo-Ann Stanton, David Green
9             # Molecular Embryology Group, Anatomy & Structural Biology, University of Otago
10             # http://anatomy.otago.ac.nz/meg
11             #
12             # You may distribute this module under the same terms as perl itself
13             #
14             # _history
15             #
16             # May 7, 2002 - changed from UniGene.pm to more generic ClusterIO.pm
17             # by Andrew Macgregor
18             #
19             # April 17, 2002 - Initial implementation by Andrew Macgregor
20             # POD documentation - main docs before the code
21              
22             =head1 NAME
23              
24             Bio::ClusterIO - Handler for Cluster Formats
25              
26             =head1 SYNOPSIS
27              
28             #NB: This example is unigene specific
29              
30             use Bio::ClusterIO;
31              
32             $stream = Bio::ClusterIO->new('-file' => "Hs.data",
33             '-format' => "unigene");
34             # note: we quote -format to keep older perl's from complaining.
35              
36             while ( my $in = $stream->next_cluster() ) {
37             print $in->unigene_id() . "\n";
38             while ( my $sequence = $in->next_seq() ) {
39             print $sequence->accession_number() . "\n";
40             }
41             }
42             # Parsing errors are printed to STDERR.
43              
44             =head1 DESCRIPTION
45              
46             The ClusterIO module works with the ClusterIO format module to read
47             various cluster formats such as NCBI UniGene.
48              
49              
50             =head1 CONSTRUCTORS
51              
52             =head2 Bio::ClusterIO-Enew()
53              
54             $str = Bio::ClusterIO->new(-file => 'filename',
55             -format=>$format);
56              
57             The new() class method constructs a new Bio::ClusterIO object. The
58             returned object can be used to retrieve or print cluster
59             objects. new() accepts the following parameters:
60              
61             =over 4
62              
63             =item -file
64              
65             A file path to be opened for reading.
66              
67             =item -format
68              
69             Specify the format of the file. Supported formats include:
70              
71             unigene *.data UniGene build files.
72             dbsnp *.xml dbSNP XML files
73              
74             If no format is specified and a filename is given, then the module
75             will attempt to deduce it from the filename. If this is unsuccessful,
76             the main UniGene build format is assumed.
77              
78             The format name is case insensitive. 'UNIGENE', 'UniGene' and
79             'unigene' are all supported, as are dbSNP, dbsnp, and DBSNP
80              
81             =back
82              
83             =head1 OBJECT METHODS
84              
85             See below for more detailed summaries. The main methods are:
86              
87             =head2 $cluster = $str-Enext_cluster()
88              
89             Fetch the next cluster from the stream.
90              
91              
92             =head2 TIEHANDLE(), READLINE(), PRINT()
93              
94             These I've left in here because they were in the SeqIO
95             module. Feedback appreciated. There they provide the tie interface.
96             See L for more details.
97              
98             =head1 FEEDBACK
99              
100             =head2 Mailing Lists
101              
102             User feedback is an integral part of the evolution of this
103             and other Bioperl modules. Send your comments and suggestions preferably
104             to one of the Bioperl mailing lists.
105             Your participation is much appreciated.
106              
107             bioperl-l@bioperl.org - General discussion
108             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
109              
110             =head2 Support
111              
112             Please direct usage questions or support issues to the mailing list:
113              
114             I
115              
116             rather than to the module maintainer directly. Many experienced and
117             reponsive experts will be able look at the problem and quickly
118             address it. Please include a thorough description of the problem
119             with code and data examples if at all possible.
120              
121             =head2 Reporting Bugs
122              
123             Report bugs to the Bioperl bug tracking system to help us keep track
124             the bugs and their resolution. Bug reports can be submitted via the
125             web:
126              
127             https://github.com/bioperl/bioperl-live/issues
128              
129             =head1 AUTHOR - Andrew Macgregor
130              
131             Email andrew@anatomy.otago.ac.nz
132              
133             =head1 APPENDIX
134              
135             The rest of the documentation details each of the object
136             methods. Internal methods are usually preceded with a _
137              
138             =cut
139              
140             #'
141             # Let the code begin...
142              
143             package Bio::ClusterIO;
144              
145 2     2   883 use strict;
  2         2  
  2         52  
146              
147              
148 2     2   6 use base qw(Bio::Root::Root Bio::Root::IO);
  2         2  
  2         633  
149              
150              
151              
152             =head2 new
153              
154             Title : new
155             Usage : Bio::ClusterIO->new(-file => $filename, -format => 'format')
156             Function: Returns a new cluster stream
157             Returns : A Bio::ClusterIO::Handler initialised with the appropriate format
158             Args : -file => $filename
159             -format => format
160              
161             =cut
162              
163              
164             my $entry = 0;
165              
166             sub new {
167 4     4 1 16 my ($caller,@args) = @_;
168 4   33     20 my $class = ref($caller) || $caller;
169            
170             # or do we want to call SUPER on an object if $caller is an
171             # object?
172 4 100       19 if( $class =~ /Bio::ClusterIO::(\S+)/ ) {
173 2         12 my ($self) = $class->SUPER::new(@args);
174 2         7 $self->_initialize(@args);
175 2         16 return $self;
176             } else {
177              
178 2         8 my %param = @args;
179 2         6 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
  5         11  
180             my $format = $param{'-format'} ||
181 2   33     10 $class->_guess_format( $param{-file} || $ARGV[0] );
182 2         3 $format = "\L$format"; # normalize capitalization to lower case
183              
184 2 50       8 return unless( $class->_load_format_module($format) );
185 2         17 return "Bio::ClusterIO::$format"->new(@args);
186             }
187             }
188              
189             =head2 format
190              
191             Title : format
192             Usage : $format = $stream->format()
193             Function: Get the cluster format
194             Returns : cluster format
195             Args : none
196              
197             =cut
198              
199             # format() method inherited from Bio::Root::IO
200              
201              
202             # _initialize is chained for all ClusterIO classes
203              
204             sub _initialize {
205 2     2   3 my($self, @args) = @_;
206             # initialize the IO part
207 2         12 $self->_initialize_io(@args);
208             }
209              
210             =head2 next_cluster
211              
212             Title : next_cluster
213             Usage : $cluster = $stream->next_cluster()
214             Function: Reads the next cluster object from the stream and returns it.
215             Returns : a L compliant object
216             Args : none
217              
218              
219             =cut
220              
221             sub next_cluster {
222 0     0 1 0 my ($self, $seq) = @_;
223 0         0 $self->throw("Sorry, you cannot read from a generic Bio::ClusterIO object.");
224             }
225              
226             =head2 cluster_factory
227              
228             Title : cluster_factory
229             Usage : $obj->cluster_factory($newval)
230             Function: Get/set the object factory to use for creating the cluster
231             objects.
232             Example :
233             Returns : a L compliant object
234             Args : on set, new value (a L
235             compliant object or undef, optional)
236              
237              
238             =cut
239              
240             sub cluster_factory{
241 5     5 1 5 my $self = shift;
242              
243 5 100       11 return $self->{'cluster_factory'} = shift if @_;
244 4         22 return $self->{'cluster_factory'};
245             }
246              
247             =head2 object_factory
248              
249             Title : object_factory
250             Usage : $obj->object_factory($newval)
251             Function: This is an alias to cluster_factory with a more generic name.
252             Example :
253             Returns : a L compliant object
254             Args : on set, new value (a L
255             compliant object or undef, optional)
256              
257              
258             =cut
259              
260             sub object_factory{
261 0     0 1 0 return shift->cluster_factory(@_);
262             }
263              
264             =head2 _load_format_module
265              
266             Title : _load_format_module
267             Usage : *INTERNAL ClusterIO stuff*
268             Function: Loads up (like use) a module at run time on demand
269             Example :
270             Returns :
271             Args :
272              
273             =cut
274              
275             sub _load_format_module {
276 2     2   3 my ($self,$format) = @_;
277 2         6 my $module = "Bio::ClusterIO::" . $format;
278 2         2 my $ok;
279            
280 2         5 eval {
281 2         17 $ok = $self->_load_module($module);
282             };
283 2 50       7 if ( $@ ) {
284 0         0 print STDERR <
285             $self: could not load $format - for more details on supported formats please see the ClusterIO docs
286             Exception $@
287             END
288             ;
289             }
290 2         7 return $ok;
291             }
292              
293             =head2 _guess_format
294              
295             Title : _guess_format
296             Usage : $obj->_guess_format($filename)
297             Function: guess format based on file suffix
298             Example :
299             Returns : guessed format of filename (lower case)
300             Args :
301             Notes : formats that _filehandle() will guess include unigene and dbsnp
302              
303             =cut
304              
305             sub _guess_format {
306 0     0   0 my $class = shift;
307 0 0       0 return unless $_ = shift;
308 0 0       0 return 'unigene' if /\.(data)$/i;
309 0 0       0 return 'dbsnp' if /\.(xml)$/i;
310             }
311              
312             sub DESTROY {
313 1     1   1 my $self = shift;
314              
315 1         7 $self->close();
316             }
317              
318             # I need some direction on these!! The module works so I haven't fiddled with them!
319              
320             sub TIEHANDLE {
321 0     0     my ($class,$val) = @_;
322 0           return bless {'seqio' => $val}, $class;
323             }
324              
325             sub READLINE {
326 0     0     my $self = shift;
327 0 0 0       return $self->{'seqio'}->next_seq() || undef unless wantarray;
328 0           my (@list, $obj);
329 0           push @list, $obj while $obj = $self->{'seqio'}->next_seq();
330 0           return @list;
331             }
332              
333             sub PRINT {
334 0     0     my $self = shift;
335 0           $self->{'seqio'}->write_seq(@_);
336             }
337              
338             1;
339