File Coverage

Bio/OntologyIO.pm
Criterion Covered Total %
statement 48 51 94.1
branch 10 14 71.4
condition 1 3 33.3
subroutine 9 10 90.0
pod 3 4 75.0
total 71 82 86.5


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::OntologyIO
3             #
4             # Please direct questions and support issues to <bioperl-l@bioperl.org>
5             #
6             # Cared for by Hilmar Lapp
7             #
8             # Copyright Hilmar Lapp
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             #
13             # (c) Hilmar Lapp, hlapp at gmx.net, 2003.
14             # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2003.
15             #
16             # You may distribute this module under the same terms as perl itself.
17             # Refer to the Perl Artistic License (see the license accompanying this
18             # software package, or see http://www.perl.com/language/misc/Artistic.html)
19             # for the terms under which you may use, modify, and redistribute this module.
20             #
21             # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
22             # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
23             # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
24             #
25              
26             # POD documentation - main docs before the code
27              
28             =head1 NAME
29              
30             Bio::OntologyIO - Parser factory for Ontology formats
31              
32             =head1 SYNOPSIS
33              
34             use Bio::OntologyIO;
35              
36             my $parser = Bio::OntologyIO->new(-format => "go",
37             -file=> $file);
38              
39             while(my $ont = $parser->next_ontology()) {
40             print "read ontology ",$ont->name()," with ",
41             scalar($ont->get_root_terms)," root terms, and ",
42             scalar($ont->get_leaf_terms)," leaf terms\n";
43             }
44              
45             =head1 DESCRIPTION
46              
47             This is the parser factory for different ontology sources and
48             formats. Conceptually, it is very similar to L<Bio::SeqIO>, but the
49             difference is that the chunk of data returned as an object is an
50             entire ontology.
51              
52             =head1 FEEDBACK
53              
54             =head2 Mailing Lists
55              
56             User feedback is an integral part of the evolution of this and other
57             Bioperl modules. Send your comments and suggestions preferably to
58             the Bioperl mailing list. Your participation is much appreciated.
59              
60             bioperl-l@bioperl.org - General discussion
61             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62              
63             =head2 Support
64              
65             Please direct usage questions or support issues to the mailing list:
66              
67             I<bioperl-l@bioperl.org>
68              
69             rather than to the module maintainer directly. Many experienced and
70             responsive experts will be able to look at the problem and quickly
71             address it. Please include a thorough description of the problem
72             with code and data examples if at all possible.
73              
74             =head2 Reporting Bugs
75              
76             Report bugs to the Bioperl bug tracking system to help us keep track
77             of the bugs and their resolution. Bug reports can be submitted via
78             the web:
79              
80             https://github.com/bioperl/bioperl-live/issues
81              
82             =head1 AUTHOR - Hilmar Lapp
83              
84             Email hlapp at gmx.net
85              
86             =head1 APPENDIX
87              
88             The rest of the documentation details each of the object methods.
89             Internal methods are usually preceded with a _
90              
91             =cut
92              
93              
94             # Let the code begin...
95              
96              
97             package Bio::OntologyIO;
98 10     10   1539 use strict;
  10         14  
  10         286  
99              
100             # Object preamble - inherits from Bio::Root::Root
101              
102              
103 10     10   31 use base qw(Bio::Root::Root Bio::Root::IO);
  10         11  
  10         4312  
104              
105             #
106             # Maps from format name to driver suitable for the format.
107             #
108             my %format_driver_map = (
109             "go" => "goflat",
110             "so" => "soflat",
111             "interpro" => "InterProParser",
112             "interprosax" => "Handlers::InterPro_BioSQL_Handler",
113             "evoc" => "simplehierarchy",
114             "obo" => "obo"
115             );
116              
117             =head2 new
118              
119             Title : new
120             Usage : my $parser = Bio::OntologyIO->new(-format => 'go', @args);
121             Function: Returns a stream of ontologies opened on the specified input
122             for the specified format.
123             Returns : An ontology parser (an instance of Bio::OntologyIO) initialized
124             for the specified format.
125             Args : Named parameters. Common parameters are
126              
127             -format - the format of the input; the following are
128             presently supported:
129             goflat: DAG-Edit Gene Ontology flat files
130             go : synonymous to goflat
131             soflat: DAG-Edit Sequence Ontology flat files
132             so : synonymous to soflat
133             simplehierarchy: text format with one term per line
134             and indentation giving the hierarchy
135             evoc : synonymous to simplehierarchy
136             interpro: InterPro XML
137             interprosax: InterPro XML - this is actually not a
138             Bio::OntologyIO compliant parser; instead it
139             persists terms as they are encountered.
140             L<Bio::OntologyIO::Handlers::InterPro_BioSQL_Handler>
141             obo : OBO format style from Gene Ontology Consortium
142             -file - the file holding the data
143             -fh - the stream providing the data (-file and -fh are
144             mutually exclusive)
145             -ontology_name - the name of the ontology
146             -engine - the L<Bio::Ontology::OntologyEngineI> object
147             to be reused (will be created otherwise); note
148             that every L<Bio::Ontology::OntologyI> will
149             qualify as well since that one inherits from the
150             former.
151             -term_factory - the ontology term factory to use. Provide a
152             value only if you know what you are doing.
153              
154             DAG-Edit flat file parsers will usually also accept the
155             following parameters.
156              
157             -defs_file - the name of the file holding the term
158             definitions
159             -files - an array ref holding the file names (for GO,
160             there will usually be 3 files: component.ontology,
161             function.ontology, process.ontology)
162              
163             Other parameters are specific to the parsers.
164              
165             =cut
166              
167             sub new {
168 14     14 1 34 my ($caller,@args) = @_;
169 14   33     52 my $class = ref($caller) || $caller;
170             # or do we want to call SUPER on an object if $caller is an
171             # object?
172 14 100       63 if( $class =~ /Bio::OntologyIO::(\S+)/ ) {
173 7         30 my ($self) = $class->SUPER::new(@args);
174 7         25 $self->_initialize(@args);
175 7         33 return $self;
176             } else {
177 7         26 my %param = @args;
178 7         20 @param{ map { lc $_ } keys %param } = values %param; # lowercase keys
  16         32  
179 7         27 my $format = $class->_map_format($param{'-format'});
180              
181             # normalize capitalization
182 7 50       20 return unless( $class->_load_format_module($format) );
183 7         59 return "Bio::OntologyIO::$format"->new(@args);
184             }
185              
186             }
187              
188              
189             =head2 format
190              
191             Title : format
192             Usage : $format = $parser->format()
193             Function: Get the ontology format
194             Returns : ontology format
195             Args : none
196              
197             =cut
198              
199             # format() method inherited from Bio::Root::IO
200              
201              
202             sub _initialize {
203 7     7   14 my($self, @args) = @_;
204              
205             # initialize factories etc
206 7         21 my ($eng,$fact,$ontname) =
207             $self->_rearrange([qw(TERM_FACTORY)
208             ], @args);
209             # term object factory
210 7 50       19 $self->term_factory($fact) if $fact;
211              
212             # initialize the Bio::Root::IO part
213 7         26 $self->_initialize_io(@args);
214             }
215              
216             =head2 next_ontology
217              
218             Title : next_ontology
219             Usage : $ont = $stream->next_ontology()
220             Function: Reads the next ontology object from the stream and returns it.
221             Returns : a L<Bio::Ontology::OntologyI> compliant object, or undef at the
222             end of the stream
223             Args : none
224              
225              
226             =cut
227              
228             sub next_ontology {
229 0     0 1 0 shift->throw_not_implemented();
230             }
231              
232             =head2 term_factory
233              
234             Title : term_factory
235             Usage : $obj->term_factory($newval)
236             Function: Get/set the ontology term factory to use.
237              
238             As a user of this module it is not necessary to call this
239             method as there will be a default. In order to change the
240             default, the easiest way is to instantiate
241             L<Bio::Factory::ObjectFactory> with the proper -type
242             argument. Most if not all parsers will actually use this
243             very implementation, so even easier than the aforementioned
244             way is to simply call
245             $ontio->term_factory->type("Bio::Ontology::MyTerm").
246              
247             Example :
248             Returns : value of term_factory (a Bio::Factory::ObjectFactoryI object)
249             Args : on set, new value (a Bio::Factory::ObjectFactoryI object, optional)
250              
251              
252             =cut
253              
254             sub term_factory{
255 2497     2497 1 2179 my $self = shift;
256              
257 2497 100       3716 return $self->{'term_factory'} = shift if @_;
258 2490         5890 return $self->{'term_factory'};
259             }
260              
261             =head1 Private Methods
262              
263             Some of these are actually 'protected' in OO speak, which means you
264             may or will want to utilize them in a derived ontology parser, but
265             you should not call them from outside.
266              
267             =cut
268              
269             =head2 _load_format_module
270              
271             Title : _load_format_module
272             Usage : *INTERNAL OntologyIO stuff*
273             Function: Loads up (like use) a module at run time on demand
274             Example :
275             Returns :
276             Args :
277              
278             =cut
279              
280             sub _load_format_module {
281 7     7   12 my ($self, $format) = @_;
282 7         12 my $module = "Bio::OntologyIO::" . $format;
283 7         8 my $ok;
284              
285 7         11 eval {
286 7         32 $ok = $self->_load_module($module);
287             };
288 7 50       16 if ( $@ ) {
289 0         0 print STDERR <<END;
290             $self: $format cannot be found
291             Exception $@
292             For more information about the OntologyIO system please see the docs.
293             This includes ways of checking for formats at compile time, not run time
294             END
295             }
296 7         23 return $ok;
297             }
298              
299             sub DESTROY {
300 7     7   1474 my $self = shift;
301              
302 7         22 $self->close();
303             }
304              
305             sub _map_format {
306 7     7   10 my $self = shift;
307 7         10 my $format = shift;
308 7         6 my $mod;
309              
310 7 50       13 if($format) {
311 7         17 $mod = $format_driver_map{lc($format)};
312 7 100       18 $mod = lc($format) unless $mod;
313             } else {
314 0         0 $self->throw("unable to guess ontology format, specify -format");
315             }
316 7         11 return $mod;
317             }
318              
319             sub unescape {
320 99     99 0 104 my( $self, $ref ) = @_;
321 99         93 $ref =~ s/&lt\\;/\</g;
322 99         84 $ref =~ s/&gt\\;/\>/g;
323 99         74 $ref =~ s/&pct\\;/\%/g;
324 99         78 $ref =~ s/\\n/\n/g;
325 99         71 $ref =~ s/\\t/\t/g;
326 99         152 return $ref;
327             }
328              
329             1;