File Coverage

Bio/OntologyIO/dagflat.pm
Criterion Covered Total %
statement 277 312 88.7
branch 105 152 69.0
condition 16 24 66.6
subroutine 40 41 97.5
pod 5 7 71.4
total 443 536 82.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::OntologyIO::dagflat
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Hilmar Lapp, hlapp at gmx.net
7             #
8             # (c) Christian M. Zmasek, czmasek-at-burnham.org, 2002.
9             # (c) Hilmar Lapp, hlapp at gmx.net, 2003.
10             # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
11             #
12             # You may distribute this module under the same terms as perl itself.
13             # Refer to the Perl Artistic License (see the license accompanying this
14             # software package, or see http://www.perl.com/language/misc/Artistic.html)
15             # for the terms under which you may use, modify, and redistribute this module.
16             #
17             # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
18             # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19             # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20             #
21             # You may distribute this module under the same terms as perl itself
22              
23             # POD documentation - main docs before the code
24              
25             =head1 NAME
26              
27             Bio::OntologyIO::dagflat - a base class parser for GO flat-file type formats
28              
29             =head1 SYNOPSIS
30              
31             use Bio::OntologyIO;
32              
33             # do not use directly -- use via Bio::OntologyIO
34             # e.g., the GO parser is a simple extension of this class
35             my $parser = Bio::OntologyIO->new
36             ( -format => "go",
37             -defs_file => "/home/czmasek/GO/GO.defs",
38             -files => ["/home/czmasek/GO/component.ontology",
39             "/home/czmasek/GO/function.ontology",
40             "/home/czmasek/GO/process.ontology"] );
41              
42             my $go_ontology = $parser->next_ontology();
43              
44             my $IS_A = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
45             my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );
46             my $RELATED_TO = Bio::Ontology::RelationshipType->get_instance( "RELATED_TO" );
47              
48             =head1 DESCRIPTION
49              
50             Needs Graph.pm from CPAN.
51              
52             =head1 FEEDBACK
53              
54             =head2 Mailing Lists
55              
56             User feedback is an integral part of the evolution of this and other
57             Bioperl modules. Send your comments and suggestions preferably to the
58             Bioperl mailing lists Your participation is much appreciated.
59              
60             bioperl-l@bioperl.org - General discussion
61             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62              
63             =head2 Support
64              
65             Please direct usage questions or support issues to the mailing list:
66              
67             I
68              
69             rather than to the module maintainer directly. Many experienced and
70             reponsive experts will be able look at the problem and quickly
71             address it. Please include a thorough description of the problem
72             with code and data examples if at all possible.
73              
74             =head2 Reporting Bugs
75              
76             Report bugs to the Bioperl bug tracking system to help us keep track
77             the bugs and their resolution. Bug reports can be submitted via the
78             web:
79              
80             https://github.com/bioperl/bioperl-live/issues
81              
82             =head1 AUTHOR
83              
84             Christian M. Zmasek
85              
86             Email: czmasek-at-burnham.org or cmzmasek@yahoo.com
87              
88             WWW: http://monochrome-effect.net/
89              
90             Address:
91              
92             Genomics Institute of the Novartis Research Foundation
93             10675 John Jay Hopkins Drive
94             San Diego, CA 92121
95              
96             =head2 CONTRIBUTOR
97              
98             Hilmar Lapp, hlapp at gmx.net
99              
100             =head1 APPENDIX
101              
102             The rest of the documentation details each of the object
103             methods. Internal methods are usually preceded with a _
104              
105             =cut
106              
107              
108             # Let the code begin...
109              
110              
111             package Bio::OntologyIO::dagflat;
112              
113 2     2   12 use strict;
  2         4  
  2         57  
114              
115 2     2   10 use Bio::Root::IO;
  2         3  
  2         42  
116 2     2   651 use Bio::Ontology::OBOEngine;
  2         6  
  2         71  
117 2     2   17 use Bio::Ontology::Ontology;
  2         4  
  2         52  
118 2     2   10 use Bio::Ontology::OntologyStore;
  2         3  
  2         35  
119 2     2   8 use Bio::Ontology::TermFactory;
  2         3  
  2         32  
120 2     2   8 use Bio::Annotation::DBLink;
  2         4  
  2         44  
121              
122 2     2   8 use constant TRUE => 1;
  2         2  
  2         97  
123 2     2   11 use constant FALSE => 0;
  2         3  
  2         73  
124              
125              
126 2     2   10 use base qw(Bio::OntologyIO);
  2         5  
  2         5356  
127              
128              
129             =head2 new
130              
131             Title : new
132             Usage : $parser = Bio::OntologyIO->new(
133             -format => "go",
134             -defs_file => "/path/to/GO.defs",
135             -files => ["/path/to/component.ontology",
136             "/path/to/function.ontology",
137             "/path/to/process.ontology"] );
138             Function: Creates a new dagflat parser.
139             Returns : A new dagflat parser object, implementing Bio::OntologyIO.
140             Args : -defs_file => the name of the file holding the term
141             definitions
142             -files => a single ontology flat file holding the
143             term relationships, or an array ref holding
144             the file names (for GO, there will usually be
145             3 files: component.ontology, function.ontology,
146             process.ontology)
147             -file => if there is only a single flat file, it may
148             also be specified via the -file parameter
149             -ontology_name => the name of the ontology; if not specified the
150             parser will auto-discover it by using the term
151             that starts with a $, and converting underscores
152             to spaces
153             -engine => the Bio::Ontology::OntologyEngineI object
154             to be reused (will be created otherwise); note
155             that every Bio::Ontology::OntologyI will
156             qualify as well since that one inherits from the
157             former.
158              
159             See L.
160              
161             =cut
162              
163             # in reality, we let OntologyIO::new do the instantiation, and override
164             # _initialize for all initialization work
165             sub _initialize {
166 4     4   15 my ($self, %arg) = @_;
167              
168 4         32 my ( $defs_file_name,$files,$defs_url,$url,$name,$eng ) =
169             $self->_rearrange([qw( DEFS_FILE
170             FILES
171             DEFS_URL
172             URL
173             ONTOLOGY_NAME
174             ENGINE)
175             ],
176             %arg );
177              
178 4         14 delete($arg{-url}); #b/c GO has 3 files...
179              
180 4         23 $self->SUPER::_initialize( %arg );
181              
182 4         19 $self->_done( FALSE );
183 4         14 $self->_not_first_record( FALSE );
184 4         13 $self->_term( "" );
185 4         7 delete $self->{'_ontologies'};
186              
187             # ontology engine (and possibly name if it's an OntologyI)
188 4 50       33 $eng = Bio::Ontology::OBOEngine->new() unless $eng;
189 4 50       32 if($eng->isa("Bio::Ontology::OntologyI")) {
190 0         0 $self->ontology_name($eng->name());
191 0 0       0 $eng = $eng->engine() if $eng->can('engine');
192             }
193 4         21 $self->_ont_engine($eng);
194              
195             # flat files to parse
196 4 50 66     18 if(defined($defs_file_name) && defined($defs_url)){
197 0         0 $self->throw('cannot provide both -defs_file and -defs_url');
198             } else {
199 4 100       15 defined($defs_file_name) && $self->defs_file( $defs_file_name );
200 4 50       10 defined($defs_url) && $self->defs_url( $defs_url );
201             }
202              
203 4 50 66     27 if(defined($files) && defined($url)){
    100          
    50          
204             } elsif(defined($files)){
205 1 50       7 $self->{_flat_files} = $files ? ref($files) ? $files : [$files] : [];
    50          
206             } elsif(defined($url)){
207 0         0 $self->url($url);
208             }
209              
210             # ontology name (overrides implicit one through OntologyI engine)
211 4 50       17 $self->ontology_name($name) if $name;
212              
213             } # _initialize
214              
215             =head2 ontology_name
216              
217             Title : ontology_name
218             Usage : $obj->ontology_name($newval)
219             Function: Get/set the name of the ontology parsed by this module.
220             Example :
221             Returns : value of ontology_name (a scalar)
222             Args : on set, new value (a scalar or undef, optional)
223              
224              
225             =cut
226              
227             sub ontology_name{
228 14     14 1 26 my $self = shift;
229              
230 14 100       35 return $self->{'ontology_name'} = shift if @_;
231 11         44 return $self->{'ontology_name'};
232             }
233              
234              
235             =head2 parse
236              
237             Title : parse()
238             Usage : $parser->parse();
239             Function: Parses the files set with "new" or with methods
240             defs_file and _flat_files.
241              
242             Normally you should not need to call this method as it will
243             be called automatically upon the first call to
244             next_ontology().
245              
246             Returns : [Bio::Ontology::OntologyEngineI]
247             Args :
248              
249             =cut
250              
251             sub parse {
252 4     4 1 6 my $self = shift;
253              
254             #warn "PARSING";
255             # setup the default term factory if not done by anyone yet
256 4 50       11 $self->term_factory(Bio::Ontology::TermFactory->new(
257             -type => "Bio::Ontology::Term"))
258             unless $self->term_factory();
259              
260             # create the ontology object itself
261 4         19 my $ont = Bio::Ontology::Ontology->new(-name => $self->ontology_name(),
262             -engine => $self->_ont_engine());
263              
264             # parse definitions
265 4         20 while( my $term = $self->_next_term() ) {
266 9         17 $self->_add_term( $term, $ont );
267             }
268              
269             # set up the ontology of the relationship types
270 4         19 foreach ($self->_part_of_relationship(), $self->_is_a_relationship(), $self->_related_to_relationship()) {
271 12         26 $_->ontology($ont);
272             }
273              
274             # pre-seed the IO system with the first flat file if -file wasn't provided
275 4 100       13 if(! $self->_fh) {
276 1 50       10 if($self->url){
    50          
277 0 0       0 if(ref($self->url) eq 'ARRAY'){
278             #warn "BA";
279 0         0 foreach my $url (@{ $self->url }){
  0         0  
280             #warn $url;
281             #warn $ont;
282             #warn scalar($ont->get_all_terms());
283 0         0 $self->_initialize_io(-url => $url);
284 0         0 $self->_parse_flat_file($ont);
285             }
286 0         0 $self->close();
287             } else {
288 0         0 $self->_initialize_io(-url => $self->url);
289             }
290             } elsif($self->_flat_files){
291 1         4 $self->_initialize_io(-file => shift(@{$self->_flat_files()}));
  1         3  
292             }
293             }
294              
295 4         13 while($self->_fh) {
296 4         20 $self->_parse_flat_file($ont);
297             # advance to next flat file if more are available
298 4 50       8 if(@{$self->_flat_files()}) {
  4         45  
299 0         0 $self->close();
300 0         0 $self->_initialize_io(-file => shift(@{$self->_flat_files()}));
  0         0  
301             } else {
302 4         11 last; # nothing else to parse so terminate the loop
303             }
304             }
305 4         22 $self->_add_ontology($ont);
306              
307             # not needed anywhere, only because of backward compatibility
308 4         12 return $self->_ont_engine();
309             } # parse
310              
311             =head2 next_ontology
312              
313             Title : next_ontology
314             Usage :
315             Function: Get the next available ontology from the parser. This is the
316             method prescribed by Bio::OntologyIO.
317             Example :
318             Returns : An object implementing Bio::Ontology::OntologyI, and undef if
319             there is no more ontology in the input.
320             Args :
321              
322              
323             =cut
324              
325             sub next_ontology {
326 5     5 1 424 my $self = shift;
327              
328             # parse if not done already
329 5 100       23 $self->parse() unless exists($self->{'_ontologies'});
330             # return next available ontology
331 5 50       16 if(exists($self->{'_ontologies'})){
332 5         9 my $ont = shift (@{$self->{'_ontologies'}});
  5         12  
333 5 100       14 if($ont){
334 4         37 my $store = Bio::Ontology::OntologyStore->new();
335 4         19 $store->register_ontology($ont);
336 4         15 return $ont;
337             }
338             }
339 1         2 return;
340             }
341              
342             =head2 defs_file
343              
344             Title : defs_file
345             Usage : $parser->defs_file( "GO.defs" );
346             Function: Set/get for the term definitions filename.
347             Returns : The term definitions file name [string].
348             Args : On set, the term definitions file name [string] (optional).
349              
350             =cut
351              
352             sub defs_file {
353 2     2 1 4 my $self = shift;
354              
355 2 50       5 if ( @_ ) {
356 2         4 my $f = shift;
357 2         3 $self->{ "_defs_file_name" } = $f;
358 2 50       7 $self->_defs_io->close() if $self->_defs_io();
359 2 50       6 if(defined($f)) {
360 2         8 $self->_defs_io( Bio::Root::IO->new( -input => $f ) );
361             }
362             }
363 2         4 return $self->{ "_defs_file_name" };
364             } # defs_file
365              
366             sub defs_url {
367 0     0 0 0 my $self = shift;
368 0         0 my $val = shift;
369 0 0       0 if(defined($val)){
370 0         0 $self->{'_defs_url'} = $val;
371              
372 0 0       0 $self->_defs_io->close() if $self->_defs_io();
373 0         0 $self->_defs_io( Bio::Root::IO->new( -url => $val ) );
374             }
375 0         0 return $self->{'_defs_url'};
376             }
377              
378             sub url {
379 1     1 0 3 my $self = shift;
380 1         2 my $val = shift;
381 1 50       4 if(defined($val)){
382 0         0 $self->{'_url'} = $val;
383             }
384 1         6 return $self->{'_url'};
385             }
386              
387             =head2 close
388              
389             Title : close
390             Usage :
391             Function: Closes this ontology stream and associated file handles.
392              
393             Clients should call this method especially when they write
394             ontologies.
395              
396             We need to override this here in order to close the file
397             handle for the term definitions file.
398              
399             Example :
400             Returns : none
401             Args : none
402              
403              
404             =cut
405              
406             sub close{
407 4     4 1 9 my $self = shift;
408              
409             # first call the inherited implementation
410 4         33 $self->SUPER::close();
411             # then close the defs file io (if there is one)
412 4 100       20 $self->_defs_io->close() if $self->_defs_io();
413             }
414              
415             =head2 _flat_files
416              
417             Title : _flat_files
418             Usage : $files_to_parse = $parser->_flat_files();
419             Function: Get the array of ontology flat files that need to be parsed.
420              
421             Note that this array will decrease in elements over the
422             parsing process. Therefore, it\'s value outside of this
423             module will be limited. Also, be careful not to alter the
424             array unless you know what you are doing.
425              
426             Returns : a reference to an array of zero or more strings
427             Args : none
428              
429             =cut
430              
431             sub _flat_files {
432 6     6   13 my $self = shift;
433              
434 6 100       22 $self->{_flat_files} = [] unless exists($self->{_flat_files});
435 6         27 return $self->{_flat_files};
436             }
437              
438              
439             # INTERNAL METHODS
440             # ----------------
441              
442             =head2 _defs_io
443              
444             Title : _defs_io
445             Usage : $obj->_defs_io($newval)
446             Function: Get/set the Bio::Root::IO instance representing the
447             definition file, if provided (see defs_file()).
448             Example :
449             Returns : value of _defs_io (a Bio::Root::IO object)
450             Args : on set, new value (a Bio::Root::IO object or undef, optional)
451              
452             =cut
453              
454             sub _defs_io{
455 85     85   99 my $self = shift;
456              
457 85 100       130 return $self->{'_defs_io'} = shift if @_;
458 83         183 return $self->{'_defs_io'};
459             }
460              
461             sub _add_ontology {
462 4     4   6 my $self = shift;
463 4 50       16 $self->{'_ontologies'} = [] unless exists($self->{'_ontologies'});
464 4         10 foreach my $ont (@_) {
465 4 50 33     29 $self->throw(ref($ont)." does not implement Bio::Ontology::OntologyI")
466             unless ref($ont) && $ont->isa("Bio::Ontology::OntologyI");
467             # the ontology name may have been auto-discovered while parsing
468             # the file
469 4 50       25 $ont->name($self->ontology_name) unless $ont->name();
470 4         6 push(@{$self->{'_ontologies'}}, $ont);
  4         14  
471             }
472             }
473              
474             # This simply delegates. See SimpleGOEngine.
475             sub _add_term {
476 227     227   322 my ( $self, $term, $ont ) = @_;
477 227 50 33     533 $term->ontology($ont) if $ont && (! $term->ontology);
478 227         376 $self->_ont_engine()->add_term( $term );
479             } # _add_term
480              
481              
482              
483             # This simply delegates. See SimpleGOEngine
484             sub _part_of_relationship {
485 51     51   62 my $self = shift;
486              
487 51         79 return $self->_ont_engine()->part_of_relationship(@_);
488             } # _part_of_relationship
489              
490              
491              
492             # This simply delegates. See SimpleGOEngine
493             sub _is_a_relationship {
494 211     211   223 my $self = shift;
495              
496 211         303 return $self->_ont_engine()->is_a_relationship(@_);
497             } # _is_a_relationship
498              
499             # This simply delegates. See SimpleGOEngine
500             sub _related_to_relationship {
501 4     4   10 my $self = shift;
502              
503 4         9 return $self->_ont_engine()->related_to_relationship(@_);
504             } # _is_a_relationship
505              
506              
507              
508             # This simply delegates. See SimpleGOEngine
509             sub _add_relationship {
510 254     254   352 my ( $self, $parent, $child, $type, $ont ) = @_;
511              
512             # note the triple terminology (subject,predicate,object) corresponds to
513             # (child,type,parent)
514 254         296 $self->_ont_engine()->add_relationship( $child, $type, $parent, $ont );
515              
516              
517             } # _add_relationship
518              
519              
520             # This simply delegates. See SimpleGOEngine
521             sub _has_term {
522 258     258   247 my $self = shift;
523              
524 258         392 return $self->_ont_engine()->has_term( @_ );
525             } # _add_term
526              
527              
528              
529             # This parses the relationships files
530             sub _parse_flat_file {
531 4     4   8 my $self = shift;
532 4         6 my $ont = shift;
533              
534 4         9 my @stack = ();
535 4         5 my $prev_spaces = -1;
536 4         8 my $prev_term = "";
537              
538 4         22 while ( my $line = $self->_readline() ) {
539              
540 248 100       466 if ( $line =~ /^!/ ) {
541 26         43 next;
542             }
543              
544             # split into term specifications
545 222         1038 my @termspecs = split(/ (?=[%<])/, $line);
546             # the first element is whitespace only
547 222 100       661 shift(@termspecs) if $termspecs[0] =~ /^\s*$/;
548              
549             # parse out the focus term
550 222         441 my $current_term = $self->_get_first_termid( $termspecs[0] );
551 222         405 my @syns = $self->_get_synonyms( $termspecs[0] );
552 222         363 my @sec_go_ids = $self->_get_secondary_termids( $termspecs[0] );
553 222         324 my @cross = $self->_get_db_cross_refs( $termspecs[0] );
554 222         225 my @cross_refs;
555 222         286 foreach my $cross_ref (@cross) {
556 81 100       151 $cross_ref eq $current_term && next;
557 4         6 push(@cross_refs, $cross_ref);
558             }
559            
560             # parse out the parents of the focus term
561 222         207 shift(@termspecs);
562 222         232 my @isa_parents = ();
563 222         224 my @partof_parents = ();
564 222         232 foreach my $parent (@termspecs) {
565 36 100       85 if (index($parent, "%") == 0) {
    50          
566 25         75 push(@isa_parents, $self->_get_first_termid($parent));
567             } elsif (index($parent, "<") == 0) {
568 11         20 push(@partof_parents, $self->_get_first_termid($parent));
569             } else {
570 0         0 $self->warn("unhandled relationship type in '".$parent."'");
571             }
572             }
573              
574 222 100       346 if ( ! $self->_has_term( $current_term ) ) {
575 204         347 my $term =$self->_create_ont_entry($self->_get_name($line,
576             $current_term),
577             $current_term );
578 204         431 $self->_add_term( $term, $ont );
579             }
580              
581 222         394 my $current_term_object = $self->_ont_engine()->get_terms( $current_term );
582 222         447 my $anno = $self->_to_annotation(\@cross_refs);
583 222         564 $current_term_object->add_dbxref(-dbxrefs => $anno);
584 222         496 $current_term_object->add_secondary_id( @sec_go_ids );
585 222         412 $current_term_object->add_synonym( @syns );
586 222 100       402 unless ( $line =~ /^\$/ ) {
587 218         361 $current_term_object->ontology( $ont );
588             }
589 222         289 foreach my $parent ( @isa_parents ) {
590 25 100       46 if ( ! $self->_has_term( $parent ) ) {
591 11         40 my $term = $self->_create_ont_entry($self->_get_name($line,
592             $parent),
593             $parent );
594 11         32 $self->_add_term( $term, $ont );
595             }
596              
597 25         68 $self->_add_relationship( $parent,
598             $current_term,
599             $self->_is_a_relationship(),
600             $ont);
601              
602             }
603 222         259 foreach my $parent ( @partof_parents ) {
604 11 100       21 if ( ! $self->_has_term( $parent ) ) {
605 3         7 my $term = $self->_create_ont_entry($self->_get_name($line,
606             $parent),
607             $parent );
608 3         7 $self->_add_term( $term, $ont );
609             }
610              
611 11         19 $self->_add_relationship( $parent,
612             $current_term,
613             $self->_part_of_relationship(),
614             $ont);
615             }
616              
617 222         298 my $current_spaces = $self->_count_spaces( $line );
618              
619 222 100       380 if ( $current_spaces != $prev_spaces ) {
620              
621 116 100       209 if ( $current_spaces == $prev_spaces + 1 ) {
    50          
622 75         106 push( @stack, $prev_term );
623             } elsif ( $current_spaces < $prev_spaces ) {
624 41         65 my $n = $prev_spaces - $current_spaces;
625 41         84 for ( my $i = 0; $i < $n; ++$i ) {
626 60         115 pop( @stack );
627             }
628             } else {
629 0         0 $self->throw( "format error (file ".$self->file.")" );
630             }
631             }
632              
633 222         344 my $parent = $stack[ @stack - 1 ];
634              
635             # add a relationship if the line isn\'t the one with the root term
636             # of the ontology (which is also the name of the ontology)
637 222 100       416 if ( index($line,'$') != 0 ) {
638             #adding @reltype@ syntax
639 218 50       493 if ( $line !~ /^\s*([<%~]|\@\w+?\@)/ ) {
640 0         0 $self->throw( "format error (file ".$self->file.") offending line:\n$line" );
641             }
642              
643 218         463 my($relstring) = $line =~ /^\s*([<%~]|\@[^\@]+?\@)/;
644              
645 218         240 my $reltype;
646              
647 218 100       389 if ($relstring eq '<') {
    50          
    0          
648 36         69 $reltype = $self->_part_of_relationship;
649             } elsif ($relstring eq '%') {
650 182         299 $reltype = $self->_is_a_relationship;
651             } elsif ($relstring eq '~') {
652 0         0 $reltype = $self->_related_to_relationship;
653             } else {
654 0         0 $relstring =~ s/\@//g;
655 0 0       0 if ($self->_ont_engine->get_relationship_type($relstring)) {
656 0         0 $reltype = $self->_ont_engine->get_relationship_type($relstring);
657             } else {
658 0         0 $self->_ont_engine->add_relationship_type($relstring, $ont);
659 0         0 $reltype = $self->_ont_engine->get_relationship_type($relstring);
660             }
661             }
662              
663             #my $reltype = ($line =~ /^\s*
664             #$self->_part_of_relationship() :
665             #$self->_is_a_relationship();
666 218         341 $self->_add_relationship( $parent, $current_term, $reltype, $ont);
667             }
668              
669 222         274 $prev_spaces = $current_spaces;
670 222         776 $prev_term = $current_term;
671             }
672 4         14 return $ont;
673             } # _parse_relationships_file
674              
675              
676              
677             # Parses the 1st term id number out of line.
678             sub _get_first_termid {
679 258     258   383 my ( $self, $line ) = @_;
680 258 50       751 if ( $line =~ /;\s*([A-Z_]{1,8}:\d{1,})/ ) {
681             # if ( $line =~ /;\s*(\w+:\w+)/ ) {
682 258         712 return $1;
683             }
684             else {
685 0         0 $self->throw( "format error: no term id in line \"$line\"" );
686             }
687              
688             } # _get_first_termid
689              
690              
691              
692             # Parses the name out of line.
693             sub _get_name {
694 218     218   302 my ( $self, $line, $termid ) = @_;
695              
696 218 50       5276 if ( $line =~ /([^;<%~]+);\s*$termid/ ) {
697 218         540 my $name = $1;
698             # remove trailing and leading whitespace
699 218         718 $name =~ s/\s+$//;
700 218         604 $name =~ s/^\s+//;
701 218         257 $name =~ s/\@.+?\@//;
702             # remove leading dollar character; also we default the name of the
703             # ontology to this name unless it is preset to something else
704 218 100       450 if(index($name,'$') == 0) {
705 3         9 $name = substr($name,1);
706             # replace underscores by spaces for setting the ontology name
707 3 50       10 $self->ontology_name(join(" ",split(/_/,$name)))
708             unless $self->ontology_name();
709             }
710 218         654 return $name;
711             }
712             else {
713 0         0 return;
714             }
715             } # _get_name
716              
717              
718             # Parses the synonyms out of line.
719             sub _get_synonyms {
720 222     222   296 my ( $self, $line ) = @_;
721              
722 222         228 my @synonyms = ();
723              
724 222         509 while ( $line =~ /synonym\s*:\s*([^;<%~]+)/g ) {
725 38         72 my $syn = $1;
726 38         128 $syn =~ s/\s+$//;
727 38         71 $syn =~ s/^\s+//;
728 38         93 push( @synonyms, $syn );
729             }
730 222         331 return @synonyms;
731              
732             } # _get_synonyms
733              
734              
735              
736             # Parses the db cross refs out of line.
737             sub _get_db_cross_refs {
738 222     222   283 my ( $self, $line ) = @_;
739              
740 222         218 my @refs = ();
741              
742 222         722 while ( $line =~ /;([^;<%~:]+:[^;<%~:]+)/g ) {
743 264         418 my $ref = $1;
744 264 100 100     1097 if ( $ref =~ /synonym/ || $ref =~ /[A-Z]{1,8}:\d{3,}/ ) {
745 183         381 next;
746             }
747 81         253 $ref =~ s/\s+$//;
748 81         199 $ref =~ s/^\s+//;
749              
750 81         196 $ref = $self->unescape( $ref );
751              
752 81 50       234 push( @refs, $ref ) if defined $ref;
753             }
754 222         384 return @refs;
755              
756             }
757              
758              
759             # Parses the secondary go ids out of a line
760             sub _get_secondary_termids {
761 222     222   275 my ( $self, $line ) = @_;
762 222         230 my @secs = ();
763              
764             # while ( $line =~ /,\s*([A-Z]{1,8}:\d{3,})/g ) {
765 222         406 while ( $line =~ /,\s*(\w+:\w+)/g ) {
766 1         3 my $sec = $1;
767 1         3 push( @secs, $sec );
768             }
769 222         261 return @secs;
770              
771             } # _get_secondary_termids
772              
773              
774             # Counts the spaces at the beginning of a line in the relationships files
775             sub _count_spaces {
776 222     222   298 my ( $self, $line ) = @_;
777              
778 222 100       591 if ( $line =~ /^(\s+)/ ) {
779 218         549 return length( $1 );
780             }
781             else {
782 4         11 return 0;
783             }
784             } # _count_spaces
785              
786              
787             # "next" method for parsing the defintions file
788             sub _next_term {
789 13     13   22 my ( $self ) = @_;
790              
791 13 100 100     25 if ( ($self->_done() == TRUE) || (! $self->_defs_io())) {
792 4         15 return;
793             }
794              
795 9         13 my $line = "";
796 9         11 my $termid = "";
797 9         14 my $next_term = $self->_term();
798 9         10 my $def = "";
799 9         9 my $comment = "";
800 9         11 my @def_refs = ();
801 9         9 my $isobsolete;
802              
803 9         12 while( $line = ( $self->_defs_io->_readline() ) ) {
804 62 100 100     316 if ( $line !~ /\S/
    100          
    100          
    100          
    50          
    0          
805             || $line =~ /^\s*!/ ) {
806 26         41 next;
807             }
808             elsif ( $line =~ /^\s*term:\s*(.+)/ ) {
809 9         17 $self->_term( $1 );
810 9 100       13 last if $self->_not_first_record();
811 2         6 $next_term = $1;
812 2         4 $self->_not_first_record( TRUE );
813             }
814             elsif ( $line =~ /^\s*[a-z]{0,8}id:\s*(.+)/ ) {
815 9         26 $termid = $1;
816             }
817             elsif ( $line =~ /^\s*definition:\s*(.+)/ ) {
818 9         30 $def = $self->unescape($1);
819 9 50       38 $isobsolete = 1 if index($def,"OBSOLETE") == 0;
820             }
821             elsif ( $line =~ /^\s*definition_reference:\s*(.+)/ ) {
822 9         19 push( @def_refs, $self->unescape($1) );
823             }
824             elsif ( $line =~ /^\s*comment:\s*(.+)/ ) {
825 0         0 $comment = $self->unescape($1);
826             }
827             }
828 9 100       19 $self->_done( TRUE ) unless $line; # we'll come back until done
829 9         24 return $self->_create_ont_entry( $next_term, $termid, $def,
830             $comment, \@def_refs, $isobsolete);
831             } # _next_term
832              
833              
834              
835             # Holds the GO engine to be parsed into
836             sub _ont_engine {
837 1239     1239   1440 my ( $self, $value ) = @_;
838              
839 1239 100       1626 if ( defined $value ) {
840 4         9 $self->{ "_ont_engine" } = $value;
841             }
842              
843 1239         2633 return $self->{ "_ont_engine" };
844             } # _ont_engine
845              
846              
847             # Used to create ontology terms.
848             # Arguments: name, id
849             sub _create_ont_entry {
850 227     227   454 my ( $self, $name, $termid, $def, $cmt, $dbxrefs, $obsolete ) = @_;
851              
852 227 50 33     806 if((!defined($obsolete)) && (index(lc($name),"obsolete") == 0)) {
853 0         0 $obsolete = 1;
854             }
855 227         350 my $anno = $self->_to_annotation($dbxrefs);
856 227         480 my $term = $self->term_factory->create_object(-name => $name,
857             -identifier => $termid,
858             -definition => $def,
859             -comment => $cmt,
860             -dbxrefs => $anno,
861             -is_obsolete => $obsolete);
862              
863 227         490 return $term;
864             } # _create_ont_entry
865              
866              
867              
868             # Holds whether first record or not
869             sub _not_first_record {
870 15     15   21 my ( $self, $value ) = @_;
871              
872 15 100       26 if ( defined $value ) {
873 6         12 $self->{ "_not_first_record" } = $value;
874             }
875              
876 15         29 return $self->{ "_not_first_record" };
877             } # _not_first_record
878              
879              
880              
881             # Holds whether done or not
882             sub _done {
883 19     19   28 my ( $self, $value ) = @_;
884              
885 19 100       37 if ( defined $value ) {
886 6         11 $self->{ "_done" } = $value;
887             }
888              
889 19         58 return $self->{ "_done" };
890             } # _done
891              
892              
893             # Holds a term.
894             sub _term {
895 22     22   42 my ( $self, $value ) = @_;
896              
897 22 100       32 if ( defined $value ) {
898 13         24 $self->{ "_term" } = $value;
899             }
900              
901 22         32 return $self->{ "_term" };
902             } # _term
903              
904             # convert simple strings to Bio::Annotation::DBLinks
905             sub _to_annotation {
906 449     449   547 my ($self , $links) = @_;
907 449 100       680 return unless $links;
908 231         212 my @dbxrefs;
909 231         219 for my $string (@{$links}) {
  231         315  
910 13         38 my ($db, $id) = split(':',$string);
911 13         54 push @dbxrefs, Bio::Annotation::DBLink->new(-database => $db, -primary_id => $id);
912             }
913 231         371 return \@dbxrefs;
914             }
915              
916             1;