File Coverage

Bio/OntologyIO/dagflat.pm
Criterion Covered Total %
statement 277 312 88.7
branch 105 152 69.0
condition 16 24 66.6
subroutine 40 41 97.5
pod 5 7 71.4
total 443 536 82.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::OntologyIO::dagflat
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Hilmar Lapp, hlapp at gmx.net
7             #
8             # (c) Christian M. Zmasek, czmasek-at-burnham.org, 2002.
9             # (c) Hilmar Lapp, hlapp at gmx.net, 2003.
10             # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
11             #
12             # You may distribute this module under the same terms as perl itself.
13             # Refer to the Perl Artistic License (see the license accompanying this
14             # software package, or see http://www.perl.com/language/misc/Artistic.html)
15             # for the terms under which you may use, modify, and redistribute this module.
16             #
17             # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
18             # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19             # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20             #
21             # You may distribute this module under the same terms as perl itself
22              
23             # POD documentation - main docs before the code
24              
25             =head1 NAME
26              
27             Bio::OntologyIO::dagflat - a base class parser for GO flat-file type formats
28              
29             =head1 SYNOPSIS
30              
31             use Bio::OntologyIO;
32              
33             # do not use directly -- use via Bio::OntologyIO
34             # e.g., the GO parser is a simple extension of this class
35             my $parser = Bio::OntologyIO->new
36             ( -format => "go",
37             -defs_file => "/home/czmasek/GO/GO.defs",
38             -files => ["/home/czmasek/GO/component.ontology",
39             "/home/czmasek/GO/function.ontology",
40             "/home/czmasek/GO/process.ontology"] );
41              
42             my $go_ontology = $parser->next_ontology();
43              
44             my $IS_A = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
45             my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );
46             my $RELATED_TO = Bio::Ontology::RelationshipType->get_instance( "RELATED_TO" );
47              
48             =head1 DESCRIPTION
49              
50             Needs Graph.pm from CPAN.
51              
52             =head1 FEEDBACK
53              
54             =head2 Mailing Lists
55              
56             User feedback is an integral part of the evolution of this and other
57             Bioperl modules. Send your comments and suggestions preferably to the
58             Bioperl mailing lists Your participation is much appreciated.
59              
60             bioperl-l@bioperl.org - General discussion
61             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62              
63             =head2 Support
64              
65             Please direct usage questions or support issues to the mailing list:
66              
67             I
68              
69             rather than to the module maintainer directly. Many experienced and
70             reponsive experts will be able look at the problem and quickly
71             address it. Please include a thorough description of the problem
72             with code and data examples if at all possible.
73              
74             =head2 Reporting Bugs
75              
76             Report bugs to the Bioperl bug tracking system to help us keep track
77             the bugs and their resolution. Bug reports can be submitted via the
78             web:
79              
80             https://github.com/bioperl/bioperl-live/issues
81              
82             =head1 AUTHOR
83              
84             Christian M. Zmasek
85              
86             Email: czmasek-at-burnham.org or cmzmasek@yahoo.com
87              
88             WWW: http://monochrome-effect.net/
89              
90             Address:
91              
92             Genomics Institute of the Novartis Research Foundation
93             10675 John Jay Hopkins Drive
94             San Diego, CA 92121
95              
96             =head2 CONTRIBUTOR
97              
98             Hilmar Lapp, hlapp at gmx.net
99              
100             =head1 APPENDIX
101              
102             The rest of the documentation details each of the object
103             methods. Internal methods are usually preceded with a _
104              
105             =cut
106              
107              
108             # Let the code begin...
109              
110              
111             package Bio::OntologyIO::dagflat;
112              
113 2     2   9 use strict;
  2         2  
  2         44  
114              
115 2     2   6 use Bio::Root::IO;
  2         2  
  2         35  
116 2     2   797 use Bio::Ontology::OBOEngine;
  2         4  
  2         43  
117 2     2   10 use Bio::Ontology::Ontology;
  2         2  
  2         36  
118 2     2   6 use Bio::Ontology::OntologyStore;
  2         2  
  2         29  
119 2     2   5 use Bio::Ontology::TermFactory;
  2         2  
  2         27  
120 2     2   5 use Bio::Annotation::DBLink;
  2         2  
  2         36  
121              
122 2     2   6 use constant TRUE => 1;
  2         2  
  2         88  
123 2     2   6 use constant FALSE => 0;
  2         4  
  2         68  
124              
125              
126 2     2   6 use base qw(Bio::OntologyIO);
  2         3  
  2         4979  
127              
128              
129             =head2 new
130              
131             Title : new
132             Usage : $parser = Bio::OntologyIO->new(
133             -format => "go",
134             -defs_file => "/path/to/GO.defs",
135             -files => ["/path/to/component.ontology",
136             "/path/to/function.ontology",
137             "/path/to/process.ontology"] );
138             Function: Creates a new dagflat parser.
139             Returns : A new dagflat parser object, implementing Bio::OntologyIO.
140             Args : -defs_file => the name of the file holding the term
141             definitions
142             -files => a single ontology flat file holding the
143             term relationships, or an array ref holding
144             the file names (for GO, there will usually be
145             3 files: component.ontology, function.ontology,
146             process.ontology)
147             -file => if there is only a single flat file, it may
148             also be specified via the -file parameter
149             -ontology_name => the name of the ontology; if not specified the
150             parser will auto-discover it by using the term
151             that starts with a $, and converting underscores
152             to spaces
153             -engine => the Bio::Ontology::OntologyEngineI object
154             to be reused (will be created otherwise); note
155             that every Bio::Ontology::OntologyI will
156             qualify as well since that one inherits from the
157             former.
158              
159             See L.
160              
161             =cut
162              
163             # in reality, we let OntologyIO::new do the instantiation, and override
164             # _initialize for all initialization work
165             sub _initialize {
166 4     4   7 my ($self, %arg) = @_;
167              
168 4         22 my ( $defs_file_name,$files,$defs_url,$url,$name,$eng ) =
169             $self->_rearrange([qw( DEFS_FILE
170             FILES
171             DEFS_URL
172             URL
173             ONTOLOGY_NAME
174             ENGINE)
175             ],
176             %arg );
177              
178 4         12 delete($arg{-url}); #b/c GO has 3 files...
179              
180 4         19 $self->SUPER::_initialize( %arg );
181              
182 4         15 $self->_done( FALSE );
183 4         9 $self->_not_first_record( FALSE );
184 4         8 $self->_term( "" );
185 4         5 delete $self->{'_ontologies'};
186              
187             # ontology engine (and possibly name if it's an OntologyI)
188 4 50       23 $eng = Bio::Ontology::OBOEngine->new() unless $eng;
189 4 50       25 if($eng->isa("Bio::Ontology::OntologyI")) {
190 0         0 $self->ontology_name($eng->name());
191 0 0       0 $eng = $eng->engine() if $eng->can('engine');
192             }
193 4         15 $self->_ont_engine($eng);
194              
195             # flat files to parse
196 4 50 66     15 if(defined($defs_file_name) && defined($defs_url)){
197 0         0 $self->throw('cannot provide both -defs_file and -defs_url');
198             } else {
199 4 100       13 defined($defs_file_name) && $self->defs_file( $defs_file_name );
200 4 50       8 defined($defs_url) && $self->defs_url( $defs_url );
201             }
202              
203 4 50 66     21 if(defined($files) && defined($url)){
    100          
    50          
204             } elsif(defined($files)){
205 1 50       6 $self->{_flat_files} = $files ? ref($files) ? $files : [$files] : [];
    50          
206             } elsif(defined($url)){
207 0         0 $self->url($url);
208             }
209              
210             # ontology name (overrides implicit one through OntologyI engine)
211 4 50       15 $self->ontology_name($name) if $name;
212              
213             } # _initialize
214              
215             =head2 ontology_name
216              
217             Title : ontology_name
218             Usage : $obj->ontology_name($newval)
219             Function: Get/set the name of the ontology parsed by this module.
220             Example :
221             Returns : value of ontology_name (a scalar)
222             Args : on set, new value (a scalar or undef, optional)
223              
224              
225             =cut
226              
227             sub ontology_name{
228 14     14 1 14 my $self = shift;
229              
230 14 100       28 return $self->{'ontology_name'} = shift if @_;
231 11         41 return $self->{'ontology_name'};
232             }
233              
234              
235             =head2 parse
236              
237             Title : parse()
238             Usage : $parser->parse();
239             Function: Parses the files set with "new" or with methods
240             defs_file and _flat_files.
241              
242             Normally you should not need to call this method as it will
243             be called automatically upon the first call to
244             next_ontology().
245              
246             Returns : [Bio::Ontology::OntologyEngineI]
247             Args :
248              
249             =cut
250              
251             sub parse {
252 4     4 1 4 my $self = shift;
253              
254             #warn "PARSING";
255             # setup the default term factory if not done by anyone yet
256 4 50       22 $self->term_factory(Bio::Ontology::TermFactory->new(
257             -type => "Bio::Ontology::Term"))
258             unless $self->term_factory();
259              
260             # create the ontology object itself
261 4         14 my $ont = Bio::Ontology::Ontology->new(-name => $self->ontology_name(),
262             -engine => $self->_ont_engine());
263              
264             # parse definitions
265 4         14 while( my $term = $self->_next_term() ) {
266 9         14 $self->_add_term( $term, $ont );
267             }
268              
269             # set up the ontology of the relationship types
270 4         12 foreach ($self->_part_of_relationship(), $self->_is_a_relationship(), $self->_related_to_relationship()) {
271 12         18 $_->ontology($ont);
272             }
273              
274             # pre-seed the IO system with the first flat file if -file wasn't provided
275 4 100       11 if(! $self->_fh) {
276 1 50       6 if($self->url){
    50          
277 0 0       0 if(ref($self->url) eq 'ARRAY'){
278             #warn "BA";
279 0         0 foreach my $url (@{ $self->url }){
  0         0  
280             #warn $url;
281             #warn $ont;
282             #warn scalar($ont->get_all_terms());
283 0         0 $self->_initialize_io(-url => $url);
284 0         0 $self->_parse_flat_file($ont);
285             }
286 0         0 $self->close();
287             } else {
288 0         0 $self->_initialize_io(-url => $self->url);
289             }
290             } elsif($self->_flat_files){
291 1         3 $self->_initialize_io(-file => shift(@{$self->_flat_files()}));
  1         3  
292             }
293             }
294              
295 4         9 while($self->_fh) {
296 4         13 $self->_parse_flat_file($ont);
297             # advance to next flat file if more are available
298 4 50       6 if(@{$self->_flat_files()}) {
  4         21  
299 0         0 $self->close();
300 0         0 $self->_initialize_io(-file => shift(@{$self->_flat_files()}));
  0         0  
301             } else {
302 4         7 last; # nothing else to parse so terminate the loop
303             }
304             }
305 4         17 $self->_add_ontology($ont);
306              
307             # not needed anywhere, only because of backward compatibility
308 4         8 return $self->_ont_engine();
309             } # parse
310              
311             =head2 next_ontology
312              
313             Title : next_ontology
314             Usage :
315             Function: Get the next available ontology from the parser. This is the
316             method prescribed by Bio::OntologyIO.
317             Example :
318             Returns : An object implementing Bio::Ontology::OntologyI, and undef if
319             there is no more ontology in the input.
320             Args :
321              
322              
323             =cut
324              
325             sub next_ontology {
326 5     5 1 24 my $self = shift;
327              
328             # parse if not done already
329 5 100       19 $self->parse() unless exists($self->{'_ontologies'});
330             # return next available ontology
331 5 50       14 if(exists($self->{'_ontologies'})){
332 5         4 my $ont = shift (@{$self->{'_ontologies'}});
  5         10  
333 5 100       12 if($ont){
334 4         32 my $store = Bio::Ontology::OntologyStore->new();
335 4         11 $store->register_ontology($ont);
336 4         13 return $ont;
337             }
338             }
339 1         2 return;
340             }
341              
342             =head2 defs_file
343              
344             Title : defs_file
345             Usage : $parser->defs_file( "GO.defs" );
346             Function: Set/get for the term definitions filename.
347             Returns : The term definitions file name [string].
348             Args : On set, the term definitions file name [string] (optional).
349              
350             =cut
351              
352             sub defs_file {
353 2     2 1 2 my $self = shift;
354              
355 2 50       5 if ( @_ ) {
356 2         4 my $f = shift;
357 2         3 $self->{ "_defs_file_name" } = $f;
358 2 50       7 $self->_defs_io->close() if $self->_defs_io();
359 2 50       5 if(defined($f)) {
360 2         9 $self->_defs_io( Bio::Root::IO->new( -input => $f ) );
361             }
362             }
363 2         4 return $self->{ "_defs_file_name" };
364             } # defs_file
365              
366             sub defs_url {
367 0     0 0 0 my $self = shift;
368 0         0 my $val = shift;
369 0 0       0 if(defined($val)){
370 0         0 $self->{'_defs_url'} = $val;
371              
372 0 0       0 $self->_defs_io->close() if $self->_defs_io();
373 0         0 $self->_defs_io( Bio::Root::IO->new( -url => $val ) );
374             }
375 0         0 return $self->{'_defs_url'};
376             }
377              
378             sub url {
379 1     1 0 2 my $self = shift;
380 1         1 my $val = shift;
381 1 50       4 if(defined($val)){
382 0         0 $self->{'_url'} = $val;
383             }
384 1         3 return $self->{'_url'};
385             }
386              
387             =head2 close
388              
389             Title : close
390             Usage :
391             Function: Closes this ontology stream and associated file handles.
392              
393             Clients should call this method especially when they write
394             ontologies.
395              
396             We need to override this here in order to close the file
397             handle for the term definitions file.
398              
399             Example :
400             Returns : none
401             Args : none
402              
403              
404             =cut
405              
406             sub close{
407 4     4 1 6 my $self = shift;
408              
409             # first call the inherited implementation
410 4         26 $self->SUPER::close();
411             # then close the defs file io (if there is one)
412 4 100       12 $self->_defs_io->close() if $self->_defs_io();
413             }
414              
415             =head2 _flat_files
416              
417             Title : _flat_files
418             Usage : $files_to_parse = $parser->_flat_files();
419             Function: Get the array of ontology flat files that need to be parsed.
420              
421             Note that this array will decrease in elements over the
422             parsing process. Therefore, it\'s value outside of this
423             module will be limited. Also, be careful not to alter the
424             array unless you know what you are doing.
425              
426             Returns : a reference to an array of zero or more strings
427             Args : none
428              
429             =cut
430              
431             sub _flat_files {
432 6     6   10 my $self = shift;
433              
434 6 100       16 $self->{_flat_files} = [] unless exists($self->{_flat_files});
435 6         17 return $self->{_flat_files};
436             }
437              
438              
439             # INTERNAL METHODS
440             # ----------------
441              
442             =head2 _defs_io
443              
444             Title : _defs_io
445             Usage : $obj->_defs_io($newval)
446             Function: Get/set the Bio::Root::IO instance representing the
447             definition file, if provided (see defs_file()).
448             Example :
449             Returns : value of _defs_io (a Bio::Root::IO object)
450             Args : on set, new value (a Bio::Root::IO object or undef, optional)
451              
452             =cut
453              
454             sub _defs_io{
455 85     85   63 my $self = shift;
456              
457 85 100       119 return $self->{'_defs_io'} = shift if @_;
458 83         149 return $self->{'_defs_io'};
459             }
460              
461             sub _add_ontology {
462 4     4   4 my $self = shift;
463 4 50       15 $self->{'_ontologies'} = [] unless exists($self->{'_ontologies'});
464 4         7 foreach my $ont (@_) {
465 4 50 33     28 $self->throw(ref($ont)." does not implement Bio::Ontology::OntologyI")
466             unless ref($ont) && $ont->isa("Bio::Ontology::OntologyI");
467             # the ontology name may have been auto-discovered while parsing
468             # the file
469 4 50       19 $ont->name($self->ontology_name) unless $ont->name();
470 4         5 push(@{$self->{'_ontologies'}}, $ont);
  4         14  
471             }
472             }
473              
474             # This simply delegates. See SimpleGOEngine.
475             sub _add_term {
476 227     227   215 my ( $self, $term, $ont ) = @_;
477 227 50 33     518 $term->ontology($ont) if $ont && (! $term->ontology);
478 227         314 $self->_ont_engine()->add_term( $term );
479             } # _add_term
480              
481              
482              
483             # This simply delegates. See SimpleGOEngine
484             sub _part_of_relationship {
485 51     51   36 my $self = shift;
486              
487 51         68 return $self->_ont_engine()->part_of_relationship(@_);
488             } # _part_of_relationship
489              
490              
491              
492             # This simply delegates. See SimpleGOEngine
493             sub _is_a_relationship {
494 211     211   159 my $self = shift;
495              
496 211         254 return $self->_ont_engine()->is_a_relationship(@_);
497             } # _is_a_relationship
498              
499             # This simply delegates. See SimpleGOEngine
500             sub _related_to_relationship {
501 4     4   4 my $self = shift;
502              
503 4         6 return $self->_ont_engine()->related_to_relationship(@_);
504             } # _is_a_relationship
505              
506              
507              
508             # This simply delegates. See SimpleGOEngine
509             sub _add_relationship {
510 254     254   256 my ( $self, $parent, $child, $type, $ont ) = @_;
511              
512             # note the triple terminology (subject,predicate,object) corresponds to
513             # (child,type,parent)
514 254         258 $self->_ont_engine()->add_relationship( $child, $type, $parent, $ont );
515              
516              
517             } # _add_relationship
518              
519              
520             # This simply delegates. See SimpleGOEngine
521             sub _has_term {
522 258     258   181 my $self = shift;
523              
524 258         345 return $self->_ont_engine()->has_term( @_ );
525             } # _add_term
526              
527              
528              
529             # This parses the relationships files
530             sub _parse_flat_file {
531 4     4   9 my $self = shift;
532 4         4 my $ont = shift;
533              
534 4         10 my @stack = ();
535 4         3 my $prev_spaces = -1;
536 4         5 my $prev_term = "";
537              
538 4         19 while ( my $line = $self->_readline() ) {
539              
540 248 100       397 if ( $line =~ /^!/ ) {
541 26         42 next;
542             }
543              
544             # split into term specifications
545 222         964 my @termspecs = split(/ (?=[%<])/, $line);
546             # the first element is whitespace only
547 222 100       573 shift(@termspecs) if $termspecs[0] =~ /^\s*$/;
548              
549             # parse out the focus term
550 222         384 my $current_term = $self->_get_first_termid( $termspecs[0] );
551 222         319 my @syns = $self->_get_synonyms( $termspecs[0] );
552 222         299 my @sec_go_ids = $self->_get_secondary_termids( $termspecs[0] );
553 222         307 my @cross = $self->_get_db_cross_refs( $termspecs[0] );
554 222         154 my @cross_refs;
555 222         233 foreach my $cross_ref (@cross) {
556 81 100       141 $cross_ref eq $current_term && next;
557 4         6 push(@cross_refs, $cross_ref);
558             }
559            
560             # parse out the parents of the focus term
561 222         166 shift(@termspecs);
562 222         202 my @isa_parents = ();
563 222         155 my @partof_parents = ();
564 222         179 foreach my $parent (@termspecs) {
565 36 100       56 if (index($parent, "%") == 0) {
    50          
566 25         39 push(@isa_parents, $self->_get_first_termid($parent));
567             } elsif (index($parent, "<") == 0) {
568 11         12 push(@partof_parents, $self->_get_first_termid($parent));
569             } else {
570 0         0 $self->warn("unhandled relationship type in '".$parent."'");
571             }
572             }
573              
574 222 100       303 if ( ! $self->_has_term( $current_term ) ) {
575 204         266 my $term =$self->_create_ont_entry($self->_get_name($line,
576             $current_term),
577             $current_term );
578 204         374 $self->_add_term( $term, $ont );
579             }
580              
581 222         366 my $current_term_object = $self->_ont_engine()->get_terms( $current_term );
582 222         392 my $anno = $self->_to_annotation(\@cross_refs);
583 222         513 $current_term_object->add_dbxref(-dbxrefs => $anno);
584 222         413 $current_term_object->add_secondary_id( @sec_go_ids );
585 222         324 $current_term_object->add_synonym( @syns );
586 222 100       363 unless ( $line =~ /^\$/ ) {
587 218         305 $current_term_object->ontology( $ont );
588             }
589 222         224 foreach my $parent ( @isa_parents ) {
590 25 100       32 if ( ! $self->_has_term( $parent ) ) {
591 11         23 my $term = $self->_create_ont_entry($self->_get_name($line,
592             $parent),
593             $parent );
594 11         24 $self->_add_term( $term, $ont );
595             }
596              
597 25         43 $self->_add_relationship( $parent,
598             $current_term,
599             $self->_is_a_relationship(),
600             $ont);
601              
602             }
603 222         181 foreach my $parent ( @partof_parents ) {
604 11 100       14 if ( ! $self->_has_term( $parent ) ) {
605 3         9 my $term = $self->_create_ont_entry($self->_get_name($line,
606             $parent),
607             $parent );
608 3         7 $self->_add_term( $term, $ont );
609             }
610              
611 11         18 $self->_add_relationship( $parent,
612             $current_term,
613             $self->_part_of_relationship(),
614             $ont);
615             }
616              
617 222         292 my $current_spaces = $self->_count_spaces( $line );
618              
619 222 100       318 if ( $current_spaces != $prev_spaces ) {
620              
621 116 100       161 if ( $current_spaces == $prev_spaces + 1 ) {
    50          
622 75         90 push( @stack, $prev_term );
623             } elsif ( $current_spaces < $prev_spaces ) {
624 41         40 my $n = $prev_spaces - $current_spaces;
625 41         68 for ( my $i = 0; $i < $n; ++$i ) {
626 60         106 pop( @stack );
627             }
628             } else {
629 0         0 $self->throw( "format error (file ".$self->file.")" );
630             }
631             }
632              
633 222         289 my $parent = $stack[ @stack - 1 ];
634              
635             # add a relationship if the line isn\'t the one with the root term
636             # of the ontology (which is also the name of the ontology)
637 222 100       387 if ( index($line,'$') != 0 ) {
638             #adding @reltype@ syntax
639 218 50       447 if ( $line !~ /^\s*([<%~]|\@\w+?\@)/ ) {
640 0         0 $self->throw( "format error (file ".$self->file.") offending line:\n$line" );
641             }
642              
643 218         410 my($relstring) = $line =~ /^\s*([<%~]|\@[^\@]+?\@)/;
644              
645 218         163 my $reltype;
646              
647 218 100       365 if ($relstring eq '<') {
    50          
    0          
648 36         59 $reltype = $self->_part_of_relationship;
649             } elsif ($relstring eq '%') {
650 182         264 $reltype = $self->_is_a_relationship;
651             } elsif ($relstring eq '~') {
652 0         0 $reltype = $self->_related_to_relationship;
653             } else {
654 0         0 $relstring =~ s/\@//g;
655 0 0       0 if ($self->_ont_engine->get_relationship_type($relstring)) {
656 0         0 $reltype = $self->_ont_engine->get_relationship_type($relstring);
657             } else {
658 0         0 $self->_ont_engine->add_relationship_type($relstring, $ont);
659 0         0 $reltype = $self->_ont_engine->get_relationship_type($relstring);
660             }
661             }
662              
663             #my $reltype = ($line =~ /^\s*
664             #$self->_part_of_relationship() :
665             #$self->_is_a_relationship();
666 218         300 $self->_add_relationship( $parent, $current_term, $reltype, $ont);
667             }
668              
669 222         178 $prev_spaces = $current_spaces;
670 222         738 $prev_term = $current_term;
671             }
672 4         9 return $ont;
673             } # _parse_relationships_file
674              
675              
676              
677             # Parses the 1st term id number out of line.
678             sub _get_first_termid {
679 258     258   241 my ( $self, $line ) = @_;
680 258 50       664 if ( $line =~ /;\s*([A-Z_]{1,8}:\d{1,})/ ) {
681             # if ( $line =~ /;\s*(\w+:\w+)/ ) {
682 258         590 return $1;
683             }
684             else {
685 0         0 $self->throw( "format error: no term id in line \"$line\"" );
686             }
687              
688             } # _get_first_termid
689              
690              
691              
692             # Parses the name out of line.
693             sub _get_name {
694 218     218   204 my ( $self, $line, $termid ) = @_;
695              
696 218 50       4234 if ( $line =~ /([^;<%~]+);\s*$termid/ ) {
697 218         334 my $name = $1;
698             # remove trailing and leading whitespace
699 218         517 $name =~ s/\s+$//;
700 218         251 $name =~ s/^\s+//;
701 218         163 $name =~ s/\@.+?\@//;
702             # remove leading dollar character; also we default the name of the
703             # ontology to this name unless it is preset to something else
704 218 100       381 if(index($name,'$') == 0) {
705 3         6 $name = substr($name,1);
706             # replace underscores by spaces for setting the ontology name
707 3 50       8 $self->ontology_name(join(" ",split(/_/,$name)))
708             unless $self->ontology_name();
709             }
710 218         557 return $name;
711             }
712             else {
713 0         0 return;
714             }
715             } # _get_name
716              
717              
718             # Parses the synonyms out of line.
719             sub _get_synonyms {
720 222     222   200 my ( $self, $line ) = @_;
721              
722 222         186 my @synonyms = ();
723              
724 222         433 while ( $line =~ /synonym\s*:\s*([^;<%~]+)/g ) {
725 38         45 my $syn = $1;
726 38         88 $syn =~ s/\s+$//;
727 38         43 $syn =~ s/^\s+//;
728 38         81 push( @synonyms, $syn );
729             }
730 222         270 return @synonyms;
731              
732             } # _get_synonyms
733              
734              
735              
736             # Parses the db cross refs out of line.
737             sub _get_db_cross_refs {
738 222     222   171 my ( $self, $line ) = @_;
739              
740 222         164 my @refs = ();
741              
742 222         632 while ( $line =~ /;([^;<%~:]+:[^;<%~:]+)/g ) {
743 264         304 my $ref = $1;
744 264 100 100     1106 if ( $ref =~ /synonym/ || $ref =~ /[A-Z]{1,8}:\d{3,}/ ) {
745 183         332 next;
746             }
747 81         173 $ref =~ s/\s+$//;
748 81         124 $ref =~ s/^\s+//;
749              
750 81         166 $ref = $self->unescape( $ref );
751              
752 81 50       242 push( @refs, $ref ) if defined $ref;
753             }
754 222         292 return @refs;
755              
756             }
757              
758              
759             # Parses the secondary go ids out of a line
760             sub _get_secondary_termids {
761 222     222   197 my ( $self, $line ) = @_;
762 222         166 my @secs = ();
763              
764             # while ( $line =~ /,\s*([A-Z]{1,8}:\d{3,})/g ) {
765 222         358 while ( $line =~ /,\s*(\w+:\w+)/g ) {
766 1         3 my $sec = $1;
767 1         2 push( @secs, $sec );
768             }
769 222         192 return @secs;
770              
771             } # _get_secondary_termids
772              
773              
774             # Counts the spaces at the beginning of a line in the relationships files
775             sub _count_spaces {
776 222     222   213 my ( $self, $line ) = @_;
777              
778 222 100       518 if ( $line =~ /^(\s+)/ ) {
779 218         790 return length( $1 );
780             }
781             else {
782 4         7 return 0;
783             }
784             } # _count_spaces
785              
786              
787             # "next" method for parsing the defintions file
788             sub _next_term {
789 13     13   10 my ( $self ) = @_;
790              
791 13 100 100     21 if ( ($self->_done() == TRUE) || (! $self->_defs_io())) {
792 4         8 return;
793             }
794              
795 9         9 my $line = "";
796 9         6 my $termid = "";
797 9         13 my $next_term = $self->_term();
798 9         8 my $def = "";
799 9         7 my $comment = "";
800 9         8 my @def_refs = ();
801 9         8 my $isobsolete;
802              
803 9         12 while( $line = ( $self->_defs_io->_readline() ) ) {
804 62 100 100     309 if ( $line !~ /\S/
    100          
    100          
    100          
    50          
    0          
805             || $line =~ /^\s*!/ ) {
806 26         34 next;
807             }
808             elsif ( $line =~ /^\s*term:\s*(.+)/ ) {
809 9         14 $self->_term( $1 );
810 9 100       11 last if $self->_not_first_record();
811 2         3 $next_term = $1;
812 2         4 $self->_not_first_record( TRUE );
813             }
814             elsif ( $line =~ /^\s*[a-z]{0,8}id:\s*(.+)/ ) {
815 9         20 $termid = $1;
816             }
817             elsif ( $line =~ /^\s*definition:\s*(.+)/ ) {
818 9         23 $def = $self->unescape($1);
819 9 50       26 $isobsolete = 1 if index($def,"OBSOLETE") == 0;
820             }
821             elsif ( $line =~ /^\s*definition_reference:\s*(.+)/ ) {
822 9         14 push( @def_refs, $self->unescape($1) );
823             }
824             elsif ( $line =~ /^\s*comment:\s*(.+)/ ) {
825 0         0 $comment = $self->unescape($1);
826             }
827             }
828 9 100       16 $self->_done( TRUE ) unless $line; # we'll come back until done
829 9         17 return $self->_create_ont_entry( $next_term, $termid, $def,
830             $comment, \@def_refs, $isobsolete);
831             } # _next_term
832              
833              
834              
835             # Holds the GO engine to be parsed into
836             sub _ont_engine {
837 1239     1239   925 my ( $self, $value ) = @_;
838              
839 1239 100       1628 if ( defined $value ) {
840 4         6 $self->{ "_ont_engine" } = $value;
841             }
842              
843 1239         2382 return $self->{ "_ont_engine" };
844             } # _ont_engine
845              
846              
847             # Used to create ontology terms.
848             # Arguments: name, id
849             sub _create_ont_entry {
850 227     227   276 my ( $self, $name, $termid, $def, $cmt, $dbxrefs, $obsolete ) = @_;
851              
852 227 50 33     842 if((!defined($obsolete)) && (index(lc($name),"obsolete") == 0)) {
853 0         0 $obsolete = 1;
854             }
855 227         357 my $anno = $self->_to_annotation($dbxrefs);
856 227         473 my $term = $self->term_factory->create_object(-name => $name,
857             -identifier => $termid,
858             -definition => $def,
859             -comment => $cmt,
860             -dbxrefs => $anno,
861             -is_obsolete => $obsolete);
862              
863 227         376 return $term;
864             } # _create_ont_entry
865              
866              
867              
868             # Holds whether first record or not
869             sub _not_first_record {
870 15     15   15 my ( $self, $value ) = @_;
871              
872 15 100       24 if ( defined $value ) {
873 6         9 $self->{ "_not_first_record" } = $value;
874             }
875              
876 15         22 return $self->{ "_not_first_record" };
877             } # _not_first_record
878              
879              
880              
881             # Holds whether done or not
882             sub _done {
883 19     19   16 my ( $self, $value ) = @_;
884              
885 19 100       31 if ( defined $value ) {
886 6         8 $self->{ "_done" } = $value;
887             }
888              
889 19         45 return $self->{ "_done" };
890             } # _done
891              
892              
893             # Holds a term.
894             sub _term {
895 22     22   25 my ( $self, $value ) = @_;
896              
897 22 100       28 if ( defined $value ) {
898 13         24 $self->{ "_term" } = $value;
899             }
900              
901 22         25 return $self->{ "_term" };
902             } # _term
903              
904             # convert simple strings to Bio::Annotation::DBLinks
905             sub _to_annotation {
906 449     449   357 my ($self , $links) = @_;
907 449 100       697 return unless $links;
908 231         149 my @dbxrefs;
909 231         149 for my $string (@{$links}) {
  231         293  
910 13         33 my ($db, $id) = split(':',$string);
911 13         55 push @dbxrefs, Bio::Annotation::DBLink->new(-database => $db, -primary_id => $id);
912             }
913 231         260 return \@dbxrefs;
914             }
915              
916             1;