File Coverage

Bio/OntologyIO/dagflat.pm
Criterion Covered Total %
statement 277 312 88.7
branch 105 152 69.0
condition 16 24 66.6
subroutine 40 41 97.5
pod 5 7 71.4
total 443 536 82.6


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::OntologyIO::dagflat
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Hilmar Lapp, hlapp at gmx.net
7             #
8             # (c) Christian M. Zmasek, czmasek-at-burnham.org, 2002.
9             # (c) Hilmar Lapp, hlapp at gmx.net, 2003.
10             # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2002.
11             #
12             # You may distribute this module under the same terms as perl itself.
13             # Refer to the Perl Artistic License (see the license accompanying this
14             # software package, or see http://www.perl.com/language/misc/Artistic.html)
15             # for the terms under which you may use, modify, and redistribute this module.
16             #
17             # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
18             # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
19             # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
20             #
21             # You may distribute this module under the same terms as perl itself
22              
23             # POD documentation - main docs before the code
24              
25             =head1 NAME
26              
27             Bio::OntologyIO::dagflat - a base class parser for GO flat-file type formats
28              
29             =head1 SYNOPSIS
30              
31             use Bio::OntologyIO;
32              
33             # do not use directly -- use via Bio::OntologyIO
34             # e.g., the GO parser is a simple extension of this class
35             my $parser = Bio::OntologyIO->new
36             ( -format => "go",
37             -defs_file => "/home/czmasek/GO/GO.defs",
38             -files => ["/home/czmasek/GO/component.ontology",
39             "/home/czmasek/GO/function.ontology",
40             "/home/czmasek/GO/process.ontology"] );
41              
42             my $go_ontology = $parser->next_ontology();
43              
44             my $IS_A = Bio::Ontology::RelationshipType->get_instance( "IS_A" );
45             my $PART_OF = Bio::Ontology::RelationshipType->get_instance( "PART_OF" );
46             my $RELATED_TO = Bio::Ontology::RelationshipType->get_instance( "RELATED_TO" );
47              
48             =head1 DESCRIPTION
49              
50             Needs Graph.pm from CPAN.
51              
52             =head1 FEEDBACK
53              
54             =head2 Mailing Lists
55              
56             User feedback is an integral part of the evolution of this and other
57             Bioperl modules. Send your comments and suggestions preferably to the
58             Bioperl mailing lists Your participation is much appreciated.
59              
60             bioperl-l@bioperl.org - General discussion
61             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
62              
63             =head2 Support
64              
65             Please direct usage questions or support issues to the mailing list:
66              
67             I
68              
69             rather than to the module maintainer directly. Many experienced and
70             reponsive experts will be able look at the problem and quickly
71             address it. Please include a thorough description of the problem
72             with code and data examples if at all possible.
73              
74             =head2 Reporting Bugs
75              
76             Report bugs to the Bioperl bug tracking system to help us keep track
77             the bugs and their resolution. Bug reports can be submitted via the
78             web:
79              
80             https://github.com/bioperl/bioperl-live/issues
81              
82             =head1 AUTHOR
83              
84             Christian M. Zmasek
85              
86             Email: czmasek-at-burnham.org or cmzmasek@yahoo.com
87              
88             WWW: http://monochrome-effect.net/
89              
90             Address:
91              
92             Genomics Institute of the Novartis Research Foundation
93             10675 John Jay Hopkins Drive
94             San Diego, CA 92121
95              
96             =head2 CONTRIBUTOR
97              
98             Hilmar Lapp, hlapp at gmx.net
99              
100             =head1 APPENDIX
101              
102             The rest of the documentation details each of the object
103             methods. Internal methods are usually preceded with a _
104              
105             =cut
106              
107              
108             # Let the code begin...
109              
110              
111             package Bio::OntologyIO::dagflat;
112              
113 2     2   9 use strict;
  2         3  
  2         45  
114              
115 2     2   6 use Bio::Root::IO;
  2         1  
  2         34  
116 2     2   771 use Bio::Ontology::OBOEngine;
  2         4  
  2         42  
117 2     2   10 use Bio::Ontology::Ontology;
  2         2  
  2         36  
118 2     2   7 use Bio::Ontology::OntologyStore;
  2         2  
  2         28  
119 2     2   6 use Bio::Ontology::TermFactory;
  2         2  
  2         24  
120 2     2   7 use Bio::Annotation::DBLink;
  2         2  
  2         37  
121              
122 2     2   6 use constant TRUE => 1;
  2         2  
  2         88  
123 2     2   7 use constant FALSE => 0;
  2         2  
  2         69  
124              
125              
126 2     2   6 use base qw(Bio::OntologyIO);
  2         2  
  2         5118  
127              
128              
129             =head2 new
130              
131             Title : new
132             Usage : $parser = Bio::OntologyIO->new(
133             -format => "go",
134             -defs_file => "/path/to/GO.defs",
135             -files => ["/path/to/component.ontology",
136             "/path/to/function.ontology",
137             "/path/to/process.ontology"] );
138             Function: Creates a new dagflat parser.
139             Returns : A new dagflat parser object, implementing Bio::OntologyIO.
140             Args : -defs_file => the name of the file holding the term
141             definitions
142             -files => a single ontology flat file holding the
143             term relationships, or an array ref holding
144             the file names (for GO, there will usually be
145             3 files: component.ontology, function.ontology,
146             process.ontology)
147             -file => if there is only a single flat file, it may
148             also be specified via the -file parameter
149             -ontology_name => the name of the ontology; if not specified the
150             parser will auto-discover it by using the term
151             that starts with a $, and converting underscores
152             to spaces
153             -engine => the Bio::Ontology::OntologyEngineI object
154             to be reused (will be created otherwise); note
155             that every Bio::Ontology::OntologyI will
156             qualify as well since that one inherits from the
157             former.
158              
159             See L.
160              
161             =cut
162              
163             # in reality, we let OntologyIO::new do the instantiation, and override
164             # _initialize for all initialization work
165             sub _initialize {
166 4     4   8 my ($self, %arg) = @_;
167              
168 4         24 my ( $defs_file_name,$files,$defs_url,$url,$name,$eng ) =
169             $self->_rearrange([qw( DEFS_FILE
170             FILES
171             DEFS_URL
172             URL
173             ONTOLOGY_NAME
174             ENGINE)
175             ],
176             %arg );
177              
178 4         11 delete($arg{-url}); #b/c GO has 3 files...
179              
180 4         15 $self->SUPER::_initialize( %arg );
181              
182 4         14 $self->_done( FALSE );
183 4         9 $self->_not_first_record( FALSE );
184 4         10 $self->_term( "" );
185 4         5 delete $self->{'_ontologies'};
186              
187             # ontology engine (and possibly name if it's an OntologyI)
188 4 50       24 $eng = Bio::Ontology::OBOEngine->new() unless $eng;
189 4 50       23 if($eng->isa("Bio::Ontology::OntologyI")) {
190 0         0 $self->ontology_name($eng->name());
191 0 0       0 $eng = $eng->engine() if $eng->can('engine');
192             }
193 4         13 $self->_ont_engine($eng);
194              
195             # flat files to parse
196 4 50 66     13 if(defined($defs_file_name) && defined($defs_url)){
197 0         0 $self->throw('cannot provide both -defs_file and -defs_url');
198             } else {
199 4 100       12 defined($defs_file_name) && $self->defs_file( $defs_file_name );
200 4 50       6 defined($defs_url) && $self->defs_url( $defs_url );
201             }
202              
203 4 50 66     19 if(defined($files) && defined($url)){
    100          
    50          
204             } elsif(defined($files)){
205 1 50       5 $self->{_flat_files} = $files ? ref($files) ? $files : [$files] : [];
    50          
206             } elsif(defined($url)){
207 0         0 $self->url($url);
208             }
209              
210             # ontology name (overrides implicit one through OntologyI engine)
211 4 50       13 $self->ontology_name($name) if $name;
212              
213             } # _initialize
214              
215             =head2 ontology_name
216              
217             Title : ontology_name
218             Usage : $obj->ontology_name($newval)
219             Function: Get/set the name of the ontology parsed by this module.
220             Example :
221             Returns : value of ontology_name (a scalar)
222             Args : on set, new value (a scalar or undef, optional)
223              
224              
225             =cut
226              
227             sub ontology_name{
228 14     14 1 16 my $self = shift;
229              
230 14 100       25 return $self->{'ontology_name'} = shift if @_;
231 11         37 return $self->{'ontology_name'};
232             }
233              
234              
235             =head2 parse
236              
237             Title : parse()
238             Usage : $parser->parse();
239             Function: Parses the files set with "new" or with methods
240             defs_file and _flat_files.
241              
242             Normally you should not need to call this method as it will
243             be called automatically upon the first call to
244             next_ontology().
245              
246             Returns : [Bio::Ontology::OntologyEngineI]
247             Args :
248              
249             =cut
250              
251             sub parse {
252 4     4 1 3 my $self = shift;
253              
254             #warn "PARSING";
255             # setup the default term factory if not done by anyone yet
256 4 50       9 $self->term_factory(Bio::Ontology::TermFactory->new(
257             -type => "Bio::Ontology::Term"))
258             unless $self->term_factory();
259              
260             # create the ontology object itself
261 4         13 my $ont = Bio::Ontology::Ontology->new(-name => $self->ontology_name(),
262             -engine => $self->_ont_engine());
263              
264             # parse definitions
265 4         11 while( my $term = $self->_next_term() ) {
266 9         17 $self->_add_term( $term, $ont );
267             }
268              
269             # set up the ontology of the relationship types
270 4         24 foreach ($self->_part_of_relationship(), $self->_is_a_relationship(), $self->_related_to_relationship()) {
271 12         16 $_->ontology($ont);
272             }
273              
274             # pre-seed the IO system with the first flat file if -file wasn't provided
275 4 100       9 if(! $self->_fh) {
276 1 50       6 if($self->url){
    50          
277 0 0       0 if(ref($self->url) eq 'ARRAY'){
278             #warn "BA";
279 0         0 foreach my $url (@{ $self->url }){
  0         0  
280             #warn $url;
281             #warn $ont;
282             #warn scalar($ont->get_all_terms());
283 0         0 $self->_initialize_io(-url => $url);
284 0         0 $self->_parse_flat_file($ont);
285             }
286 0         0 $self->close();
287             } else {
288 0         0 $self->_initialize_io(-url => $self->url);
289             }
290             } elsif($self->_flat_files){
291 1         3 $self->_initialize_io(-file => shift(@{$self->_flat_files()}));
  1         1  
292             }
293             }
294              
295 4         7 while($self->_fh) {
296 4         12 $self->_parse_flat_file($ont);
297             # advance to next flat file if more are available
298 4 50       4 if(@{$self->_flat_files()}) {
  4         23  
299 0         0 $self->close();
300 0         0 $self->_initialize_io(-file => shift(@{$self->_flat_files()}));
  0         0  
301             } else {
302 4         8 last; # nothing else to parse so terminate the loop
303             }
304             }
305 4         19 $self->_add_ontology($ont);
306              
307             # not needed anywhere, only because of backward compatibility
308 4         8 return $self->_ont_engine();
309             } # parse
310              
311             =head2 next_ontology
312              
313             Title : next_ontology
314             Usage :
315             Function: Get the next available ontology from the parser. This is the
316             method prescribed by Bio::OntologyIO.
317             Example :
318             Returns : An object implementing Bio::Ontology::OntologyI, and undef if
319             there is no more ontology in the input.
320             Args :
321              
322              
323             =cut
324              
325             sub next_ontology {
326 5     5 1 19 my $self = shift;
327              
328             # parse if not done already
329 5 100       19 $self->parse() unless exists($self->{'_ontologies'});
330             # return next available ontology
331 5 50       15 if(exists($self->{'_ontologies'})){
332 5         4 my $ont = shift (@{$self->{'_ontologies'}});
  5         10  
333 5 100       10 if($ont){
334 4         30 my $store = Bio::Ontology::OntologyStore->new();
335 4         17 $store->register_ontology($ont);
336 4         13 return $ont;
337             }
338             }
339 1         1 return;
340             }
341              
342             =head2 defs_file
343              
344             Title : defs_file
345             Usage : $parser->defs_file( "GO.defs" );
346             Function: Set/get for the term definitions filename.
347             Returns : The term definitions file name [string].
348             Args : On set, the term definitions file name [string] (optional).
349              
350             =cut
351              
352             sub defs_file {
353 2     2 1 2 my $self = shift;
354              
355 2 50       4 if ( @_ ) {
356 2         2 my $f = shift;
357 2         3 $self->{ "_defs_file_name" } = $f;
358 2 50       5 $self->_defs_io->close() if $self->_defs_io();
359 2 50       4 if(defined($f)) {
360 2         7 $self->_defs_io( Bio::Root::IO->new( -input => $f ) );
361             }
362             }
363 2         4 return $self->{ "_defs_file_name" };
364             } # defs_file
365              
366             sub defs_url {
367 0     0 0 0 my $self = shift;
368 0         0 my $val = shift;
369 0 0       0 if(defined($val)){
370 0         0 $self->{'_defs_url'} = $val;
371              
372 0 0       0 $self->_defs_io->close() if $self->_defs_io();
373 0         0 $self->_defs_io( Bio::Root::IO->new( -url => $val ) );
374             }
375 0         0 return $self->{'_defs_url'};
376             }
377              
378             sub url {
379 1     1 0 2 my $self = shift;
380 1         1 my $val = shift;
381 1 50       2 if(defined($val)){
382 0         0 $self->{'_url'} = $val;
383             }
384 1         5 return $self->{'_url'};
385             }
386              
387             =head2 close
388              
389             Title : close
390             Usage :
391             Function: Closes this ontology stream and associated file handles.
392              
393             Clients should call this method especially when they write
394             ontologies.
395              
396             We need to override this here in order to close the file
397             handle for the term definitions file.
398              
399             Example :
400             Returns : none
401             Args : none
402              
403              
404             =cut
405              
406             sub close{
407 4     4 1 7 my $self = shift;
408              
409             # first call the inherited implementation
410 4         33 $self->SUPER::close();
411             # then close the defs file io (if there is one)
412 4 100       11 $self->_defs_io->close() if $self->_defs_io();
413             }
414              
415             =head2 _flat_files
416              
417             Title : _flat_files
418             Usage : $files_to_parse = $parser->_flat_files();
419             Function: Get the array of ontology flat files that need to be parsed.
420              
421             Note that this array will decrease in elements over the
422             parsing process. Therefore, it\'s value outside of this
423             module will be limited. Also, be careful not to alter the
424             array unless you know what you are doing.
425              
426             Returns : a reference to an array of zero or more strings
427             Args : none
428              
429             =cut
430              
431             sub _flat_files {
432 6     6   6 my $self = shift;
433              
434 6 100       22 $self->{_flat_files} = [] unless exists($self->{_flat_files});
435 6         17 return $self->{_flat_files};
436             }
437              
438              
439             # INTERNAL METHODS
440             # ----------------
441              
442             =head2 _defs_io
443              
444             Title : _defs_io
445             Usage : $obj->_defs_io($newval)
446             Function: Get/set the Bio::Root::IO instance representing the
447             definition file, if provided (see defs_file()).
448             Example :
449             Returns : value of _defs_io (a Bio::Root::IO object)
450             Args : on set, new value (a Bio::Root::IO object or undef, optional)
451              
452             =cut
453              
454             sub _defs_io{
455 85     85   63 my $self = shift;
456              
457 85 100       109 return $self->{'_defs_io'} = shift if @_;
458 83         160 return $self->{'_defs_io'};
459             }
460              
461             sub _add_ontology {
462 4     4   6 my $self = shift;
463 4 50       13 $self->{'_ontologies'} = [] unless exists($self->{'_ontologies'});
464 4         9 foreach my $ont (@_) {
465 4 50 33     25 $self->throw(ref($ont)." does not implement Bio::Ontology::OntologyI")
466             unless ref($ont) && $ont->isa("Bio::Ontology::OntologyI");
467             # the ontology name may have been auto-discovered while parsing
468             # the file
469 4 50       14 $ont->name($self->ontology_name) unless $ont->name();
470 4         5 push(@{$self->{'_ontologies'}}, $ont);
  4         11  
471             }
472             }
473              
474             # This simply delegates. See SimpleGOEngine.
475             sub _add_term {
476 227     227   193 my ( $self, $term, $ont ) = @_;
477 227 50 33     515 $term->ontology($ont) if $ont && (! $term->ontology);
478 227         309 $self->_ont_engine()->add_term( $term );
479             } # _add_term
480              
481              
482              
483             # This simply delegates. See SimpleGOEngine
484             sub _part_of_relationship {
485 51     51   44 my $self = shift;
486              
487 51         60 return $self->_ont_engine()->part_of_relationship(@_);
488             } # _part_of_relationship
489              
490              
491              
492             # This simply delegates. See SimpleGOEngine
493             sub _is_a_relationship {
494 211     211   156 my $self = shift;
495              
496 211         270 return $self->_ont_engine()->is_a_relationship(@_);
497             } # _is_a_relationship
498              
499             # This simply delegates. See SimpleGOEngine
500             sub _related_to_relationship {
501 4     4   5 my $self = shift;
502              
503 4         7 return $self->_ont_engine()->related_to_relationship(@_);
504             } # _is_a_relationship
505              
506              
507              
508             # This simply delegates. See SimpleGOEngine
509             sub _add_relationship {
510 254     254   244 my ( $self, $parent, $child, $type, $ont ) = @_;
511              
512             # note the triple terminology (subject,predicate,object) corresponds to
513             # (child,type,parent)
514 254         247 $self->_ont_engine()->add_relationship( $child, $type, $parent, $ont );
515              
516              
517             } # _add_relationship
518              
519              
520             # This simply delegates. See SimpleGOEngine
521             sub _has_term {
522 258     258   190 my $self = shift;
523              
524 258         357 return $self->_ont_engine()->has_term( @_ );
525             } # _add_term
526              
527              
528              
529             # This parses the relationships files
530             sub _parse_flat_file {
531 4     4   7 my $self = shift;
532 4         4 my $ont = shift;
533              
534 4         5 my @stack = ();
535 4         5 my $prev_spaces = -1;
536 4         3 my $prev_term = "";
537              
538 4         17 while ( my $line = $self->_readline() ) {
539              
540 248 100       394 if ( $line =~ /^!/ ) {
541 26         46 next;
542             }
543              
544             # split into term specifications
545 222         973 my @termspecs = split(/ (?=[%<])/, $line);
546             # the first element is whitespace only
547 222 100       578 shift(@termspecs) if $termspecs[0] =~ /^\s*$/;
548              
549             # parse out the focus term
550 222         354 my $current_term = $self->_get_first_termid( $termspecs[0] );
551 222         338 my @syns = $self->_get_synonyms( $termspecs[0] );
552 222         321 my @sec_go_ids = $self->_get_secondary_termids( $termspecs[0] );
553 222         261 my @cross = $self->_get_db_cross_refs( $termspecs[0] );
554 222         180 my @cross_refs;
555 222         241 foreach my $cross_ref (@cross) {
556 81 100       145 $cross_ref eq $current_term && next;
557 4         5 push(@cross_refs, $cross_ref);
558             }
559            
560             # parse out the parents of the focus term
561 222         154 shift(@termspecs);
562 222         191 my @isa_parents = ();
563 222         143 my @partof_parents = ();
564 222         183 foreach my $parent (@termspecs) {
565 36 100       78 if (index($parent, "%") == 0) {
    50          
566 25         33 push(@isa_parents, $self->_get_first_termid($parent));
567             } elsif (index($parent, "<") == 0) {
568 11         13 push(@partof_parents, $self->_get_first_termid($parent));
569             } else {
570 0         0 $self->warn("unhandled relationship type in '".$parent."'");
571             }
572             }
573              
574 222 100       263 if ( ! $self->_has_term( $current_term ) ) {
575 204         277 my $term =$self->_create_ont_entry($self->_get_name($line,
576             $current_term),
577             $current_term );
578 204         382 $self->_add_term( $term, $ont );
579             }
580              
581 222         358 my $current_term_object = $self->_ont_engine()->get_terms( $current_term );
582 222         340 my $anno = $self->_to_annotation(\@cross_refs);
583 222         532 $current_term_object->add_dbxref(-dbxrefs => $anno);
584 222         375 $current_term_object->add_secondary_id( @sec_go_ids );
585 222         319 $current_term_object->add_synonym( @syns );
586 222 100       368 unless ( $line =~ /^\$/ ) {
587 218         363 $current_term_object->ontology( $ont );
588             }
589 222         225 foreach my $parent ( @isa_parents ) {
590 25 100       37 if ( ! $self->_has_term( $parent ) ) {
591 11         26 my $term = $self->_create_ont_entry($self->_get_name($line,
592             $parent),
593             $parent );
594 11         25 $self->_add_term( $term, $ont );
595             }
596              
597 25         48 $self->_add_relationship( $parent,
598             $current_term,
599             $self->_is_a_relationship(),
600             $ont);
601              
602             }
603 222         214 foreach my $parent ( @partof_parents ) {
604 11 100       12 if ( ! $self->_has_term( $parent ) ) {
605 3         6 my $term = $self->_create_ont_entry($self->_get_name($line,
606             $parent),
607             $parent );
608 3         7 $self->_add_term( $term, $ont );
609             }
610              
611 11         16 $self->_add_relationship( $parent,
612             $current_term,
613             $self->_part_of_relationship(),
614             $ont);
615             }
616              
617 222         294 my $current_spaces = $self->_count_spaces( $line );
618              
619 222 100       335 if ( $current_spaces != $prev_spaces ) {
620              
621 116 100       166 if ( $current_spaces == $prev_spaces + 1 ) {
    50          
622 75         95 push( @stack, $prev_term );
623             } elsif ( $current_spaces < $prev_spaces ) {
624 41         38 my $n = $prev_spaces - $current_spaces;
625 41         72 for ( my $i = 0; $i < $n; ++$i ) {
626 60         105 pop( @stack );
627             }
628             } else {
629 0         0 $self->throw( "format error (file ".$self->file.")" );
630             }
631             }
632              
633 222         290 my $parent = $stack[ @stack - 1 ];
634              
635             # add a relationship if the line isn\'t the one with the root term
636             # of the ontology (which is also the name of the ontology)
637 222 100       401 if ( index($line,'$') != 0 ) {
638             #adding @reltype@ syntax
639 218 50       447 if ( $line !~ /^\s*([<%~]|\@\w+?\@)/ ) {
640 0         0 $self->throw( "format error (file ".$self->file.") offending line:\n$line" );
641             }
642              
643 218         420 my($relstring) = $line =~ /^\s*([<%~]|\@[^\@]+?\@)/;
644              
645 218         155 my $reltype;
646              
647 218 100       359 if ($relstring eq '<') {
    50          
    0          
648 36         56 $reltype = $self->_part_of_relationship;
649             } elsif ($relstring eq '%') {
650 182         258 $reltype = $self->_is_a_relationship;
651             } elsif ($relstring eq '~') {
652 0         0 $reltype = $self->_related_to_relationship;
653             } else {
654 0         0 $relstring =~ s/\@//g;
655 0 0       0 if ($self->_ont_engine->get_relationship_type($relstring)) {
656 0         0 $reltype = $self->_ont_engine->get_relationship_type($relstring);
657             } else {
658 0         0 $self->_ont_engine->add_relationship_type($relstring, $ont);
659 0         0 $reltype = $self->_ont_engine->get_relationship_type($relstring);
660             }
661             }
662              
663             #my $reltype = ($line =~ /^\s*
664             #$self->_part_of_relationship() :
665             #$self->_is_a_relationship();
666 218         317 $self->_add_relationship( $parent, $current_term, $reltype, $ont);
667             }
668              
669 222         191 $prev_spaces = $current_spaces;
670 222         769 $prev_term = $current_term;
671             }
672 4         11 return $ont;
673             } # _parse_relationships_file
674              
675              
676              
677             # Parses the 1st term id number out of line.
678             sub _get_first_termid {
679 258     258   244 my ( $self, $line ) = @_;
680 258 50       651 if ( $line =~ /;\s*([A-Z_]{1,8}:\d{1,})/ ) {
681             # if ( $line =~ /;\s*(\w+:\w+)/ ) {
682 258         566 return $1;
683             }
684             else {
685 0         0 $self->throw( "format error: no term id in line \"$line\"" );
686             }
687              
688             } # _get_first_termid
689              
690              
691              
692             # Parses the name out of line.
693             sub _get_name {
694 218     218   201 my ( $self, $line, $termid ) = @_;
695              
696 218 50       4443 if ( $line =~ /([^;<%~]+);\s*$termid/ ) {
697 218         334 my $name = $1;
698             # remove trailing and leading whitespace
699 218         508 $name =~ s/\s+$//;
700 218         261 $name =~ s/^\s+//;
701 218         176 $name =~ s/\@.+?\@//;
702             # remove leading dollar character; also we default the name of the
703             # ontology to this name unless it is preset to something else
704 218 100       404 if(index($name,'$') == 0) {
705 3         6 $name = substr($name,1);
706             # replace underscores by spaces for setting the ontology name
707 3 50       6 $self->ontology_name(join(" ",split(/_/,$name)))
708             unless $self->ontology_name();
709             }
710 218         544 return $name;
711             }
712             else {
713 0         0 return;
714             }
715             } # _get_name
716              
717              
718             # Parses the synonyms out of line.
719             sub _get_synonyms {
720 222     222   183 my ( $self, $line ) = @_;
721              
722 222         182 my @synonyms = ();
723              
724 222         485 while ( $line =~ /synonym\s*:\s*([^;<%~]+)/g ) {
725 38         47 my $syn = $1;
726 38         84 $syn =~ s/\s+$//;
727 38         43 $syn =~ s/^\s+//;
728 38         79 push( @synonyms, $syn );
729             }
730 222         278 return @synonyms;
731              
732             } # _get_synonyms
733              
734              
735              
736             # Parses the db cross refs out of line.
737             sub _get_db_cross_refs {
738 222     222   177 my ( $self, $line ) = @_;
739              
740 222         150 my @refs = ();
741              
742 222         629 while ( $line =~ /;([^;<%~:]+:[^;<%~:]+)/g ) {
743 264         301 my $ref = $1;
744 264 100 100     1130 if ( $ref =~ /synonym/ || $ref =~ /[A-Z]{1,8}:\d{3,}/ ) {
745 183         345 next;
746             }
747 81         197 $ref =~ s/\s+$//;
748 81         124 $ref =~ s/^\s+//;
749              
750 81         478 $ref = $self->unescape( $ref );
751              
752 81 50       218 push( @refs, $ref ) if defined $ref;
753             }
754 222         323 return @refs;
755              
756             }
757              
758              
759             # Parses the secondary go ids out of a line
760             sub _get_secondary_termids {
761 222     222   185 my ( $self, $line ) = @_;
762 222         149 my @secs = ();
763              
764             # while ( $line =~ /,\s*([A-Z]{1,8}:\d{3,})/g ) {
765 222         368 while ( $line =~ /,\s*(\w+:\w+)/g ) {
766 1         2 my $sec = $1;
767 1         2 push( @secs, $sec );
768             }
769 222         208 return @secs;
770              
771             } # _get_secondary_termids
772              
773              
774             # Counts the spaces at the beginning of a line in the relationships files
775             sub _count_spaces {
776 222     222   206 my ( $self, $line ) = @_;
777              
778 222 100       533 if ( $line =~ /^(\s+)/ ) {
779 218         500 return length( $1 );
780             }
781             else {
782 4         5 return 0;
783             }
784             } # _count_spaces
785              
786              
787             # "next" method for parsing the defintions file
788             sub _next_term {
789 13     13   12 my ( $self ) = @_;
790              
791 13 100 100     17 if ( ($self->_done() == TRUE) || (! $self->_defs_io())) {
792 4         9 return;
793             }
794              
795 9         11 my $line = "";
796 9         4 my $termid = "";
797 9         11 my $next_term = $self->_term();
798 9         8 my $def = "";
799 9         7 my $comment = "";
800 9         9 my @def_refs = ();
801 9         7 my $isobsolete;
802              
803 9         9 while( $line = ( $self->_defs_io->_readline() ) ) {
804 62 100 100     305 if ( $line !~ /\S/
    100          
    100          
    100          
    50          
    0          
805             || $line =~ /^\s*!/ ) {
806 26         33 next;
807             }
808             elsif ( $line =~ /^\s*term:\s*(.+)/ ) {
809 9         13 $self->_term( $1 );
810 9 100       12 last if $self->_not_first_record();
811 2         2 $next_term = $1;
812 2         8 $self->_not_first_record( TRUE );
813             }
814             elsif ( $line =~ /^\s*[a-z]{0,8}id:\s*(.+)/ ) {
815 9         20 $termid = $1;
816             }
817             elsif ( $line =~ /^\s*definition:\s*(.+)/ ) {
818 9         25 $def = $self->unescape($1);
819 9 50       27 $isobsolete = 1 if index($def,"OBSOLETE") == 0;
820             }
821             elsif ( $line =~ /^\s*definition_reference:\s*(.+)/ ) {
822 9         15 push( @def_refs, $self->unescape($1) );
823             }
824             elsif ( $line =~ /^\s*comment:\s*(.+)/ ) {
825 0         0 $comment = $self->unescape($1);
826             }
827             }
828 9 100       16 $self->_done( TRUE ) unless $line; # we'll come back until done
829 9         27 return $self->_create_ont_entry( $next_term, $termid, $def,
830             $comment, \@def_refs, $isobsolete);
831             } # _next_term
832              
833              
834              
835             # Holds the GO engine to be parsed into
836             sub _ont_engine {
837 1239     1239   890 my ( $self, $value ) = @_;
838              
839 1239 100       1623 if ( defined $value ) {
840 4         6 $self->{ "_ont_engine" } = $value;
841             }
842              
843 1239         2488 return $self->{ "_ont_engine" };
844             } # _ont_engine
845              
846              
847             # Used to create ontology terms.
848             # Arguments: name, id
849             sub _create_ont_entry {
850 227     227   307 my ( $self, $name, $termid, $def, $cmt, $dbxrefs, $obsolete ) = @_;
851              
852 227 50 33     823 if((!defined($obsolete)) && (index(lc($name),"obsolete") == 0)) {
853 0         0 $obsolete = 1;
854             }
855 227         350 my $anno = $self->_to_annotation($dbxrefs);
856 227         482 my $term = $self->term_factory->create_object(-name => $name,
857             -identifier => $termid,
858             -definition => $def,
859             -comment => $cmt,
860             -dbxrefs => $anno,
861             -is_obsolete => $obsolete);
862              
863 227         331 return $term;
864             } # _create_ont_entry
865              
866              
867              
868             # Holds whether first record or not
869             sub _not_first_record {
870 15     15   14 my ( $self, $value ) = @_;
871              
872 15 100       21 if ( defined $value ) {
873 6         7 $self->{ "_not_first_record" } = $value;
874             }
875              
876 15         25 return $self->{ "_not_first_record" };
877             } # _not_first_record
878              
879              
880              
881             # Holds whether done or not
882             sub _done {
883 19     19   19 my ( $self, $value ) = @_;
884              
885 19 100       23 if ( defined $value ) {
886 6         7 $self->{ "_done" } = $value;
887             }
888              
889 19         41 return $self->{ "_done" };
890             } # _done
891              
892              
893             # Holds a term.
894             sub _term {
895 22     22   23 my ( $self, $value ) = @_;
896              
897 22 100       28 if ( defined $value ) {
898 13         15 $self->{ "_term" } = $value;
899             }
900              
901 22         28 return $self->{ "_term" };
902             } # _term
903              
904             # convert simple strings to Bio::Annotation::DBLinks
905             sub _to_annotation {
906 449     449   397 my ($self , $links) = @_;
907 449 100       623 return unless $links;
908 231         141 my @dbxrefs;
909 231         175 for my $string (@{$links}) {
  231         265  
910 13         29 my ($db, $id) = split(':',$string);
911 13         52 push @dbxrefs, Bio::Annotation::DBLink->new(-database => $db, -primary_id => $id);
912             }
913 231         271 return \@dbxrefs;
914             }
915              
916             1;