File Coverage

blib/lib/GO/OntologyProvider/OboParser.pm
Criterion Covered Total %
statement 127 153 83.0
branch 39 58 67.2
condition 3 8 37.5
subroutine 22 25 88.0
pod 7 7 100.0
total 198 251 78.8


line stmt bran cond sub pod time code
1             package GO::OntologyProvider::OboParser;
2              
3             # File : OboParser.pm
4             # Authors : Elizabeth Boyle; Gavin Sherlock
5             # Date Begun : Summer 2001
6             # Rewritten : September 29th 2002
7             #
8             # Updated to parse the gene ontology info from the obo file.
9             # August 2006, Shuai Weng
10             #
11             # $Id: OboParser.pm,v 1.4 2007/11/15 18:32:12 sherlock Exp $
12              
13             # License information (the MIT license)
14              
15             # Copyright (c) 2003 Gavin Sherlock; Stanford University
16              
17             # Permission is hereby granted, free of charge, to any person
18             # obtaining a copy of this software and associated documentation files
19             # (the "Software"), to deal in the Software without restriction,
20             # including without limitation the rights to use, copy, modify, merge,
21             # publish, distribute, sublicense, and/or sell copies of the Software,
22             # and to permit persons to whom the Software is furnished to do so,
23             # subject to the following conditions:
24              
25             # The above copyright notice and this permission notice shall be
26             # included in all copies or substantial portions of the Software.
27              
28             # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
29             # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
30             # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
31             # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
32             # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
33             # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
34             # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35             # SOFTWARE.
36              
37             =pod
38              
39             =head1 NAME
40              
41             GO::OntologyProvider::OboParser - Provides API for retrieving data from Gene Ontology obo file.
42              
43             =head1 SYNOPSIS
44              
45             use GO::OntologyProvider::OboParser;
46              
47             my $ontology = GO::OntologyProvider::OboParser->new(ontologyFile => "gene_ontology.obo",
48             aspect => [P|F|C]);
49              
50             print "The ancestors of GO:0006177 are:\n";
51              
52             my $node = $ontology->nodeFromId("GO:0006177");
53              
54             foreach my $ancestor ($node->ancestors){
55            
56             print $ancestor->goid, " ", $ancestor->term, "\n";
57            
58             }
59              
60             $ontology->printOntology();
61              
62              
63             =head1 DESCRIPTION
64              
65             GO::OntologyProvider::OboParser implements the interface defined by
66             GO::OntologyProvider, and parses the gene ontology obo file (GO) in
67             plain text (not XML) format. These files can be obtained from the
68             Gene Ontology Consortium web site, http://www.geneontology.org/. From
69             the information in the file, it creates a directed acyclic graph (DAG)
70             structure in memory. This means that GO terms are arranged into
71             tree-like structures where each GO node can have multiple parent nodes
72             and multiple child nodes. The file MUST be named with a .obo suffix.
73              
74             This data structure can be used in conjunction with files in which
75             certain genes are annotated to corresponding GO nodes.
76              
77             Each GO ID (e.g. "GO:1234567") has associated with it a GO node. That
78             GO node contains the name of the GO term, a list of the nodes directly
79             above the node ("parent nodes"), and a list of the nodes directly
80             below the current node ("child nodes"). The "ancestor nodes" of a
81             certain node are all of the nodes that are in a path from the current
82             node to the root of the ontology, with all repetitions removed.
83              
84             The example format is as follows:
85              
86             [Term]
87             id: GO:0000006
88             name: high affinity zinc uptake transporter activity
89             namespace: molecular_function
90             def: "Catalysis of the reaction: Zn2+(out) = Zn2+(in), probably powered by proton motive force." [TC:2.A.5.1.1]
91             xref_analog: TC:2.A.5.1.1
92             is_a: GO:0005385 ! zinc ion transporter activity
93              
94              
95             [Term]
96             id: GO:0000005
97             name: ribosomal chaperone activity
98             namespace: molecular_function
99             def: "OBSOLETE. Assists in the correct assembly of ribosomes or ribosomal subunits in vivo, but is not a component of the assembled ribosome when performing its normal biological function." [GOC:jl, PMID:12150913]
100             comment: This term was made obsolete because it refers to a class of gene products and a biological process rather than a molecular
101             function. To update annotations, consider the molecular function term 'unfolded protein binding ; GO:0051082' and the biological process
102             term 'ribosome biogenesis and assembly ; GO:0042254' and its children.
103             is_obsolete: true
104              
105             =cut
106              
107             ##################################################################
108             ##################################################################
109              
110 2     2   248807 use strict;
  2         4  
  2         94  
111 2     2   12 use warnings;
  2         3  
  2         70  
112 2     2   12 use diagnostics;
  2         4  
  2         16  
113              
114 2     2   380 use base qw (GO::OntologyProvider);
  2         4  
  2         2927  
115 2     2   631 use GO::Node;
  2         5  
  2         66  
116 2     2   1440 use Storable qw (nstore);
  2         5178  
  2         155  
117              
118 2     2   1068 use IO::File;
  2         25294  
  2         5160  
119              
120             our $VERSION = 0.01;
121             our $PACKAGE = "GO::OntologyProvider::OntologyOboParser";
122              
123             ##################################################################
124             #
125             # CLASS ATTRIBUTES
126             #
127             ##################################################################
128              
129             # All the following class attributes are constants, that should be
130             # initialized here at compile time.
131              
132             my $DEBUG = 0;
133              
134             my $kFile = $PACKAGE.'::__file';
135             my $kAspect = $PACKAGE.'::__aspect';
136             my $kRootNode = $PACKAGE.'::__rootNode';
137             my $kNodes = $PACKAGE.'::__nodes';
138             my $kSecondaryIds = $PACKAGE.'::__secondaryIds';
139             my $kParent = $PACKAGE.'::__parent';
140              
141             my %kAspects = (
142             'P' => 'biological_process',
143             'F' => 'molecular_function',
144             'C' => 'cellular_component'
145             );
146              
147             ##################################################################
148              
149             # The constructor, and associated initialization methods
150              
151             ##################################################################
152             sub new{
153             ##################################################################
154             # This is the constructor for an OntologyOboParser object.
155             #
156             # The constructor expects one of two type of arguments, either an
157             # 'ontologyFile' and 'ontology' argument , or an 'objectFile' argument.
158             # When instantiated with an ontologyFile argument, it expects the file
159             # to be in obo format. When instantiated with an objectFile argument,
160             # it expects to open a previously created OboParser object that
161             # has been serialized to disk.
162             #
163             #
164             # Usage :
165             #
166             # my $ontology = GO::OntologyProvider::OboParser->new(ontologyFile=>$file,
167             # ontology=>[P|F|C]);
168             #
169             # my $ontology = GO::OntologyProvider::OboParser->new(objectFile=>$file);
170             #
171              
172 2     2 1 42 my ($class, %args) = @_;
173              
174 2         4 my $self;
175              
176 2 50       14 if (exists($args{'objectFile'})){
    50          
177              
178 0         0 $self = Storable::retrieve($args{'objectFile'})
179              
180             }elsif (exists($args{'ontologyFile'})){
181              
182 2         6 $self = {};
183            
184 2         6 bless $self, $class;
185            
186 2         12 $self->__setFile($args{'ontologyFile'},
187             $args{'aspect'});
188              
189 2         9 $self->__init;
190              
191             }
192            
193 2         5065 return ($self);
194              
195             }
196              
197             ############################################################################
198             sub __setFile{
199             ############################################################################
200             # This private method simply stores the name of the file used for
201             # construction inside the object's hash
202              
203 2     2   6 my ($self, $file, $aspect) = @_;
204              
205 2 50       96 if (!-e $file){
    50          
    50          
    50          
206              
207 0         0 die "$file does not exist";
208              
209             }elsif (-d $file){
210              
211 0         0 die "$file is a directory";
212              
213             }elsif (!-r $file){
214              
215 0         0 die "$file is not readable";
216              
217             }elsif ($file !~ /\.obo/){
218              
219 0         0 die "$file must have a .obo suffix";
220              
221             }
222              
223 2 50       9 if (!defined $aspect) {
224              
225 0         0 die "You have to pass the GO aspect [".join("\|", sort keys %kAspects) ."] to the ", ref($self);
226              
227             }
228            
229 2 50       8 if (!exists $kAspects{$aspect}) {
230              
231 0         0 die "Unknown aspect name: $aspect. The allowable GO aspects are ". join(", ", sort keys %kAspects)."\n";
232              
233             }
234              
235 2         18 $self->{$kFile} = $file;
236              
237 2         6 $self->{$kAspect} = $aspect;
238              
239             }
240              
241             ############################################################################
242             sub __file {
243             ############################################################################
244             # This private method returns the name of the file used to construct the object
245              
246 2     2   25 return $_[0]->{$kFile};
247              
248             }
249              
250             ############################################################################
251             sub __aspect {
252             ############################################################################
253             # This private method returns the name of the ontology used to construct the object
254              
255 2     2   9 return $_[0]->{$kAspect};
256              
257             }
258              
259             ############################################################################
260             sub __init { # okay
261             ############################################################################
262             # This method initializes the ontologyOboParser object, by parsing an ontology
263             # file, and storing the structures represented therein, in memory.
264              
265 2     2   5 my $self = shift;
266              
267 2   50     8 my $ontologyFh = IO::File->new($self->__file, q{<} )|| die "$PACKAGE can't open file ". $self->__file ." : $!";
268              
269 2         224 my $aspect = $kAspects{$self->__aspect};
270              
271             # go through the ontology one line at a time
272              
273 2         4 my @entryLine;
274              
275 2         4 my $isValidEntry = 0;
276              
277 2         3 my $namespace;
278              
279 2         58 while (<$ontologyFh>){
280              
281 428186         460872 chomp;
282              
283             # finish parsing the obo file of we reach the typedef line.
284              
285 428186 100       789031 last if (/^\[Typedef\]/);
286              
287 428184 100       1105413 if ($_ eq '[Term]') {
    100          
    100          
288              
289             # we reached a new term - so process the previous entry
290              
291 45858 100       79022 if ($isValidEntry) {
292            
293 25108         59258 $self->__processNode(\@entryLine);
294              
295             }
296              
297             # reset our variables
298              
299 45858         106218 @entryLine = ();
300              
301 45858         55327 $isValidEntry = 0;
302 45858         137656 $namespace = '';
303              
304             }elsif ($_ eq "namespace: $aspect"){
305              
306             # term is in the requested namespace
307              
308 25808         28727 $namespace = $aspect;
309              
310 25808         101010 $isValidEntry = 1;
311            
312             }elsif ($_ eq 'is_obsolete: true'){
313              
314             # we don't want obsolete nodes - DO NOT COMMENT THIS OUT -
315             # infinite recursion will result!
316              
317             # Note, the logic here relies on the is_obsolete line coming after the
318             # namespace line.
319              
320 2024         4914 $isValidEntry = 0;
321            
322             }else {
323              
324             # build up the information for this node
325              
326 354494         1126825 push(@entryLine, $_);
327              
328             }
329              
330             }
331              
332             # process the final node
333              
334 2 50 33     33 if ($namespace eq $aspect && $isValidEntry) {
335            
336 2         9 $self->__processNode(\@entryLine);
337              
338             }
339              
340 2 50       32 $ontologyFh->close || die "Can't close ". $self->__file ." : $!";
341              
342             # now populate ancestor paths for each node.
343              
344 2         144 $self->__populatePaths;
345              
346             }
347              
348             ############################################################################
349             sub __processNode{
350             ############################################################################
351             # This private method processes entry lines identified as a node.
352             # The general idea is that it needs three pieces of
353             # information about the line to deal with it:
354             #
355             # 1. The name of the node.
356             # 2. The GOIDs associated with the node.
357             # 3. The parent node ids.
358             #
359             # It creates a node object for the current node and then indicates in that node
360             # the identity of its parent(s).
361              
362 25110     25110   37373 my ($self, $entryLineArrayRef) = @_;
363              
364 25110         46197 my ($nodeName, $goid, $secondaryGoidArrayRef, $parentGoidArrayRef)
365             = $self->__getNodeInfoFromLine($entryLineArrayRef);
366              
367 25110         60148 my $node = $self->__createNode($goid, $nodeName);
368              
369 25110 100       25308 if (scalar (@{$parentGoidArrayRef}) == 0) { # no parent goid
  25110         55044  
370              
371             # The GOA has obsoleted the 'Gene_Ontology' term, but
372             # currently we need it to make the graph work. Thus, we'll
373             # recreate the root, using it.s original id and name. This
374             # needs to be fixed in future.
375              
376 2         3 my $rootGoid = 'GO:0003673';
377 2         6 my $rootTerm = 'Gene_Ontology';
378              
379 2         9 my $rootNode = $self->__createNode($rootGoid, $rootTerm);
380              
381 2         9 $self->{$kRootNode} = $rootNode;
382              
383 2         5 @{$parentGoidArrayRef} = ($rootGoid);
  2         7  
384              
385             }
386              
387             ## now hash any secondaries to the primary
388              
389 25110         26523 foreach my $secondaryId (@{$secondaryGoidArrayRef}){
  25110         43300  
390              
391 696         3217 $self->{$kSecondaryIds}{$secondaryId} = $goid;
392              
393             }
394              
395 25110         83610 $self->{$kParent}{$goid} = $parentGoidArrayRef;
396              
397             }
398              
399             ############################################################################
400             sub __getNodeInfoFromLine { # okay
401             ############################################################################
402              
403             # This private method takes an array reference to the lines for a
404             # given GO term node entry and returns the term name, a reference
405             # that points to an array of goids associated with that term name, and
406             # a reference that points to an array of direct parent GOIDs. The
407             # primary goid will be the first goid returned in the list.
408             #
409             # Usage:
410             #
411             # my ($termName, $goidArrayRef, $parentGoidArrayRef)
412             # = $self->__getNodeInfoFromLine($entryLineArrayRef);
413              
414 25110     25110   29079 my ($self, $entryLineArrayRef) = @_;
415              
416 25110         24222 my ($nodeName, $goid, @secondaryGoid, @parentGoid);
417              
418 25110         24681 foreach my $line (@{$entryLineArrayRef}) {
  25110         44089  
419              
420 184204 100       848179 if ($line =~ /^id: *(GO:0*[0-9]+)$/) {
    100          
    100          
    100          
421              
422 25110         57467 $goid = $1;
423              
424             }elsif ($line =~ /^name: *(.+)$/) {
425              
426 25110         47449 $nodeName = $1;
427              
428             }elsif ($line =~ /^alt_id: *(GO:0*[0-9]+)$/) {
429            
430 696         1631 push(@secondaryGoid, $1);
431              
432             }elsif ($line =~ /^(is_a:|relationship: part_of) *(GO:0*[0-9]+)/) {
433            
434 43284         110860 push(@parentGoid, $2);
435              
436             }
437            
438             }
439              
440             # check that we can actually get some goids. Added this in to
441             # deal with when a broken file that appeared on the GO site, it
442             # caused me to get email saying my code was broken...
443              
444 25110 50       55076 if (!$goid){
445              
446 0         0 die "There appears to be a problem with the ontology file.\n".
447             "No GOIDs could be extracted from '$nodeName'.n\n";
448            
449             }
450              
451             # remove \'s from nodeName
452              
453 25110         36681 $nodeName =~ s/\\//g;
454              
455 25110         102037 return ($nodeName, $goid, \@secondaryGoid, \@parentGoid);
456              
457              
458             }
459              
460             ###############################################################################
461             sub __createNode {
462             ###############################################################################
463            
464 25112     25112   34957 my ($self, $goid, $nodeName) = @_;
465              
466 25112         24453 my $node;
467              
468 25112 50       43252 if ($self->__nodeIsAlreadyCreated($goid)){
469              
470 0         0 $node = $self->nodeFromId($goid);
471              
472             } else { # node has not already been created
473              
474             # create node
475              
476 25112         73748 $node = GO::Node->new(goid => $goid,
477             term => $nodeName);
478              
479             # store it
480            
481 25112         100105 $self->{$kNodes}{$goid} = $node;
482              
483             }
484              
485 25112         46735 return $node;
486              
487             }
488              
489             ###############################################################################
490             sub __populatePaths {
491             ###############################################################################
492             # in this method, we populate all the paths to the root for each node
493             # in the ontology. To do this, we have to call the recursive method,
494             # __findAncestor(), which will build up each path from a node to the
495             # root, and when it reaches the end of the path (the root itself),
496             # will add that path via the Node method addPathToRoot.
497              
498             # POSSIBLE ALTERNATIVE APPROACH
499             #
500             # Profiling of the OboParser reveals that when building the ontology,
501             # ~77% of the time is spent in the recursive __findAncestor(). Thus,
502             # if a way could be found to decrease the number of recursive calls to
503             # that method, it might significantly positively impact the runtime
504             # performance.
505             #
506             # A possible alternative approach to the current method, might be to
507             # simply populate paths for every leaf node (we would need to know who
508             # they are), and as their paths are populated, also populate the paths
509             # for their ancestors as well, as the paths from the ancestors are
510             # subparts of the paths from leaves to the root. However, care would
511             # have to be taken to not add the same path twice, as there would be
512             # issues with when a leaf has two or more paths to a particular node,
513             # whose paths are then being added. Note also, if you encounter a
514             # node for whom you have already added paths, you don't need to add
515             # them again, so this might significantly save the number of recursive
516             # calls required.
517              
518 2     2   6 my $self = shift;
519              
520             # go through each GO node in the $kParent hash, the keys of which
521             # are the goids that are parents of a given node.
522              
523 2         4 foreach my $childGoid ( keys %{$self->{$kParent}} ) {
  2         12800  
524              
525             # note, we directly access the kNodes hash here, rather than
526             # use nodeFromId(). This is for performance reasons only -
527             # accessing the kNodes hash directly in this method, and the
528             # __findAncestor method shaces about 40% of the runtime off of
529             # the time taken to populate all the paths.
530              
531 25110         74096 my $childNode = $self->{$kNodes}{$childGoid};
532              
533             # now go through each of this child's parents
534              
535 25110         30021 foreach my $parentGoid (@{$self->{$kParent}{$childGoid}}) {
  25110         85374  
536              
537             ### Note, there has been a case in the obo file where
538             ### there was an error, and a node was listed as having
539             ### parent in a different aspect. This results in a fatal
540             ### run time error, as when the parser reads the file, it
541             ### only keeps nodes of a given aspect, and is thus left
542             ### with a dangling reference. In this case, parentNode
543             ### will be undef, and the call to addParentNodes ends up
544             ### in a run time error. We can add some logic here to
545             ### give a better error message.
546              
547             my $parentNode = $self->{$kNodes}{$parentGoid}
548              
549 43286   33     195534 || do {
550              
551             print "There is an error in the obo file, where the relationship between ",
552             $childNode->goid,
553             " and one or more of its parents is not correctly defined.\n",
554             "Please check the obo file.\n",
555             "The program is unable to continue.\n\n";
556            
557             exit;
558              
559             };
560              
561             ### create connections between child node and its parent
562              
563 43286         119876 $childNode->addParentNodes($parentNode);
564            
565 43286         121786 $parentNode->addChildNodes($childNode);
566              
567             # begin to build the ancestor path, starting with this
568             # parent
569              
570 43286         75710 my @path = ($parentNode);
571              
572 43286 100       127141 if (exists $self->{$kParent}{$parentGoid}){
573              
574             # if this parent has parents, then we continue to
575             # build the path upwards to the root. We pass in the
576             # child node, so that each path which reaches the root
577             # can be added during the recursive calls to find
578             # ancestor
579              
580 43284         91685 $self->__findAncestors($childNode,
581             $parentGoid,
582             \@path);
583              
584             }else{
585              
586             # otherwise, the path only contains the root, and we add it.
587              
588 2         9 $childNode->addPathToRoot(@path);
589              
590             }
591              
592             }
593              
594             }
595              
596             }
597              
598             #######################################################################
599             sub __findAncestors {
600             #######################################################################
601             # Usage:
602             #
603             # $self->__findAncestor($childNode,
604             # $parentGoid,
605             # $pathArrayRef);
606             #
607             # This method looks through each goid in hash %{$self->{$kParent}} to
608             # find all ancestors and push everything to @{$pathArrayRef}..And if
609             # there is no ancestor found for the $parentGoid, it just add the path
610             # to the child node.
611              
612 2427650     2427650   3167333 my ($self, $childNode, $parentGoid, $pathArrayRef) = @_;
613              
614             # go through each immediate parent of the passed in parent
615              
616 2427650         2292451 foreach my $ancestorGoid (@{$self->{$kParent}{$parentGoid}}) {
  2427650         5446603  
617              
618             # add the ancestor node to our path to the root which is being
619             # built
620              
621 3008496         2937253 push (@{$pathArrayRef}, $self->{$kNodes}{$ancestorGoid});
  3008496         6354722  
622              
623 3008496 100       6965503 if (exists $self->{$kParent}{$ancestorGoid}){
624              
625             # if this ancestor has parents, continue building the
626             # paths to the root recursively up the DAG
627              
628 2384366         4181235 $self->__findAncestors($childNode,
629             $ancestorGoid,
630             $pathArrayRef);
631              
632             }else {
633              
634 624130         643826 $childNode->addPathToRoot(reverse @{$pathArrayRef});
  624130         1662233  
635              
636             }
637              
638             # because there are multiple paths to the root for most nodes,
639             # we have now remove the current ancestor from this time
640             # through the loop so that the path is reset to the original
641             # condition that it was in when passed in to this method
642              
643 3008496         3460975 pop @{$pathArrayRef};
  3008496         6192103  
644              
645             }
646              
647             }
648              
649             ############################################################################
650             sub __nodeIsAlreadyCreated { # okay
651             ############################################################################
652             # This private method returns a boolean to indicate whether a node has
653             # already been created for a given GO ID.
654              
655              
656 25112     25112   101256 return (exists($_[0]->{$kNodes}{$_[1]}));
657              
658             }
659              
660             ############################################################################
661             sub printOntology{
662             ############################################################################
663             # This prints out the ontology, with redundancies.
664              
665 0     0 1 0 my $self = shift;
666              
667 0         0 $self->__printNode($self->rootNode, 0);
668              
669             }
670              
671             ############################################################################
672             sub __printNode{
673             ############################################################################
674             # This recursive function prints the name of the specified node and the
675             # names of all of its descendants.
676             #
677              
678 0     0   0 my ($self, $node, $indentationLevel) = @_;
679              
680 0         0 print " " x $indentationLevel, $node->term, " ; ", $node->goid, "\n";
681              
682 0         0 foreach my $childNode (sort {$a->term cmp $b->term} $node->childNodes) {
  0         0  
683              
684 0         0 $self->__printNode($childNode, $indentationLevel+1);
685              
686             }
687              
688             }
689              
690             ############################################################################
691             sub allNodes{
692             ############################################################################
693             # This method returns an array of all the nodes that have been created.
694             #
695             # Usage:
696             #
697             # my @nodes = $ontologyParser->allNodes;
698              
699 3     3 1 4248 return (values %{$_[0]->{$kNodes}});
  3         5278  
700              
701             }
702              
703             ############################################################################
704             sub rootNode{
705             ############################################################################
706             # This returns the root node in the ontology.
707             #
708             # Usage:
709             #
710             # my $rootNode = $ontologyParser->rootNode;
711              
712 119030     119030 1 481410 return ($_[0]->{$kRootNode});
713              
714             }
715              
716             ############################################################################
717             sub nodeFromId{
718             ############################################################################
719             # This public method takes a GOID and returns the GO::Node that
720             # it corresponds to. It should also work with secondary id's
721             #
722             # Usage :
723             #
724             # my $node = $ontologyParser->nodeFromId($goid);
725              
726 170559     170559 1 273400 my ($self, $goid) = @_;
727              
728 170559 100       527308 if (exists ($self->{$kNodes}{$goid})){ # it's a primary
    50          
729              
730 170351         813567 return ($self->{$kNodes}{$goid});
731            
732             }elsif (exists ($self->{$kSecondaryIds}{$goid})){ # it's a secondary
733              
734 0         0 return $self->{$kNodes}{$self->{$kSecondaryIds}{$goid}};
735              
736             }else{
737              
738 208         1506 return undef;
739              
740             }
741              
742             }
743              
744             ############################################################################
745             sub numNodes{
746             ############################################################################
747             # This public method returns the number of nodes that exist with the
748             # ontology
749             #
750             # Usage :
751             #
752             # my $numNodes = $ontologyParser->numNodes;
753              
754 1     1 1 3 return scalar (keys %{$_[0]->{$kNodes}});
  1         8  
755              
756             }
757              
758             ############################################################################
759             sub serializeToDisk {
760             ############################################################################
761             # Saves the current state of the Ontology Parser Object to a file,
762             # using the Storable package. Saves in network order for portability,
763             # just in case. Returns the name of the file. If no filename is
764             # provided, then the name of the file (and it's directory, if one was
765             # provided) used for object construction, will be used, with .obj
766             # appended. If the object was instantiated from a file with a .obj
767             # suffix, then the same filename would be used, if none were provided.
768             #
769             # This method currently causes a segfault on MacOSX (at least 10.1.5
770             # -> 10.2.3), with perl 5.6, and Storable 1.0.14, when trying to store
771             # the process ontology. This failure occurs using either store, or
772             # nstore, and is manifested by a segmentation fault. It has not been
773             # investigated whether this is a perl problem, or a Storable problem
774             # (which has large amounts of C-code). This does not cause a
775             # segmentation on Solaris, using perl 5.6.1 and Storable 1.0.13. This
776             # doesn't make it clear whether it's a MacOSX problem or a perl
777             # problem or not. It should be noted that newer versions of both perl
778             # and Storable exist, and the code should be tested with those as
779             # well.
780             #
781             # Usage:
782             #
783             # my $objectFile = $ontologyParser->serializeToDisk(filename=>$filename);
784              
785 0     0 1   my ($self, %args) = @_;
786              
787 0           my $fileName;
788              
789 0 0         if (exists ($args{'filename'})){ # they supply their own filename
790              
791 0           $fileName = $args{'filename'};
792              
793             }else{ # we build a name from the file used to instantiate ourselves
794              
795 0           $fileName = $self->__file;
796            
797 0 0         if ($fileName !~ /\.obj$/){ # if we weren't instantiated from an object
798            
799 0           $fileName .= ".obj"; # add a .obj suffix to the name
800            
801             }
802              
803             }
804              
805 0 0         nstore ($self, $fileName) || die "$PACKAGE could not serialize itself to $fileName : $!";
806              
807 0           return ($fileName);
808              
809             }
810              
811             1; # to keep perl happy
812              
813              
814             # P O D D O C U M E N T A T I O N #
815              
816             =pod
817              
818             =head1 Instance Constructor
819              
820             =head2 new
821              
822             This is the constructor for an OboParser object. The constructor
823             expects one of two arguments, either an 'ontologyFile' argument, or an
824             'objectFile' argument. When instantiated with an ontologyFile
825             argument, it expects it to correspond to an obo file created by the GO
826             consortium, according to their file format, and in addition, also
827             requires an 'aspect' argument. When instantiated with an objectFile
828             argument, it expects to open a previously created ontologyParser
829             object that has been serialized to disk (see serializeToDisk).
830              
831             Usage:
832              
833             my $ontology = GO::OntologyProvider::OboParser->new(ontologyFile => $ontologyFile,
834             aspect => $aspect);
835              
836             my $ontology = GO::OntologyProvider::OboParser->new(objectFile => $objectFile);
837              
838             =head1 Instance Methods
839              
840             =head2 printOntology
841              
842             This prints out the ontology, with redundancies, to STDOUT. It does
843             not yet print out all of the ontology information (like relationship
844             type etc). This method will be likely be removed in a future version,
845             so should not be relied upon.
846              
847             Usage:
848              
849             $ontologyParser->printOntology;
850              
851             =head2 allNodes
852              
853             This method returns an array of all the GO:Nodes that have been
854             created.
855              
856             Usage:
857              
858             my @nodes = $ontologyParser->allNodes;
859              
860             =head2 rootNode
861              
862             This returns the root node in the ontology.
863              
864             my $rootNode = $ontologyParser->rootNode;
865              
866             =head2 nodeFromId
867              
868             This public method takes a GOID and returns the GO::Node that
869             it corresponds to.
870              
871             Usage :
872              
873             my $node = $ontologyParser->nodeFromId($goid);
874              
875             If the GOID does not correspond to a GO node, then undef will be
876             returned. Note if you try to call any methods on an undef, you will
877             get a fatal runtime error, so if you can't guarantee all GOIDs that
878             you supply are good, you should check that the return value from this
879             method is defined.
880              
881             =head2 numNodes
882              
883             This public method returns the number of nodes that exist with the
884             ontology
885              
886             Usage :
887              
888             my $numNodes = $ontologyParser->numNodes;
889              
890             =head2 serializeToDisk
891              
892             Saves the current state of the Ontology Parser Object to a file, using
893             the Storable package. Saves in network order for portability, just in
894             case. Returns the name of the file. If no filename is provided, then
895             the name of the file (and its directory, if one was provided) used for
896             object construction, will be used, with .obj appended. If the object
897             was instantiated from a file with a .obj suffix, then the same
898             filename would be used, if none were provided.
899              
900             This method currently causes a segfault on MacOSX (at least 10.1.5 ->
901             10.2.3), with perl 5.6, and Storable 1.0.14, when trying to store the
902             process ontology. This failure occurs using either store, or nstore,
903             and is manifested by a segmentation fault. It has not been
904             investigated whether this is a perl problem, or a Storable problem
905             (which has large amounts of C-code). This does not cause a
906             segmentation on Solaris, using perl 5.6.1 and Storable 1.0.13. This
907             does not make it clear whether it is a MacOSX problem or a perl
908             problem or not. It should be noted that newer versions of both perl
909             and Storable exist, and the code should be tested with those as well.
910              
911             Usage:
912              
913             my $objectFile = $ontologyParser->serializeToDisk(filename=>$filename);
914              
915             =head1 Authors
916              
917             Gavin Sherlock; sherlock@genome.stanford.edu
918             Elizabeth Boyle; ell@mit.edu
919             Shuai Weng; shuai@genome.stanford.edu
920              
921             =cut