File Coverage

blib/lib/GO/Node.pm
Criterion Covered Total %
statement 68 127 53.5
branch 10 36 27.7
condition 5 12 41.6
subroutine 18 26 69.2
pod 21 21 100.0
total 122 222 54.9


line stmt bran cond sub pod time code
1             package GO::Node;
2              
3             # File : Node.pm
4             # Author : Gavin Sherlock
5             # Date Begun : December 23rd 2002
6              
7             # $Id: Node.pm,v 1.11 2007/03/18 02:54:46 sherlock Exp $
8              
9             # License information (the MIT license)
10              
11             # Copyright (c) 2003 Gavin Sherlock; Stanford University
12              
13             # Permission is hereby granted, free of charge, to any person
14             # obtaining a copy of this software and associated documentation files
15             # (the "Software"), to deal in the Software without restriction,
16             # including without limitation the rights to use, copy, modify, merge,
17             # publish, distribute, sublicense, and/or sell copies of the Software,
18             # and to permit persons to whom the Software is furnished to do so,
19             # subject to the following conditions:
20              
21             # The above copyright notice and this permission notice shall be
22             # included in all copies or substantial portions of the Software.
23              
24             # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25             # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26             # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27             # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28             # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29             # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30             # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31             # SOFTWARE.
32              
33             =pod
34              
35             =head1 NAME
36              
37             GO::Node - provides information about a node in the Gene Ontology
38              
39             =head1 DESCRIPTION
40              
41             The GO::Node package is intended to be used as a container for
42             information about a node in one of the three Gene Ontologies. It
43             allows the storage of the goid, and immediate parents and children, as
44             well as paths to the top of the ontology. This package provides
45             methods to both store and retrieve that information.
46              
47             It should be strongly noted that clients are not expected to create
48             individual Node objects themselves, but instead should rely in a Node
49             Factory to create nodes and return them. Such a factory would be a
50             concrete subclass of the abstract GO::OntologyProvider package.
51              
52             =head1 TODO
53              
54             The following items needs to be done at some point to make the Node
55             class more flexible, and for it to better model the data.
56              
57             Add in methods to deal with secondary GOIDs
58              
59             Add in methods to allow definitions to be associated with, and
60             retrieved from Nodes.
61              
62             Add in methods to allow dbxrefs to be included.
63              
64             Not require Factories to add the paths to the root, but instead
65             have this class generate those paths from the inherent structure
66             of the graph in which the Nodes sit. This will also be useful to
67             generate paths to leaves/descendants.
68              
69             =cut
70              
71 3     3   247403 use strict;
  3         6  
  3         348  
72 3     3   19 use warnings;
  3         7  
  3         90  
73 3     3   16 use diagnostics;
  3         6  
  3         20  
74              
75 3     3   102 use vars qw ($PACKAGE $VERSION);
  3         7  
  3         7025  
76              
77             $VERSION = 0.16;
78             $PACKAGE = "GO::Node";
79              
80             # CLASS CONSTANTS
81              
82             my $kGoid = $PACKAGE.'::__goid';
83             my $kTerm = $PACKAGE.'::__term';
84              
85             my $kParents = $PACKAGE.'::__parents';
86             my $kChildren = $PACKAGE.'::__children';
87             my $kPaths = $PACKAGE.'::__paths';
88             my $kAncestors = $PACKAGE.'::__ancestors';
89              
90              
91             ##################################################################
92              
93             # The constructor, and associated initialization methods
94              
95             ##################################################################
96             sub new{
97             ##################################################################
98             # This is the constructor for the Node object
99             #
100             # At a minimum, the constructor expects, as named arguments, a GOID
101             # and a GO term, with which to create the node object.
102             #
103             # Usage:
104             #
105             # my $node = GO::Node->new(goid => $goid,
106             # term => $term);
107              
108 25116     25116 1 65517 my ($class, %args) = @_;
109              
110 25116         38299 my $self = {};
111              
112 25116         45829 bless $self, $class;
113              
114 25116 100 66     187371 if (!exists ($args{'goid'}) || !defined ($args{'goid'})){
    100 66        
115              
116 1         4 $self->_handleMissingArgument(argument=>'goid');
117              
118             }elsif (!exists ($args{'term'}) || !defined ($args{'term'})){
119              
120 1         6 $self->_handleMissingArgument(argument=>'term');
121              
122             }
123              
124 25114         61394 $self->{$kGoid} = $args{'goid'};
125 25114         47808 $self->{$kTerm} = $args{'term'};
126              
127 25114         49500 $self->{$kPaths} = [];
128              
129 25114         74830 return $self;
130              
131             }
132              
133             ##################################################################
134             #
135             # PUBLIC SETTER METHODS
136             #
137             ##################################################################
138              
139             ##################################################################
140             sub addChildNodes{
141             ##################################################################
142             # The public setter method allows a client to indicate that an array
143             # of nodes are children of the 'self' node. Only one node per child
144             # goid will get stored.
145             #
146             # Usage:
147             #
148             # $node->addChildNodes(@childNodes);
149              
150 43286     43286 1 50614 my $self = shift;
151              
152 43286         59438 foreach my $node (@_){
153              
154             # store children as a hash, with the goid as the key and the
155             # node itself as the value
156              
157 43286         129355 $self->{$kChildren}{$node->goid} = $node;
158              
159             }
160              
161             }
162              
163             ##################################################################
164             sub addParentNodes{
165             ##################################################################
166             # The public setter method allows a client to indicate that an array
167             # of nodes are parents of the 'self' node. Only one node per parent
168             # goid will get stored.
169             #
170             # Usage:
171             #
172             # $node->addParentNodes(@parentNodes);
173              
174 43286     43286 1 56748 my $self = shift;
175              
176 43286         72066 foreach my $node (@_){
177              
178             # store parents as a hash, with the goid as the key and the
179             # node itself as the value
180              
181 43286         131182 $self->{$kParents}{$node->goid} = $node;
182              
183             }
184              
185             }
186              
187             ##################################################################
188             sub addPathToRoot{
189             ##################################################################
190              
191             # This public setter method expects an array of nodes, that indicates
192             # a direct path to the root of the ontology. The array should not
193             # contain the self node, but should contain the root node. The last
194             # entry in the array is expected to be an immediate parent of the self
195             # node, while the first entry is expected to be the root node itself.
196             # This method will NOT check to see if the supplied path has not
197             # already been added. It is the Node Factory's responsibility to only
198             # add a unique path once. Furthermore, it will not check whether
199             # there is consistency between addedPaths and addedParents (this can
200             # be done using the isValid method though).
201              
202             #
203             # Usage:
204             #
205             # $node->addPathToRoot(@nodes);
206             #
207              
208 624132     624132 1 2268398 my ($self, @nodes) = @_;
209              
210 624132         622162 push (@{$self->{$kPaths}}, \@nodes);
  624132         2205524  
211              
212             }
213              
214             ##################################################################
215             #
216             # PUBLIC ACCESSSOR METHODS
217             #
218             ##################################################################
219              
220             ##################################################################
221             sub goid{
222             ##################################################################
223             # This public method returns the goid associated with the node.
224             #
225             # Usage:
226             #
227             # my $goid = $node->goid;
228              
229 1702074     1702074 1 6217476 return $_[0]->{$kGoid};
230              
231             }
232              
233             ##################################################################
234             sub term{
235             ##################################################################
236             # This public method returns the term associated with the node.
237             #
238             # Usage:
239             #
240             # my $goid = $node->term;
241              
242 524     524 1 2165 return $_[0]->{$kTerm};
243              
244             }
245              
246             ##################################################################
247             sub childNodes{
248             ##################################################################
249             # This public method returns an array of child nodes for the self
250             # node.
251             #
252             # Usage:
253             #
254             # my @childNodes = $node->childNodes;
255              
256 127354     127354 1 151683 return (values %{$_[0]->{$kChildren}});
  127354         522419  
257              
258             }
259              
260             ##################################################################
261             sub parentNodes{
262             ##################################################################
263             # This public method returns an array of parent nodes for the self
264             # node.
265             #
266             # Usage:
267             #
268             # my @parentNodes = $node->parentNodes;
269              
270 12558     12558 1 16644 return (values %{$_[0]->{$kParents}});
  12558         81379  
271              
272             }
273              
274             ##################################################################
275             sub pathsToRoot{
276             ##################################################################
277             # This public method returns an array of references to arrays, each of
278             # which contains the nodes in a path between the self node and the
279             # root. The self node is not included in the paths, but the root node
280             # is. The first node in each array is the most distant ancestor (the
281             # root), the last node is an immediate parent. If there are no paths
282             # to the root (i.e. it is the root node) then an empty array will be
283             # returned.
284             #
285             # Usage:
286             #
287             # my @pathsToRoot = $node->pathsToRoot;
288              
289 13761     13761 1 13104 return (@{$_[0]->{$kPaths}});
  13761         106720  
290              
291             }
292              
293             ##################################################################
294             sub pathsToAncestor{
295             ##################################################################
296             # This public method returns an array of references to arrays, each of
297             # which contains the nodes in a path between the self node and the
298             # specified ancestor. The self node is not included paths, but the
299             # specified ancestor node is. The first node in each array is the
300             # specified ancestor, the last node is an immediate parent. If there
301             # are no paths to the ancestor then an empty array will be returned.
302             #
303             # Usage:
304             #
305             # my @pathsToAncestor = $node->pathsToAncestor($ancestorNode);
306              
307 0     0 1 0 my ($self, $ancestor) = @_;
308              
309 0 0       0 return () if (!$self->isADescendantOf($ancestor)); # NOTE early return
310              
311 0         0 my @paths;
312            
313 0         0 foreach my $path ($self->pathsToRoot){ # examine paths to root
314              
315 0         0 foreach (my $j = 0; $j< @{$path}; $j++){
  0         0  
316              
317 0 0       0 if ($path->[$j] == $ancestor){ # if it's the node we want
318              
319             # we want the array from this point to the end
320            
321 0         0 push (@paths, [@{$path}[$j..@{$path}-1]]); # array slice
  0         0  
  0         0  
322            
323 0         0 last; # no need to look further
324              
325             }
326              
327             }
328              
329             }
330              
331             # now we have to unique the paths, as there may be some redundancy
332             # should check cookbook to see if there's a better way to do this
333              
334 0         0 my (%duplicates, @uniquePaths);
335              
336 0         0 foreach (my $i = 0; $i < @paths - 1 ; $i++){
337              
338 0 0       0 next if exists $duplicates{$i};
339              
340             INNER:
341              
342 0         0 foreach (my $j = $i+1; $j < @paths; $j++){
343              
344 0 0       0 next if exists $duplicates{$j};
345              
346             # can't be the same if different sizes
347              
348 0 0       0 next INNER if scalar @{$paths[$i]} != scalar @{$paths[$j]};
  0         0  
  0         0  
349              
350             # now compare each member of the arrays
351              
352 0         0 for (my $k = 0; $k < @{$paths[$i]}; $k++){
  0         0  
353              
354             # can't be the same if any two members are different
355              
356 0 0       0 next INNER if $paths[$i][$k] != $paths[$j][$k];
357              
358             }
359              
360             # if we get here, path j must be the same as i
361              
362 0         0 $duplicates{$j} = undef; # so we'll eliminate it from future consideration
363              
364             }
365              
366             }
367              
368 0         0 for (my $i = 0; $i < @paths; $i++){
369              
370 0 0       0 next if exists $duplicates{$i};
371              
372 0         0 push (@uniquePaths, $paths[$i]);
373              
374             }
375              
376 0         0 return @uniquePaths;
377              
378             }
379              
380             ##################################################################
381             sub ancestors{
382             ##################################################################
383             # This public method returns an array of unique GO::Nodes which
384             # are the unique ancestors that a node has. These ancestors will be
385             # derived from the paths to the root node that have been added to the
386             # node.
387              
388 32710     32710 1 43104 my $self = shift;
389              
390 32710 100       84425 if (!exists $self->{$kAncestors}){
391              
392 1205         1445 my %ancestors;
393            
394 1205         2790 foreach my $path ($self->pathsToRoot){
395            
396 23421         30593 foreach my $node (@{$path}){
  23421         40636  
397            
398 220728         368818 $ancestors{$node->goid} = $node;
399            
400             }
401            
402             }
403              
404 1205         4475 $self->{$kAncestors} = \%ancestors;
405              
406             }
407              
408 32710         39588 return (values %{$self->{$kAncestors}});
  32710         300762  
409              
410             }
411              
412             ##################################################################
413             sub lengthOfLongestPathToRoot{
414             ##################################################################
415             # This public method returns the length of the longest path to the
416             # root of the ontology from the node. If the node is in fact the root,
417             # then a value of zero will be returned.
418             #
419             # Usage:
420             #
421             # my $length = $node->lengthOfLongestPathToRoot;
422              
423 0     0 1 0 my $self = shift;
424              
425 0         0 my $maxLength = 0;
426              
427 0         0 foreach my $path ($self->pathsToRoot){
428              
429 0 0       0 $maxLength = scalar (@{$path}) if (scalar (@{$path}) > $maxLength);
  0         0  
  0         0  
430              
431             }
432              
433 0         0 return $maxLength;
434              
435             }
436              
437             ##################################################################
438             sub lengthOfShortestPathToRoot{
439             ##################################################################
440             # This public method returns the length of the shortest path to the
441             # root of the ontology from the node. If the node is in fact the root,
442             # then a value of zero will be returned.
443             #
444             # Usage:
445             #
446             # my $length = $node->lengthOfShortestPathToRoot;
447              
448 0     0 1 0 my $self = shift;
449              
450 0         0 my $minLength;
451              
452 0         0 foreach my $path ($self->pathsToRoot){
453              
454 0 0 0     0 $minLength = scalar (@{$path}) if (!defined $minLength || scalar (@{$path}) < $minLength);
  0         0  
  0         0  
455              
456             }
457              
458 0         0 return $minLength;
459              
460             }
461              
462             ##################################################################
463             sub meanLengthOfPathsToRoot{
464             ##################################################################
465             # This public method returns the mean length of all paths to the
466             # root node. If the node is in fact the root, then a value of zero
467             # will be returned.
468             #
469             # Usage:
470             #
471             # my $length = $node->meanLengthOfPathsToRoot;
472              
473 0     0 1 0 my $self = shift;
474              
475 0         0 my $total = 0;
476 0         0 my $num = 0;
477              
478 0         0 foreach my $path ($self->pathsToRoot){
479              
480 0         0 $total += scalar (@{$path});
  0         0  
481 0         0 $num++;
482              
483             }
484              
485 0         0 my $average = 0;
486              
487 0 0       0 if ($num){
488              
489 0         0 $average = $total/$num;
490              
491             }
492              
493 0         0 return $average;
494              
495             }
496            
497              
498             # Methods returning a boolean
499              
500             ##################################################################
501             sub isValid{
502             ##################################################################
503             # This method can be used to check that a node has been constructed
504             # correctly. It checks that it is a child of all its parents, and a parent
505             # of all of it's children. In addition, it checks that parents exist as
506             # the most recent ancestors of the node in its paths to the root node,
507             # and vice versa.
508              
509 12556     12556 1 51461 my $self = shift;
510              
511 12556         19395 my $isValid = 1; # assume there'll be no problems
512              
513             # check we're a child of each parent
514              
515 12556         29264 foreach my $parent ($self->parentNodes){
516              
517 21643 50       43741 $isValid = 0 unless $parent->isAParentOf($self);
518              
519             }
520              
521             # check we're a parent of each child
522              
523 12556         31755 foreach my $child ($self->childNodes){
524              
525 21643 50       44666 $isValid = 0 unless $child->isAChildOf($self);
526              
527             }
528              
529             # check that the most recent ancestor in each path is a parent
530              
531 12556         29954 foreach my $path ($self->pathsToRoot){
532              
533 312066 50       717106 $isValid = 0 unless $path->[-1]->isAParentOf($self);
534 312066 50       723299 $isValid = 0 unless $self->isAChildOf($path->[-1]);
535              
536             }
537              
538 12556         56835 return $isValid;
539              
540             }
541              
542             ##################################################################
543             sub isAParentOf{
544             ##################################################################
545             # This public method returns a boolean to indicate whether a node
546             # has the supplied node as a child.
547             #
548             # Usage :
549             #
550             # if ($node->isAParentOf($anotherNode)){
551             #
552             # # blah
553             #
554             # }
555              
556 333709     333709 1 449075 my ($self, $child) = @_;
557              
558 333709         780090 return exists $self->{$kChildren}{$child->goid};
559              
560             }
561              
562             ##################################################################
563             sub isAChildOf{
564             ##################################################################
565             # This public method returns a boolean to indicate whether a node
566             # has the supplied node as a parent.
567             #
568             # Usage :
569             #
570             # if ($node->isAChildOf($anotherNode)){
571             #
572             # # blah
573             #
574             # }
575              
576 333709     333709 1 445421 my ($self, $parent) = @_;
577              
578 333709         765310 return exists $self->{$kParents}{$parent->goid};
579              
580             }
581              
582             ##################################################################
583             sub isAnAncestorOf{
584             ##################################################################
585             # This method returns a boolean to indicate whether a node is an
586             # ancestor of another.
587             #
588             # Usage:
589             #
590             # if ($node->isAnAncestorOf($anotherNode)){
591             #
592             # # blah
593             #
594             # }
595              
596 0     0 1 0 my ($self, $descendant) = @_;
597              
598 0         0 return $descendant->isADescendantOf($self);
599              
600             }
601              
602             ##################################################################
603             sub isADescendantOf{
604             ##################################################################
605             # This method returns a boolean to indicate whether a node is a
606             # descendant of another.
607             #
608             # Usage:
609             #
610             # if ($node->isADescendantOf($anotherNode)){
611             #
612             # # blah
613             #
614             # }
615              
616 0     0 1 0 my ($self, $ancestor) = @_;
617              
618             # make sure ancestors get stored in ourself, if not already
619              
620 0 0       0 $self->ancestors if (!exists $self->{$kAncestors});
621              
622             # then check if the possible ancestor is in there
623              
624 0         0 return (exists $self->{$kAncestors}{$ancestor->goid});
625              
626             }
627              
628             ##################################################################
629             sub isLeaf{
630             ##################################################################
631             # This method returns a boolean to indicate whether a node is a leaf
632             # in the ontology (i.e. it has no children).
633             #
634             # Usage:
635             #
636             # if ($node->isLeaf){
637             #
638             # # blah
639             #
640             # }
641              
642 0     0 1 0 return (!exists $_[0]->{$kChildren});
643              
644             }
645              
646             ##################################################################
647             sub isRoot{
648             #####################################################################
649             # This method returns a boolean to indicate whether a node is the root
650             # in the ontology (i.e. it has no parents).
651             #
652             # Usage:
653             #
654             # if ($node->isRoot){
655             #
656             # # blah
657             #
658             # }
659              
660 0     0 1 0 return (!exists $_[0]->{$kParents});
661              
662             }
663              
664             =pod
665              
666             =head1 Protected Methods
667              
668             =cut
669              
670             # need to make this code common to all objects, or to
671             # start using something like Params-Validate
672              
673             ############################################################################
674             sub _handleMissingArgument{
675             ############################################################################
676             =pod
677              
678             =head2 _handleMissingArgument
679              
680             This protected method simply provides a simple way for concrete
681             subclasses to deal with missing arguments from method calls. It will
682             die with an appropriate error message.
683              
684             Usage:
685              
686             $self->_handleMissingArgument(argument=>'blah');
687              
688             =cut
689             ##############################################################################
690              
691 2     2   8 my ($self, %args) = @_;
692              
693 2   33     7 my $arg = $args{'argument'} || $self->_handleMissingArgument(argument=>'argument');
694              
695 2         14 my $receiver = (caller(1))[3];
696 2         8 my $caller = (caller(2))[3];
697              
698 2         25 die "The method $caller did not provide a value for the '$arg' argument for the $receiver method";
699              
700             }
701              
702             1; # To keep Perl happy
703              
704              
705             __END__