File Coverage

blib/lib/UMLS/Interface/PathFinder.pm
Criterion Covered Total %
statement 12 1603 0.7
branch 0 674 0.0
condition 0 243 0.0
subroutine 4 44 9.0
pod 0 1 0.0
total 16 2565 0.6


line stmt bran cond sub pod time code
1             # UMLS::Interface::PathFinder
2             # (Last Updated $Id: PathFinder.pm,v 1.67 2015/12/29 17:51:41 btmcinnes Exp $)
3             #
4             # Perl module that provides a perl interface to the
5             # Unified Medical Language System (UMLS)
6             #
7             # Copyright (c) 2004-2010,
8             #
9             # Bridget T. McInnes, University of Minnesota Twin Cities
10             # bthomson at cs.umn.edu
11             #
12             # Siddharth Patwardhan, University of Utah, Salt Lake City
13             # sidd at cs.utah.edu
14             #
15             # Serguei Pakhomov, University of Minnesota Twin Cities
16             # pakh0002 at umn.edu
17             #
18             # Ted Pedersen, University of Minnesota, Duluth
19             # tpederse at d.umn.edu
20             #
21             # Ying Liu, University of Minnesota
22             # liux0935 at umn.edu
23             #
24             # This program is free software; you can redistribute it and/or
25             # modify it under the terms of the GNU General Public License
26             # as published by the Free Software Foundation; either version 2
27             # of the License, or (at your option) any later version.
28             #
29             # This program is distributed in the hope that it will be useful,
30             # but WITHOUT ANY WARRANTY; without even the implied warranty of
31             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32             # GNU General Public License for more details.
33             #
34             # You should have received a copy of the GNU General Public License
35             # along with this program; if not, write to
36             #
37             # The Free Software Foundation, Inc.,
38             # 59 Temple Place - Suite 330,
39             # Boston, MA 02111-1307, USA.
40              
41             package UMLS::Interface::PathFinder;
42              
43 24     24   87 use Fcntl;
  24         24  
  24         4251  
44 24     24   93 use strict;
  24         25  
  24         430  
45 24     24   69 use warnings;
  24         22  
  24         549  
46 24     24   73 use bytes;
  24         31  
  24         108  
47              
48             my $pkg = "UMLS::Interface::PathFinder";
49              
50             my $debug = 0;
51              
52             my $max_depth = -1;
53              
54             my $root = "";
55              
56             my $option_verbose = 0;
57             my $option_forcerun = 0;
58             my $option_realtime = 0;
59             my $option_t = 0;
60             my $option_debugpath = 0;
61             my $option_cuilist = 0;
62             my $option_undirected = 0;
63              
64             my $errorhandler = "";
65             my $cuifinder = "";
66              
67             my %maximumDepths = ();
68              
69             local(*DEBUG_FILE);
70              
71             # UMLS-specific stuff ends ----------
72              
73             # -------------------- Class methods start here --------------------
74              
75             # method to create a new UMLS::Interface::PathFinder object
76             sub new {
77              
78 0     0 0   my $self = {};
79 0           my $className = shift;
80 0           my $params = shift;
81 0           my $handler = shift;
82            
83             # bless the object.
84 0           bless($self, $className);
85              
86             # initialize the global variables
87 0           $self->_initializeGlobalVariables();
88              
89             # initialize error handler
90 0           $errorhandler = UMLS::Interface::ErrorHandler->new();
91 0 0         if(! defined $errorhandler) {
92 0           print STDERR "The error handler did not get passed properly.\n";
93 0           exit;
94             }
95              
96             # initialize the cuifinder
97 0           $cuifinder = $handler;
98 0 0         if(! (defined $handler)) {
99 0           $errorhandler->_error($pkg,
100             "new",
101             "The CuiFinder handler did not get passed properly",
102             8);
103             }
104              
105             #iInitialize the object.
106 0           $self->_initialize($params);
107              
108 0           return $self;
109             }
110              
111             sub _initializeGlobalVariables {
112              
113 0     0     $debug = 0;
114            
115 0           $max_depth = -1;
116            
117 0           $root = "";
118            
119 0           $option_verbose = 0;
120 0           $option_forcerun = 0;
121 0           $option_realtime = 0;
122 0           $option_t = 0;
123 0           $option_debugpath = 0;
124 0           $option_cuilist = 0;
125 0           $option_undirected = 0;
126            
127 0           $errorhandler = "";
128 0           $cuifinder = "";
129            
130 0           %maximumDepths = ();
131             }
132              
133             # Method to initialize the UMLS::Interface::PathFinder object.
134             sub _initialize {
135              
136 0     0     my $self = shift;
137 0           my $params = shift;
138              
139             # set function name
140 0           my $function = "_initialize";
141 0           &_debug($function);
142            
143             # check self
144 0 0 0       if(!defined $self || !ref $self) {
145 0           $errorhandler->_error($pkg, $function, "", 2);
146             }
147            
148             # get the umlsinterfaceindex database from CuiFinder
149 0           my $sdb = $cuifinder->_getIndexDB();
150 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
151 0           $self->{'sdb'} = $sdb;
152            
153             # get the root
154 0           $root = $cuifinder->_root();
155              
156             # set up the options
157 0           $self->_setOptions($params);
158             }
159              
160             # method to set the realtime global parameter options
161             # input : bool <- 1 (turn on) 0 (turn off)
162             # output:
163             sub _setRealtimeOption {
164              
165 0     0     my $self = shift;
166 0           my $option = shift;
167              
168 0           my $function = "_setRealtimeOption";
169 0           &_debug($function);
170              
171             # check self
172 0 0 0       if(!defined $self || !ref $self) {
173 0           $errorhandler->_error($pkg, $function, "", 2);
174             }
175              
176 0 0         if($option == 1) {
177 0           $option_realtime = 1;
178             }
179             else {
180 0           $option_realtime = 0;
181             }
182             }
183              
184             # method to set the undirected global parameter options
185             # input : bool <- 1 (turn on) 0 (turn off)
186             # output:
187             sub _setUndirectedOption {
188              
189 0     0     my $self = shift;
190 0           my $option = shift;
191              
192 0           my $function = "_setUndirectedOption";
193 0           &_debug($function);
194              
195             # check self
196 0 0 0       if(!defined $self || !ref $self) {
197 0           $errorhandler->_error($pkg, $function, "", 2);
198             }
199              
200 0 0         if($option == 1) {
201 0           $option_undirected = 1;
202             }
203             else {
204 0           $option_undirected = 0;
205             }
206             }
207              
208             # method to set the global parameter options
209             # input : $params <- reference to a hash
210             # output:
211             sub _setOptions {
212              
213 0     0     my $self = shift;
214 0           my $params = shift;
215              
216 0           my $function = "_setOptions";
217 0           &_debug($function);
218              
219             # check self
220 0 0 0       if(!defined $self || !ref $self) {
221 0           $errorhandler->_error($pkg, $function, "", 2);
222             }
223              
224             # get all the parameters
225 0           my $forcerun = $params->{'forcerun'};
226 0           my $verbose = $params->{'verbose'};
227 0           my $realtime = $params->{'realtime'};
228 0           my $debugoption = $params->{'debug'};
229 0           my $t = $params->{'t'};
230 0           my $debugpath = $params->{'debugpath'};
231 0           my $cuilist = $params->{'cuilist'};
232 0           my $undirected = $params->{'undirected'};
233              
234 0           my $output = "";
235 0 0 0       if(defined $forcerun || defined $verbose || defined $realtime ||
      0        
      0        
      0        
      0        
236             defined $debugoption || defined $debugpath || defined $cuilist) {
237 0           $output .= "\nPathFinder User Options:\n";
238             }
239              
240             # check if the debug option has been been defined
241 0 0         if(defined $debugoption) {
242 0           $debug = 1;
243 0           $output .= " --debug option set\n";
244             }
245            
246             # print debug if it has been set
247 0           &_debug($function);
248              
249 0 0         if(defined $t) {
250 0           $option_t = 1;
251             }
252              
253             # check if the undirected option is set for shortest path
254 0 0         if(defined $undirected) {
255 0           $option_undirected = 1;
256 0           $output .= " --undirected option set\n";
257             }
258            
259             # check if the cuilist option has been defined
260 0 0         if(defined $cuilist) {
261 0           $option_cuilist = 1;
262 0           $output .= " --cuilist option set\n";
263             }
264            
265            
266             # check if debugpath option
267 0 0         if(defined $debugpath) {
268 0           $option_debugpath = 1;
269 0           $output .= " --debugpath $debugpath\n";
270 0 0         open(DEBUG_FILE, ">$debugpath") ||
271             die "Could not open depthpath file $debugpath\n";
272             }
273              
274             # check if the realtime option has been identified
275 0 0         if(defined $realtime) {
276 0           $option_realtime = 1;
277            
278 0           $output .= " --realtime option set\n";
279             }
280              
281             # check if verbose run has been identified
282 0 0         if(defined $verbose) {
283 0           $option_verbose = 1;
284            
285 0           $output .= " --verbose option set\n";
286             }
287              
288             # check if a forced run has been identified
289 0 0         if(defined $forcerun) {
290 0           $option_forcerun = 1;
291            
292 0           $output .= " --forcerun option set\n";
293             }
294              
295 0 0         if($option_t == 0) {
296 0           print STDERR "$output\n";
297             }
298             }
299              
300             # method to return the maximum depth of a taxonomy.
301             # input :
302             # output: $int <- string containing the max depth
303             sub _depth {
304              
305 0     0     my $self = shift;
306            
307 0           my $function = "_depth";
308 0           &_debug($function);
309              
310             # check self
311 0 0 0       if(!defined $self || !ref $self) {
312 0           $errorhandler->_error($pkg, $function, "", 2);
313             }
314              
315             # get the depth and set the path information
316 0 0         if($max_depth >= 0) {
317 0           return $max_depth;
318             }
319              
320             # check if it is in the info table and if it is return
321             # that otherwise we need to find the maximum depth and
322             # then store it here
323              
324             # get the info table name
325 0           my $infoTableName = $cuifinder->_getInfoTableName();
326            
327             # set the index DB handler
328 0           my $sdb = $self->{'sdb'};
329 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
330              
331             # get maximum depth from the info table
332 0           my $arrRef = $sdb->selectcol_arrayref("select INFO from $infoTableName where ITEM=\'DEPTH\'");
333 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
334              
335             # get the depth from the array
336 0           my $depth = shift @{$arrRef};
  0            
337              
338             # if the depth was there set the maximum depth and return it
339             # otherwise we are off to find it either in realtime or through
340             # the database depending on the user options
341 0 0         if(defined $depth) {
342 0           $max_depth = $depth;
343 0           return $max_depth;
344             }
345              
346             # find the depth in realtime
347 0 0         if($option_realtime) {
348 0           my @array = ();
349 0           my %visited = ();
350              
351 0           $self->_getMaxDepth($root, 0, \@array, \%visited);
352            
353             # the _getMaxDepth method does a DFS over the entire
354             # heirarchy - I am not certain a way around this yet
355             # but while we were add I stored the maximum depth
356             # of each of the CUIs in a hash since there is not a
357             # quick way of determining this in realtime as of yet
358            
359             # we are going to store them in the info table. This is
360             # hopefully a temporary solution until I can figure out
361             # a way to speed up getMaximumDepthInRealTime - if we
362             # run out of room with this, I will just keep the hash
363             # and then have to go through the
364 0           foreach my $cui (sort keys %maximumDepths) {
365 0           my $d = $maximumDepths{$cui};
366 0           $sdb->do("INSERT INTO $infoTableName (ITEM, INFO) VALUES ('$cui', '$d')");
367 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
368             }
369             }
370             # otherwise find it in the database
371             else {
372 0           $self->_setIndex();
373             }
374              
375             # at this point we have the max depth and the variable has been set
376             # so we are going to insert this into the info table and then return it
377 0           $sdb->do("INSERT INTO $infoTableName (ITEM, INFO) VALUES ('DEPTH', '$max_depth')");
378 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
379              
380             # return the maximum depth
381 0           return $max_depth;
382             }
383              
384             # recursive method to obtain the maximum depth in realtime
385             # input : $concept <- string containing cui
386             # $d <- string containing the depth of the cui
387             # $array <- reference to an array containing the current path
388             # output: $concept <- string containing cui
389             # $int <- string containing the depth of the cui
390             # $array <- reference to an array containing the current path
391             sub _getMaxDepth {
392              
393 0     0     my $self = shift;
394 0           my $concept = shift;
395 0           my $d = shift;
396 0           my $array = shift;
397 0           my $hash = shift;
398              
399 0           my $function = "_getMaxDepth";
400              
401             # check self
402 0 0 0       if(!defined $self || !ref $self) {
403 0           $errorhandler->_error($pkg, $function, "", 2);
404             }
405              
406             # check concept was obtained
407 0 0         if(!$concept) {
408 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
409             }
410            
411             # check if valid concept
412 0 0         if(! ($errorhandler->_validCui($concept)) ) {
413 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
414             }
415              
416             # check that the concept is not a forbidden concept
417 0 0         if($cuifinder->_forbiddenConcept($concept) == 1) { return; }
  0            
418              
419             # set up the new path
420 0           my @path = @{$array};
  0            
421 0           push @path, $concept;
422 0           my $series = join " ", @path;
423              
424             # if have already been here - leave
425 0 0         if(exists ${$hash}{$concept}{$series}) { return; }
  0            
  0            
426 0           else { ${$hash}{$concept}{$series}++; }
  0            
427              
428            
429             # check to see if it is the max depth
430 0 0         if(($d) > $max_depth) { $max_depth = $d; }
  0            
431              
432             # increment the depth
433 0           $d++;
434            
435             # add to the depths hash - if we are going to go through the trouble
436             # of having to do a depth first search - we might as well find the
437             # the maximum depths of all the cuis and store these in the database
438             # I am going to see if I can store them in a hash and then dump the
439             # hash in the database when I am finished. This way we don't have
440             # to continually access the database which is really not an acceptable
441             # solution
442 0 0         if(! (exists $maximumDepths{$concept}) ) { $maximumDepths{$concept} = $d; }
  0 0          
443 0           elsif($maximumDepths{$concept} < $d) { $maximumDepths{$concept} = $d; }
444              
445             # get all the children
446 0           my $children = $cuifinder->_getChildren($concept);
447            
448             # search through the children
449 0           foreach my $child (@{$children}) {
  0            
450            
451             # check if child cui has already in the path
452 0 0         if($series=~/$child/) { next; }
  0            
453 0 0         if($child eq $concept) { next; }
  0            
454            
455             # if it isn't continue on with the depth first search
456 0           $self->_getMaxDepth($child, $d, \@path, $hash);
457             }
458             }
459              
460             # method to find all the paths from a concept to
461             # the root node of the is-a taxonomy.
462             # input : $concept <- string containing cui
463             # output: $array <- array reference containing the paths
464             sub _pathsToRoot {
465              
466 0     0     my $self = shift;
467 0           my $concept = shift;
468              
469 0           my $function = "_pathsToRoot";
470 0           &_debug($function);
471              
472             # check self
473 0 0 0       if(!defined $self || !ref $self) {
474 0           $errorhandler->_error($pkg, $function, "", 2);
475             }
476              
477             # check parameter exists
478 0 0         if(!defined $concept) {
479 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
480             }
481            
482             # check if valid concept
483 0 0         if(! ($errorhandler->_validCui($concept)) ) {
484 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
485             }
486              
487             # get the relations from the configuration file
488 0           my $configrel = $cuifinder->_getRelString();
489 0           $configrel=~/(REL) (\:\:) (include|exclude) (.*?)$/;
490 0           my $relationstring = $4;
491              
492             # check to make certain the configuration file only contains
493             # heirarchical relations (PAR/CHD or RB/RN).
494             #my @relations = split/\s*\,\s*/, $relationstring;
495             #foreach my $rel (@relations) {
496             #if(! ($rel=~/(PAR|CHD|RB|RN)/) ) {
497             #$errorhandler->_error($pkg, $function, "Method only supports heirarhical relations (PAR/CHD or RB/RN).", 10);
498             #}
499             #}
500            
501             # if the realtime option is set get the paths otherwise
502             # they are or should be stored in the database
503 0           my $paths = "";
504 0 0         if($option_realtime) {
505 0           $paths = $self->_getPathsToRootInRealtime($concept);
506             }
507             else {
508 0           $paths = $self->_getPathsToRootFromIndex($concept);
509             }
510            
511 0           return $paths
512             }
513              
514             # returns all the paths to the root from the concept
515             # this information is stored in the index - if it is
516             # not then the index is created
517             # input : $string <- string containing the cui (assumed correct)
518             # output: $array <- reference to an array containing the paths
519             sub _getPathsToRootFromIndex {
520 0     0     my $self = shift;
521 0           my $concept = shift;
522              
523 0           my $function = "_getPathsToRootFromIndex";
524            
525             # check self
526 0 0 0       if(!defined $self || !ref $self) {
527 0           $errorhandler->_error($pkg, $function, "", 2);
528             }
529              
530             # if the concept is the root then return root
531 0 0         if($concept eq $root) {
532 0           my @array = (); push @array, $root;
  0            
533 0           return \@array;
534             }
535              
536             # set the index DB handler
537 0           my $sdb = $self->{'sdb'};
538 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
539            
540             # create the index if it hasn't been created
541 0           $self->_setIndex();
542              
543             # get the table name
544 0           my $tableName = $cuifinder->_getTableName();
545            
546             # get the paths from the database
547 0           my $paths = $sdb->selectcol_arrayref("select PATH from $tableName where CUI=\'$concept\'");
548 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
549            
550 0           return $paths;
551             }
552              
553             # check the index to make certain it is load properly
554             # input :
555             # outupt:
556             sub _checkIndex {
557              
558              
559 0     0     my $self = shift;
560 0           my $tableFile = shift;
561 0           my $tableName = shift;
562 0           my $tableNameHuman = shift;
563              
564 0           my $function = "_checkIndex";
565 0           &_debug($function);
566              
567             # check self
568 0 0 0       if(!defined $self || !ref $self) {
569 0           $errorhandler->_error($pkg, $function, "", 2);
570             }
571            
572             # check the input variables
573 0 0 0       if(!$tableFile || !$tableName || !$tableNameHuman) {
      0        
574 0           $errorhandler->_error($pkg, $function, "Error with input variables.", 4);
575             }
576              
577             # set the auxillary database that holds the path information
578 0           my $sdb = $self->{'sdb'};
579 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
580              
581             # extract the check
582 0           my $arrRef = $sdb->selectcol_arrayref("select CUI from $tableName where CUI=\'CHECK\'");
583            
584 0           my $count = $#{$arrRef};
  0            
585            
586 0 0         if($count != 0) {
587 0           my $str = "Index did not complete. Remove using the removeConfigData.pl program and re-run.";
588 0           $errorhandler->_error($pkg, $function, $str, 9);
589             }
590            
591             }
592              
593             # load the index in realtime
594             # input :
595             # outupt:
596             sub _createIndex {
597              
598              
599 0     0     my $self = shift;
600 0           my $tableFile = shift;
601 0           my $tableName = shift;
602 0           my $tableNameHuman = shift;
603              
604 0           my $function = "_createIndex";
605 0           &_debug($function);
606              
607             # check self
608 0 0 0       if(!defined $self || !ref $self) {
609 0           $errorhandler->_error($pkg, $function, "", 2);
610             }
611            
612             # check the input variables
613 0 0 0       if(!$tableFile || !$tableName || !$tableNameHuman) {
      0        
614 0           $errorhandler->_error($pkg, $function, "Error with input variables.", 4);
615             }
616              
617             # set the auxillary database that holds the path information
618 0           my $sdb = $self->{'sdb'};
619 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
620              
621 0           print STDERR "You have requested path information about a concept. In\n";
622 0           print STDERR "order to obtain this information we need to create an \n";
623 0           print STDERR "index or resubmit this command using --realtime. Creating\n";
624 0           print STDERR "an index can be very time-consuming, but once it is built\n";
625 0           print STDERR "your commands will run faster than with --realtime.\n\n";
626              
627 0 0         if($option_forcerun == 0) {
628 0           print STDERR "Do you want to continue with index creation (y/n)";
629            
630 0           my $answer = ; chomp $answer;
  0            
631            
632 0 0         if($answer=~/(N|n)/) {
633 0           print STDERR "Exiting program now.\n\n";
634 0           exit;
635             }
636             }
637             else {
638 0           print "Running index ... \n";
639             }
640            
641            
642             # create the table in the umls database
643 0           $sdb->do("CREATE TABLE IF NOT EXISTS $tableName (CUI char(8), DEPTH int, PATH varchar(450))");
644 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
645            
646             # insert the name into the index
647 0           $sdb->do("INSERT INTO tableindex (TABLENAME, HEX) VALUES ('$tableNameHuman', '$tableName')");
648 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
649              
650             # for each root - this is for when we allow multiple roots
651             # right now though we only have one - the umlsRoot
652 0           $self->_initializeDepthFirstSearch($root, 0, $root);
653              
654             # add a check that the DFS has finished
655 0           $sdb->do("INSERT INTO $tableName (CUI, DEPTH, PATH) VALUES(\'CHECK\', '0', \'\')");
656 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
657              
658             # create index on the newly formed table
659 0           my $indexname = "$tableName" . "_CUIINDEX";
660 0           my $index = $sdb->do("create index $indexname on $tableName (CUI)");
661 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
662              
663 0           print "Index created.\n\n";
664             }
665              
666             # creates the index containing all of the path to root information
667             # for each concept in the sources and relations specified in the
668             # configuration file
669             # input :
670             # output:
671             sub _setIndex {
672              
673 0     0     my $self = shift;
674              
675 0           my $function = "_setIndex";
676 0           &_debug($function);
677            
678             # check self
679 0 0 0       if(!defined $self || !ref $self) {
680 0           $errorhandler->_error($pkg, $function, "", 2);
681             }
682              
683 0           my $tableName = $cuifinder->_getTableName();
684 0           my $tableFile = $cuifinder->_getTableFile();
685 0           my $tableNameHuman = $cuifinder->_getTableNameHuman();
686              
687             # if the path infomration has not been stored
688 0 0         if(! ($cuifinder->_checkTableExists($tableName))) {
689            
690             # otherwise create the tableFile and put the information in the
691             # file and the database
692 0           $self->_createIndex($tableFile, $tableName, $tableNameHuman);
693            
694             }
695              
696             # check Index
697 0           $self->_checkIndex($tableFile, $tableName, $tableNameHuman);
698              
699             # set the maximum depth
700 0           $self->_setMaximumDepth();
701             }
702              
703             # set the maximum depth variable
704             # input :
705             # output:
706             sub _setMaximumDepth {
707 0     0     my $self = shift;
708              
709 0           my $function = "_setMaximumDepth";
710            
711             # check self
712 0 0 0       if(!defined $self || !ref $self) {
713 0           $errorhandler->_error($pkg, $function, "", 2);
714             }
715              
716             # set the auxillary database that holds the path information
717 0           my $sdb = $self->{'sdb'};
718 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
719              
720             # get the table name
721 0           my $tableName = $cuifinder->_getTableName();
722              
723             # set the maximum depth
724 0           my $d = $sdb->selectcol_arrayref("select max(DEPTH) from $tableName");
725 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
726              
727 0           $max_depth = shift @{$d};
  0            
728             }
729              
730             # print out the function name to standard error
731             # input : $function <- string containing function name
732             # output:
733             sub _debug {
734 0     0     my $function = shift;
735 0 0         if($debug) { print STDERR "In UMLS::Interface::PathFinder::$function\n"; }
  0            
736             }
737              
738             # A Depth First Search (DFS) in order to determine
739             # the maximum depth of the taxonomy and obtain
740             # all of the path information
741             # input :
742             # output:
743             sub _initializeDepthFirstSearch {
744              
745 0     0     my $self = shift;
746 0           my $concept = shift;
747 0           my $d = shift;
748 0           my $root = shift;
749            
750 0           my $function = "_initializeDepthFirstSearch";
751 0           &_debug($function);
752              
753             # check self
754 0 0 0       if(!defined $self || !ref $self) {
755 0           $errorhandler->_error($pkg, $function, "", 2);
756             }
757            
758             # check the parameters are defined
759 0 0 0       if(!(defined $concept) || !(defined $d) || !(defined $root)) {
      0        
760 0           $errorhandler->_error($pkg, $function, "Error with input variables.", 4);
761             }
762              
763             # check valid concept
764 0 0         if(! ($errorhandler->_validCui($concept)) ) {
765 0           $errorhandler->_error($pkg, $function, "Incorrect input value ($concept).", 6);
766             }
767            
768 0           my $tableFile = $cuifinder->_getTableFile();
769              
770             # check if verbose mode
771 0 0         if($option_verbose) {
772 0 0         open(TABLEFILE, ">$tableFile") || die "Could not open $tableFile";
773             }
774            
775             # get the children
776 0           my $children = $cuifinder->_getChildren($concept);
777            
778 0           my $subsumers; my $leafs; my $s = 0; my $l = 0;
  0            
  0            
779             # foreach of the children continue down the taxonomy
780 0           foreach my $child (@{$children}) {
  0            
781 0           my @array = ();
782 0           push @array, $concept;
783 0           my $path = \@array;
784 0           ($subsumers, $leafs) = $self->_depthFirstSearch($child, $d,$path,*TABLEFILE);
785 0           $s += keys %{$subsumers}; $l += keys %{$leafs};
  0            
  0            
  0            
786             }
787            
788             # get the database
789 0           my $sdb = $self->{'sdb'};
790 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
791            
792             # get the table name of the intrinsic index and insert the leaves and subsumers
793 0           my $intrinsicTableName = $cuifinder->_getIntrinsicTableName();
794 0           my $arrRef = $sdb->do("INSERT INTO $intrinsicTableName (CUI, LEAVES, SUBSUMERS) VALUES(\'$concept\', '$l', \'$s\')");
795 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
796              
797             # close the table file if in verbose mode
798 0 0         if($option_verbose) {
799 0           close TABLEFILE;
800            
801             # set the table file permissions
802 0           my $temp = chmod 0777, $tableFile;
803             }
804             }
805              
806             # This is like a reverse DFS only it is not recursive
807             # due to the stack overflow errors I received when it was
808             # input :
809             # output:
810             sub _getPathsToRootInRealtime {
811              
812 0     0     my $self = shift;
813 0           my $concept = shift;
814              
815 0 0 0       return () if(!defined $self || !ref $self);
816              
817 0           my $function = "_getPathsToRootInRealtime($concept)";
818 0           &_debug($function);
819            
820             # check self
821 0 0 0       if(!defined $self || !ref $self) {
822 0           $errorhandler->_error($pkg, $function, "", 2);
823             }
824              
825             # check concept was obtained
826 0 0         if(!$concept) {
827 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
828             }
829            
830             # check if valid concept
831 0 0         if(! ($errorhandler->_validCui($concept)) ) {
832 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
833             }
834              
835             # set the storage
836 0           my @path_storage = ();
837              
838             # set the stack
839 0           my @stack = ();
840 0           push @stack, $concept;
841              
842             # set the count
843 0           my %visited = ();
844              
845             # set the paths
846 0           my @paths = ();
847 0           my @empty = ();
848 0           push @paths, \@empty;
849              
850             # now loop through the stack
851 0           while($#stack >= 0) {
852            
853 0           my $concept = $stack[$#stack];
854 0           my $path = $paths[$#paths];
855              
856             # set up the new path
857 0           my @intermediate = @{$path};
  0            
858 0           push @intermediate, $concept;
859 0           my $series = join " ", @intermediate;
860            
861             # check that the concept is not one of the forbidden concepts
862 0 0         if($cuifinder->_forbiddenConcept($concept)) {
863 0           pop @stack; pop @paths;
  0            
864 0           next;
865             }
866              
867             # check if concept has been visited already
868 0 0         if(exists $visited{$series}) {
869 0           pop @stack; pop @paths;
  0            
870 0           next;
871             }
872 0           else { $visited{$series}++; }
873            
874             # print information into the file if debugpath option is set
875 0 0         if($option_debugpath) {
876 0           my $d = $#intermediate+1;
877 0           print DEBUG_FILE "$concept\t$d\t@intermediate\n";
878             }
879            
880             # if the concept is the umls root - we are done
881 0 0         if($concept eq $root) {
882             # this is a complete path to the root so push it on the paths
883 0           my @reversed = reverse(@intermediate);
884 0           my $rseries = join " ", @reversed;
885 0           push @path_storage, $rseries;
886 0           next;
887             }
888            
889             # get all the parents
890 0           my $parents = $cuifinder->_getParents($concept);
891            
892             # if there are no children we are finished with this concept
893 0 0         if($#{$parents} < 0) {
  0            
894 0           pop @stack; pop @paths;
  0            
895 0           next;
896             }
897              
898             # search through the children
899 0           my $stackflag = 0;
900 0           foreach my $parent (@{$parents}) {
  0            
901            
902             # check if concept is already in the path
903 0 0         if($series=~/$parent/) { next; }
  0            
904 0 0         if($concept eq $parent) { next; }
  0            
905              
906             # if it isn't continue on with the depth first search
907 0           push @stack, $parent;
908 0           push @paths, \@intermediate;
909 0           $stackflag++;
910             }
911            
912             # check to make certain there were actually children
913 0 0         if($stackflag == 0) {
914 0           pop @stack; pop @paths;
  0            
915             }
916             }
917              
918 0           return \@path_storage;
919             }
920              
921             # Depth First Search (DFS) recursive function to collect the path
922             # information and store it in the umlsinterfaceindex database
923             # input : $concept <- string containing the cui
924             # $depth <- depth of the cui
925             # $array <- reference to an array containing the path
926             # output: $concept <- string containing the cui
927             # $depth <- depth of the cui
928             # $array <- reference to an array containing the path
929             sub _depthFirstSearch {
930              
931 0     0     my $self = shift;
932 0           my $concept = shift;
933 0           my $d = shift;
934 0           my $array = shift;
935 0           local(*F) = shift;
936            
937 0           my $function = "_depthFirstSearch";
938            
939             # check self
940 0 0 0       if(!defined $self || !ref $self) {
941 0           $errorhandler->_error($pkg, $function, "", 2);
942             }
943              
944             # check the parameters are defined
945 0 0 0       if(!(defined $concept) || !(defined $d)) {
946 0           $errorhandler->_error($pkg, $function, "Error with input variables.", 4);
947             }
948            
949             # check if valid concept
950 0 0         if(! ($errorhandler->_validCui($concept)) ) {
951 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
952             }
953            
954             # check that the concept is not a forbidden concept
955 0 0         if($cuifinder->_forbiddenConcept($concept)) { return; }
  0            
956            
957             # get the database
958 0           my $sdb = $self->{'sdb'};
959 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
960            
961             # get the table name of the index
962 0           my $tableName = $cuifinder->_getTableName();
963            
964             # increment the depth
965 0           $d++;
966              
967             # set up the new path
968 0           my @path = @{$array};
  0            
969 0           push @path, $concept;
970 0           my $series = join " ", @path;
971            
972             # load path information into the table
973             # check if only a specified set of cui information is required
974 0 0         if($option_cuilist) {
975            
976             # check if it is in the cuilist - and if so insert it the cui
977 0 0         if($cuifinder->_inCuiList($concept)) {
978 0           my $arrRef = $sdb->do("INSERT INTO $tableName (CUI, DEPTH, PATH) VALUES(\'$concept\', '$d', \'$series\')");
979 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
980             }
981             }
982             # otherwise we are loading all of it
983             else {
984 0           my $arrRef = $sdb->do("INSERT INTO $tableName (CUI, DEPTH, PATH) VALUES(\'$concept\', '$d', \'$series\')");
985 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
986             }
987            
988             # print information into the file if verbose option is set
989 0 0         if($option_verbose) {
990 0 0         if($option_cuilist) {
991 0 0         if($cuifinder->_inCuiList($concept)) {
992 0           print F "$concept\t$d\t$series\n";
993             }
994             }
995 0           else { print F "$concept\t$d\t$series\n"; }
996             }
997            
998             # get all the children
999 0           my $children = $cuifinder->_getChildren($concept);
1000              
1001 0           my %totalLeaves = (); my %totalSubsumers = ();
  0            
1002             # search through the children
1003 0           foreach my $child (@{$children}) {
  0            
1004            
1005             # check if child cui has already in the path
1006 0 0         if($series=~/$child/) { next; }
  0            
1007 0 0         if($child eq $concept) { next; }
  0            
1008            
1009             # if it isn't continue on with the depth first search
1010 0           my ($subsumers, $leafs) = $self->_depthFirstSearch($child, $d, \@path,*F);
1011            
1012 0 0         if(defined $leafs) {
1013 0 0         if(%{$leafs}) {
  0            
1014 0           %totalLeaves = (%totalLeaves, %{$leafs});
  0            
1015             }
1016             }
1017 0 0         if(defined $subsumers) {
1018 0 0         if(%{$subsumers}) {
  0            
1019 0           %totalSubsumers = (%totalSubsumers, %{$subsumers});
  0            
1020             }
1021             }
1022             }
1023 0           my $l = keys %totalLeaves; my $s = keys %totalSubsumers;
  0            
1024            
1025             # get the table name of the intrinsic index and insert the leaves and subsumers
1026 0           my $intrinsicTableName = $cuifinder->_getIntrinsicTableName();
1027            
1028             # check if CUI is already there
1029 0           my $ex = $sdb->selectcol_arrayref("select count(*) from $intrinsicTableName where CUI=\'$concept\'");
1030 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
1031            
1032             # return the minimum depth
1033 0           my $count = shift @{$ex};
  0            
1034 0 0         if($count == 0) {
1035 0           my $arrRef = $sdb->do("INSERT INTO $intrinsicTableName (CUI, LEAVES, SUBSUMERS) VALUES(\'$concept\', '$l', \'$s\')");
1036 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
1037             }
1038              
1039 0 0         if($#{$children} < 0) { $totalLeaves{$concept}++; } $totalSubsumers{$concept}++;
  0            
  0            
  0            
1040              
1041 0           return (\%totalSubsumers, \%totalLeaves);
1042             }
1043              
1044             # function returns the minimum depth of a concept
1045             # input : $concept <- string containing the cui
1046             # output: $int <- string containing the depth of the cui
1047             sub _findMinimumDepth {
1048              
1049 0     0     my $self = shift;
1050 0           my $cui = shift;
1051              
1052 0           my $function = "_findMinimumDepth";
1053 0           &_debug($function);
1054              
1055             # check self
1056 0 0 0       if(!defined $self || !ref $self) {
1057 0           $errorhandler->_error($pkg, $function, "", 2);
1058             }
1059              
1060             # check concept was obtained
1061 0 0         if(!$cui) {
1062 0           $errorhandler->_error($pkg, $function, "Error with input variable \$cui.", 4);
1063             }
1064            
1065             # check if valid concept
1066 0 0         if(! ($errorhandler->_validCui($cui)) ) {
1067 0           $errorhandler->_error($pkg, $function, "Concept ($cui) in not valid.", 6);
1068             }
1069            
1070             # get the database
1071 0           my $sdb = $self->{'sdb'};
1072 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
1073            
1074             # if it is in the parent taxonomy
1075 0 0         if($cuifinder->_inParentTaxonomy($cui)) { return 1; }
  0            
1076            
1077 0           my $min = 0;
1078 0 0         if($option_realtime) {
1079 0           $min = $self->_findMinimumDepthInRealTime($cui);
1080             }
1081             else {
1082            
1083             # set the depth
1084 0           $self->_setIndex();
1085              
1086             # get the table name
1087 0           my $tableName = $cuifinder->_getTableName();
1088              
1089             # get the minimum depth from the table
1090 0           my $d = $sdb->selectcol_arrayref("select min(DEPTH) from $tableName where CUI=\'$cui\'");
1091 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
1092            
1093             # return the minimum depth
1094 0           $min = shift @{$d}; $min++;
  0            
  0            
1095             }
1096            
1097 0           return $min;
1098             }
1099              
1100             # function returns maximum depth of a concept
1101             # input : $concept <- string containing the cui
1102             # output: $int <- string containing the depth of the cui
1103             sub _findMaximumDepth {
1104              
1105 0     0     my $self = shift;
1106 0           my $cui = shift;
1107              
1108 0           my $function = "_findMaximumDepth";
1109 0           &_debug($function);
1110            
1111             # check self
1112 0 0 0       if(!defined $self || !ref $self) {
1113 0           $errorhandler->_error($pkg, $function, "", 2);
1114             }
1115              
1116             # check concept was obtained
1117 0 0         if(!$cui) {
1118 0           $errorhandler->_error($pkg, $function, "Error with input variable \$cui.", 4);
1119             }
1120            
1121             # check if valid concept
1122 0 0         if(! ($errorhandler->_validCui($cui)) ) {
1123 0           $errorhandler->_error($pkg, $function, "Concept ($cui) in not valid.", 6);
1124             }
1125            
1126             # get the database
1127 0           my $sdb = $self->{'sdb'};
1128 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
1129            
1130             # initialize max
1131 0           my $max = 0;
1132             # if realtime option is set
1133 0 0         if($option_realtime) {
1134             # get the info table name
1135 0           my $infoTableName = $cuifinder->_getInfoTableName();
1136            
1137             # set the index DB handler
1138 0           my $sdb = $self->{'sdb'};
1139 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
1140              
1141             # get maximum depth from the info table
1142 0           my $arrRef = $sdb->selectcol_arrayref("select INFO from $infoTableName where ITEM=\'$cui\'");
1143 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
1144            
1145             # get the depth from the array
1146 0           my $depth = shift @{$arrRef};
  0            
1147              
1148 0 0         if(defined $depth) {
1149 0           $max = $depth;
1150             }
1151             else {
1152             # get the maximum depth
1153 0           $max = $self->_findMaximumDepthInRealTime($cui);
1154            
1155             # insert it in the info table - this is caching over multipe runs of
1156             # the program. I don't really like this solution but until I can
1157             # figure out how to speed up findMaximumDepthInRealTime then
1158             # this is going to have to do.
1159 0           $sdb->do("INSERT INTO $infoTableName (ITEM, INFO) VALUES ('$cui', '$max')");
1160 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
1161              
1162             }
1163             }
1164            
1165             # otherwise
1166             else {
1167             # set the depth
1168 0           $self->_setIndex();
1169            
1170             # get the table name
1171 0           my $tableName = $cuifinder->_getTableName();
1172            
1173             # get the depth from the table
1174 0           my $d = $sdb->selectcol_arrayref("select max(DEPTH) from $tableName where CUI=\'$cui\'");
1175 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
1176              
1177 0           $max = shift @{$d}; $max++;
  0            
  0            
1178             }
1179              
1180             # return the maximum depth
1181 0           return $max;
1182             }
1183              
1184             # find the shortest path between two concepts
1185             # input : $concept1 <- string containing the first cui
1186             # $concept2 <- string containing the second
1187             # output: $array <- reference to an array containing the shortest path(s)
1188             sub _findShortestPath {
1189              
1190 0     0     my $self = shift;
1191 0           my $concept1 = shift;
1192 0           my $concept2 = shift;
1193              
1194 0           my $function = "_findShortestPath";
1195 0           &_debug($function);
1196            
1197             # check self
1198 0 0 0       if(!defined $self || !ref $self) {
1199 0           $errorhandler->_error($pkg, $function, "", 2);
1200             }
1201              
1202             # check parameter exists
1203 0 0         if(!defined $concept1) {
1204 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
1205             }
1206 0 0         if(!defined $concept2) {
1207 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
1208             }
1209              
1210             # check if valid concept
1211 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
1212 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
1213             }
1214 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
1215 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
1216             }
1217              
1218             # if realtime option is set find the shortest path in realtime
1219 0 0         if($option_realtime) {
1220 0           return $self->_findShortestPathInRealTime($concept1, $concept2);
1221             }
1222             else {
1223 0           return $self->_findShortestPathThroughLCS($concept1, $concept2);
1224             }
1225             }
1226              
1227              
1228             # this function returns the shortest path between two concepts
1229             # input : $concept1 <- string containing the first cui
1230             # $concept2 <- string containing the second
1231             # output: $array <- reference to an array containing the lcs(es)
1232             sub _findShortestPathThroughLCS {
1233            
1234 0     0     my $self = shift;
1235 0           my $concept1 = shift;
1236 0           my $concept2 = shift;
1237            
1238 0           my $function = "_findShortestPathThroughLCS";
1239 0           &_debug($function);
1240            
1241             # check self
1242 0 0 0       if(!defined $self || !ref $self) {
1243 0           $errorhandler->_error($pkg, $function, "", 2);
1244             }
1245              
1246             # check parameter exists
1247 0 0         if(!defined $concept1) {
1248 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
1249             }
1250 0 0         if(!defined $concept2) {
1251 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
1252             }
1253              
1254             # check if valid concept
1255 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
1256 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
1257             }
1258 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
1259 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
1260             }
1261              
1262             # find the shortest path(s) and lcs - there may be more than one
1263 0           my $hash = $self->_shortestPath($concept1, $concept2);
1264            
1265             # remove the blanks from the paths
1266 0           my @paths = (); my $output = "";
  0            
1267 0           foreach my $path (sort keys %{$hash}) {
  0            
1268 0 0         if($path=~/C[0-9]+/) {
1269 0           push @paths, $path;
1270             }
1271             }
1272            
1273             # return the shortest paths (all of them)
1274 0           return \@paths;
1275             }
1276              
1277              
1278             # this function returns the least common subsummer between two concepts
1279             # input : $concept1 <- string containing the first cui
1280             # $concept2 <- string containing the second
1281             # output: $array <- reference to an array containing the lcs(es)
1282             sub _findLeastCommonSubsumer {
1283              
1284 0     0     my $self = shift;
1285 0           my $concept1 = shift;
1286 0           my $concept2 = shift;
1287            
1288 0           my $function = "_findLeastCommonSubsumer";
1289 0           &_debug($function);
1290              
1291             # check self
1292 0 0 0       if(!defined $self || !ref $self) {
1293 0           $errorhandler->_error($pkg, $function, "", 2);
1294             }
1295              
1296             # check parameter exists
1297 0 0         if(!defined $concept1) {
1298 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
1299             }
1300 0 0         if(!defined $concept2) {
1301 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
1302             }
1303              
1304             # check if valid concept
1305 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
1306 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
1307             }
1308 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
1309 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
1310             }
1311            
1312             # get the relations from the configuration file
1313 0           my $configrel = $cuifinder->_getRelString();
1314 0           $configrel=~/(REL) (\:\:) (include|exclude) (.*?)$/;
1315 0           my $relationstring = $4;
1316              
1317             # check to make certain the configuration file only contains
1318             # heirarchical relations (PAR/CHD or RB/RN).
1319 0           my @relations = split/\s*\,\s*/, $relationstring;
1320 0           foreach my $rel (@relations) {
1321 0 0         if(! ($rel=~/(PAR|CHD|RB|RN)/) ) {
1322 0           $errorhandler->_error($pkg, $function, "Method only supports heirarhical relations (PAR/CHD or RB/RN).", 10);
1323             }
1324             }
1325              
1326             # get the LCSes
1327 0 0         if($option_realtime) {
1328 0           return $self->_findLeastCommonSubsumerInRealTime($concept1, $concept2);
1329             }
1330             else {
1331              
1332             # initialize the array that will contain the lcses
1333 0           my @lcses = ();
1334            
1335             # get the lcs using the index
1336 0           my $hash = $self->_shortestPath($concept1, $concept2);
1337 0 0         if($debug) { print STDERR "done with _shortestPath\n"; }
  0            
1338 0           my %lcshash = ();
1339 0 0         if(defined $hash) {
1340 0           foreach my $path (sort keys %{$hash}) {
  0            
1341 0           my $c = ${$hash}{$path};
  0            
1342 0 0         if($c=~/C[0-9]+/) { $lcshash{$c}++; }
  0            
1343             }
1344             }
1345 0           foreach my $lcs (sort keys %lcshash) { push @lcses, $lcs; }
  0            
1346            
1347             # return the lcses
1348 0           return \@lcses;
1349             }
1350             }
1351              
1352             # this function returns the least common subsummer between two concepts
1353             # input : $concept1 <- string containing the first cui
1354             # $concept2 <- string containing the second
1355             # output: $array <- reference to an array containing the lcs(es)
1356             sub _findLeastCommonSubsumerInRealTime {
1357              
1358 0     0     my $self = shift;
1359 0           my $concept1 = shift;
1360 0           my $concept2 = shift;
1361            
1362 0           my $function = "_findLeastCommonSubsumerInRealTime";
1363 0           &_debug($function);
1364              
1365             # check self
1366 0 0 0       if(!defined $self || !ref $self) {
1367 0           $errorhandler->_error($pkg, $function, "", 2);
1368             }
1369              
1370             # check parameter exists
1371 0 0         if(!defined $concept1) {
1372 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
1373             }
1374 0 0         if(!defined $concept2) {
1375 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
1376             }
1377              
1378             # check if valid concept
1379 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
1380 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
1381             }
1382 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
1383 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
1384             }
1385            
1386             # get the shorest paths
1387 0           my $paths = $self->_findShortestPathInRealTime($concept1, $concept2);
1388            
1389             # get the child relations
1390 0           my $childstring = $cuifinder->_getChildRelations();
1391              
1392             # initialize the lcses array
1393 0           my %lcses = ();
1394            
1395             # check for the lcs in each of the paths
1396 0           foreach my $p (@{$paths}) {
  0            
1397             # get the path and the first concept
1398 0           my @path = split/\s+/, $p;
1399 0           my $concept1 = shift @path;
1400 0           my $flag = 0;
1401 0           my $counter = 0;
1402 0           my $children = 0;
1403 0           my $parent = 0;
1404 0           my @lcsarray = ();
1405            
1406 0           my $firstconcept = $concept1;
1407              
1408             # loop through the rest of the concepts looking for the first child relation
1409 0           foreach my $concept2 (@path) {
1410 0           my $relations = $cuifinder->_getRelationsBetweenCuis($concept1, $concept2);
1411 0           foreach my $item (@{$relations}) {
  0            
1412 0           $item=~/([A-Z]+) \([A-Z0-9\.]+\)/;
1413 0           my $rel = $1;
1414              
1415             # if the relation is a child we have the LCS - it is concept1
1416             # this is for the typical case
1417 0 0 0       if($childstring=~/($rel)/ && $flag == 0) {
1418 0           push @lcsarray, $concept1; $flag++;
  0            
1419             }
1420            
1421 0 0         if($childstring=~/($rel)/) { $children++; }
  0            
1422 0           else { $parent++; }
1423 0           $counter++;
1424             }
1425 0           $concept1 = $concept2;
1426             }
1427            
1428             # string of children
1429 0 0         if($counter == $children) { $lcses{$firstconcept}++; }
  0 0          
1430             # string of parents
1431 0           elsif($counter == $parent) { $lcses{$concept1}++; }
1432             # typical case
1433 0           else { foreach my $l (@lcsarray) { $lcses{$l}++; } }
  0            
1434             }
1435            
1436             # get the unique lcses - note a single lcs may have more than one path
1437 0           my @unique = ();
1438 0           foreach my $lcs (sort keys %lcses) { push @unique, $lcs; }
  0            
1439              
1440             # return the unique lcses
1441 0           return \@unique;
1442             }
1443              
1444             # method to get the Least Common Subsumer of two
1445             # paths to the root of a taxonomy
1446             # input : $array1 <- reference to an array containing
1447             # the paths to the root for cui1
1448             # $array2 <- same thing for cui2
1449             # output: $hash <- reference to a hash containing the
1450             # lcs as the key and the path as the hash
1451             sub _getLCSfromTrees {
1452              
1453 0     0     my $self = shift;
1454 0           my $arrayref1 = shift;
1455 0           my $arrayref2 = shift;
1456            
1457 0           my $function = "_getLCSfromTrees";
1458              
1459             # check self
1460 0 0 0       if(!defined $self || !ref $self) {
1461 0           $errorhandler->_error($pkg, $function, "", 2);
1462             }
1463              
1464             # check parameter exists
1465 0 0         if(!defined $arrayref1) {
1466 0           $errorhandler->_error($pkg, $function, "Error with input variable \$arrayref1.", 4);
1467             }
1468 0 0         if(!defined $arrayref2) {
1469 0           $errorhandler->_error($pkg, $function, "Error with input variable \$arrayref2.", 4);
1470             }
1471              
1472             # get the arrays
1473 0           my @array1 = split/\s+/, $arrayref1;
1474 0           my @array2 = split/\s+/, $arrayref2;
1475              
1476             # reverse them
1477 0           my @tree1 = reverse @array1;
1478 0           my @tree2 = reverse @array2;
1479 0           my $tmpString = " ".join(" ", @tree2)." ";
1480              
1481             # find the lcs
1482 0           foreach my $element (@tree1) {
1483 0 0         if($tmpString =~ / $element /) {
1484 0           return $element;
1485             }
1486             }
1487            
1488 0           return undef;
1489             }
1490              
1491             # method to find the shortest path between two concepts in realtime
1492             # input : $concept1 <- first concept
1493             # $concept2 <- second concept
1494             # output: $array <- reference to an array containing the shortest paths
1495             sub _findShortestPathInRealTime {
1496            
1497 0     0     my $self = shift;
1498 0           my $concept1 = shift;
1499 0           my $concept2 = shift;
1500            
1501 0           my $function = "_findShortestPathInRealTime";
1502 0           &_debug($function);
1503            
1504             # check self
1505 0 0 0       if(!defined $self || !ref $self) {
1506 0           $errorhandler->_error($pkg, $function, "", 2);
1507             }
1508              
1509             # check parameter exists
1510 0 0         if(!defined $concept1) {
1511 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
1512             }
1513 0 0         if(!defined $concept2) {
1514 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
1515             }
1516              
1517             # check if valid concept
1518 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
1519 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
1520             }
1521 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
1522 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
1523             }
1524              
1525             # get the length of the shortest path
1526 0           my $length = $self->_findShortestPathLengthInRealTime($concept1, $concept2);
1527            
1528             # initialize the paths array that will be returned
1529 0           my @paths = ();
1530            
1531             # if the length is two then the cuis are related in some way
1532             # so just return them
1533 0 0         if($length == 2) {
1534 0           push @paths, "$concept1 $concept2";
1535             }
1536             else {
1537              
1538             # set split to get the beginning paths
1539 0           my $split1 = int($length/2);
1540              
1541             # we need the cui itself so, if the split is zero setting the
1542             # split to one will just return the cuis
1543 0 0         if($split1 == 0) { $split1 = 1; }
  0            
1544            
1545             # set split to get the last set of paths
1546 0           my $split2 = $length - $split1 -1;
1547            
1548             # initial the hash to hold the ends
1549 0           my %ends = ();
1550            
1551             # get all the paths from concept1 of length split1
1552 0           my @paths1 = $self->_findPathsToCenter($concept1, $split1, 1, \%ends );
1553            
1554 0           my $endkey = keys %ends;
1555              
1556             # get all the paths from concept2 of length split2
1557 0           my @paths2 = $self->_findPathsToCenter($concept2, $split2, 2, \%ends );
1558              
1559             # join the two sets of paths to find all of the full paths
1560 0           @paths = $self->_joinPathsToCenter(\@paths1, \@paths2);
1561             }
1562              
1563            
1564 0           return \@paths;
1565             }
1566              
1567             # method that takes two partial paths nad joins them
1568             # input : $array1 <- reference to paths for first concept
1569             # $arrat2 <- reference to paths for second concept
1570             # output: @paths <- array containing the combined paths
1571             sub _joinPathsToCenter {
1572 0     0     my $self = shift;
1573 0           my $paths1 = shift;
1574 0           my $paths2 = shift;
1575              
1576 0           my $function = "_joinPathsToCenter";
1577 0           &_debug($function);
1578            
1579             # check self
1580 0 0 0       if(!defined $self || !ref $self) {
1581 0           $errorhandler->_error($pkg, $function, "", 2);
1582             }
1583              
1584             # check parameter exists
1585 0 0         if(!defined $paths1) {
1586 0           $errorhandler->_error($pkg, $function, "Error with input variable \$paths1.", 4);
1587             }
1588 0 0         if(!defined $paths2) {
1589 0           $errorhandler->_error($pkg, $function, "Error with input variable \$paths2.", 4);
1590             }
1591              
1592 0           my $childstring = $cuifinder->_getChildRelations();
1593 0           my $parentstring = $cuifinder->_getParentRelations();
1594            
1595 0           my @shortestpaths = ();
1596 0           foreach my $p1 (@{$paths1}) {
  0            
1597            
1598             # get the path to the center, the center and the number
1599             # of direction changes that existed in the path
1600 0           my @array1 = split/\s+/, $p1;
1601 0           my $dchange1 = pop @array1;
1602 0           my $c1 = pop @array1;
1603            
1604 0           foreach my $p2 (@{$paths2}) {
  0            
1605              
1606             # now get the paths to the center coming from the other direction,
1607             # its direction changes and the center
1608 0           my @array2 = split/\s+/, $p2;
1609 0           my $dchange2 = pop @array2;
1610 0           my $c2 = $array2[$#array2];
1611            
1612             # if the two centers are equal we have path
1613 0 0         if($c1 eq $c2) {
1614            
1615             # if undirected make certain that their is at
1616             # most one direction change
1617 0 0         if(!($option_undirected)) {
1618              
1619             # check on basic direction changes
1620 0           my $totalchanges = $dchange1 + $dchange2;
1621 0 0         if($totalchanges > 1) { next; }
  0            
1622 0 0 0       if($dchange1 > 0 && $dchange2 > 0) { next; }
  0            
1623              
1624             # set the path
1625 0           my @rarray2 = reverse @array2;
1626 0           my @path = (@array1, @rarray2);
1627            
1628             # check for complicated embedded direction changes
1629 0           my $direction = 0; my $previous = ""; my $cflag = 0;
  0            
  0            
1630 0           for my $i (0..($#path-1)) {
1631 0           my $cc1 = $path[$i];
1632 0           my $cc2 = $path[$i+1];
1633              
1634             # get the relationships the concepts
1635 0           my $ccr = $cuifinder->_getRelationsBetweenCuis($cc1, $cc2);
1636              
1637             # determine whether that relation is a
1638             # parent or a child relation
1639 0           my $pr = 0; my $cr = 0;
  0            
1640 0           foreach my $item (@{$ccr}) {
  0            
1641 0           $item=~/([A-Z]+) \([A-Za-z0-9\.]+\)/;
1642 0           my $rel = $1;
1643 0 0         if($childstring=~/($rel)/) { $cr++; }
  0            
1644 0 0         if($parentstring=~/($rel)/) { $pr++; }
  0            
1645              
1646              
1647             # determine that the first relationship in the
1648             # in the path is not a child relation
1649 0 0         if($i == 0) {
1650 0 0         if($childstring=~/($rel)/) { $cflag = 1; }
  0            
1651             }
1652             }
1653            
1654             # sometimes there are two directions
1655 0 0 0       if($cr > 0 && $pr > 0) {
1656             # if this is the case we are just going to move on
1657             # and not worry about right now. There isn't a
1658             # loop exactly
1659             }
1660             else {
1661             # determine if there has been a direction change
1662 0 0         if($previous ne "") {
1663 0 0 0       if( ($previous eq "CHD") && ($pr > 0)) { $direction++; }
  0            
1664 0 0 0       if( ($previous eq "PAR") && ($cr > 0)) { $direction++; }
  0            
1665             }
1666              
1667             # set the previous relation
1668 0 0         if($pr > 0){ $previous = "PAR"; }
  0            
1669 0 0         if($cr > 0){ $previous = "CHD"; }
  0            
1670             }
1671             }
1672              
1673             # if the path starts out with a child relation and then
1674             # moves to a parent we don't want it
1675 0 0 0       if($cflag == 1 && $direction >= 1) { next; }
  0            
1676              
1677             # if there is more than a single direction change
1678             # we don't want the path
1679 0 0         if($direction > 1) { next; }
  0            
1680              
1681             # add the path to the list of shortest paths
1682 0           my $string = join " ", @path;
1683 0           push @shortestpaths, $string;
1684             }
1685             else {
1686              
1687             # we have one or less changes if the undirectoption
1688             # was not set so we can add the path to the shortest
1689             # path array
1690 0           my @rarray2 = reverse @array2;
1691 0           my @path = (@array1, @rarray2);
1692 0           my $string = join " ", @path;
1693              
1694 0           push @shortestpaths, $string;
1695             }
1696             }
1697             }
1698             }
1699            
1700 0           return @shortestpaths;
1701             }
1702            
1703              
1704             # method that finds all the paths from a concept of a specified length
1705             # input : $start <- the concept
1706             # $length <- the length of the path
1707             # output: @paths <- array containing the paths
1708             sub _findPathsToCenter {
1709              
1710 0     0     my $self = shift;
1711 0           my $start = shift;
1712 0           my $length = shift;
1713 0           my $flag = shift;
1714 0           my $ends = shift;
1715            
1716 0           my $function = "_findPathsToCenter";
1717 0           &_debug($function);
1718            
1719             # check self
1720 0 0 0       if(!defined $self || !ref $self) {
1721 0           $errorhandler->_error($pkg, $function, "", 2);
1722             }
1723            
1724             # check parameter exists
1725 0 0         if(!defined $start) {
1726 0           $errorhandler->_error($pkg, $function, "Error with input variable \$start.", 4);
1727             }
1728            
1729             # check if valid concept
1730 0 0         if(! ($errorhandler->_validCui($start)) ) {
1731 0           $errorhandler->_error($pkg, $function, "Concept ($start) in not valid.", 6);
1732             }
1733            
1734             # set the storage
1735 0           my @path_storage= ();
1736              
1737             # set the count
1738 0           my %visited = ();
1739            
1740             # set the stack with the parents because
1741             # we want to start going up inorder to
1742             # have an LCS
1743 0           my @directions = ();
1744 0           my @relations = ();
1745 0           my @paths = ();
1746            
1747 0           my $parentstack = $cuifinder->_getParents($start);
1748 0           foreach my $element (@{$parentstack}) {
  0            
1749 0           my @array = ();
1750 0           push @paths, \@array;
1751 0           push @directions, 0;
1752 0           push @relations, "PAR";
1753             }
1754            
1755 0           my $childrenstack = $cuifinder->_getChildren($start);
1756 0           my @stack = (@{$parentstack}, @{$childrenstack});
  0            
  0            
1757 0           foreach my $element (@{$childrenstack}) {
  0            
1758 0           my @array = ();
1759 0           push @paths, \@array;
1760 0           push @directions, 0;
1761 0           push @relations, "CHD";
1762             }
1763            
1764             # now loop through the stack
1765 0           while($#stack >= 0) {
1766            
1767 0           my $concept = pop @stack;
1768 0           my $path = pop @paths;
1769 0           my $direction = pop @directions;
1770 0           my $relation = pop @relations;
1771            
1772              
1773             # set up the new path
1774 0           my @intermediate = @{$path};
  0            
1775 0           my $series = join " ", @intermediate;
1776 0           push @intermediate, $concept;
1777 0           my $distance = $#intermediate + 1;
1778              
1779             # check if the distance is greater than what we
1780             # already have - if so we are done
1781 0 0         if($distance > $length) {
1782 0           @stack = ();
1783 0           next;
1784             }
1785              
1786             # check that the concept is not one of the forbidden concepts
1787 0 0         if($cuifinder->_forbiddenConcept($concept)) { next; }
  0            
1788              
1789             # check if concept has been visited already through that path
1790 0           my $v = "$concept : $series";
1791 0 0         if(exists $visited{$v}) { next; }
  0            
1792 0           else { $visited{$v}++; }
1793              
1794             # check if we have a path of approrpiate length
1795             # if so add it to the storage
1796 0 0         if($distance == $length) {
1797 0           my $element = $intermediate[$#intermediate];
1798            
1799 0           push @intermediate, $direction;
1800              
1801 0 0         if($flag == 1) {
    0          
1802 0           ${$ends}{$element}++;
  0            
1803 0           push @path_storage, \@intermediate;
1804             }
1805             elsif($flag == 2) {
1806 0 0         if(exists ${$ends}{$element}) {
  0            
1807 0           push @path_storage, \@intermediate;
1808             }
1809            
1810             }
1811 0           next;
1812             }
1813            
1814             # print information into the file if debugpath option is set
1815 0 0         if($option_debugpath) {
1816 0           my $d = $#intermediate+1;
1817 0           print DEBUG_FILE "$concept\t$d\t@intermediate\n";
1818             }
1819            
1820              
1821             # we are going to start with the parents here; the code
1822             # for both is similar except for the relation/direction
1823             # which is why I have the seperate right now - currently
1824            
1825             # if the previous direction was a child we have a change in direction
1826 0           my $dchange = $direction;
1827            
1828             # if the undirected option is set the dchange doesn't matter
1829             # otherwise we need to check
1830 0 0         if(!$option_undirected) {
1831 0 0         if($relation eq "CHD") { $dchange = $direction + 1; }
  0            
1832             }
1833              
1834             # if we have not had more than a single direction change
1835 0 0         if($dchange < 2) {
1836             # search through the parents
1837 0           my $parents = $cuifinder->_getParents($concept);
1838 0           foreach my $parent (@{$parents}) {
  0            
1839            
1840             # check if concept is already in the path
1841 0 0         if($series=~/$parent/) { next; }
  0            
1842 0 0         if($parent eq $concept) { next; }
  0            
1843            
1844             # if it isn't add it to the stack
1845 0           unshift @stack, $parent;
1846 0           unshift @paths, \@intermediate;
1847 0           unshift @relations, "PAR";
1848 0           unshift @directions, $dchange;
1849             }
1850             }
1851            
1852             # now with the chilcren if the previous direction was a parent we have
1853             # have to change the direction
1854 0           $dchange = $direction;
1855             # if the undirected option is set the dchange doesn't matter
1856             # otherwise we need to check
1857 0 0         if(!$option_undirected) {
1858 0 0         if($relation eq "PAR") { $dchange = $direction + 1; }
  0            
1859             }
1860              
1861             # if we have not had more than a single direction change
1862 0 0         if($dchange < 2) {
1863             # now search through the children
1864 0           my $children = $cuifinder->_getChildren($concept);
1865 0           foreach my $child (@{$children}) {
  0            
1866            
1867             # check if child cui has already in the path
1868 0 0         if($series=~/$child/) { next; }
  0            
1869 0 0         if($child eq $concept) { next; }
  0            
1870              
1871             # if it isn't add it to the stack
1872 0           unshift @stack, $child;
1873 0           unshift @paths, \@intermediate;
1874 0           unshift @relations, "CHD";
1875 0           unshift @directions, $dchange;
1876             }
1877             }
1878             }
1879             # set the return
1880 0           my @return_paths = ();
1881 0           foreach my $p (@path_storage) {
1882 0           unshift @{$p}, $start;
  0            
1883 0           my $string = join " " , @{$p};
  0            
1884 0           push @return_paths, $string;
1885             }
1886              
1887 0           return @return_paths;
1888             }
1889            
1890              
1891             # method that finds the minimum depth
1892             # input : $concept <- the first concept
1893             # output: $int <- the minimum depth
1894             sub _findMinimumDepthInRealTime {
1895              
1896 0     0     my $self = shift;
1897 0           my $concept = shift;
1898            
1899 0           my $function = "_findMinimumDepthInRealTime";
1900 0           &_debug($function);
1901            
1902             # check self
1903 0 0 0       if(!defined $self || !ref $self) {
1904 0           $errorhandler->_error($pkg, $function, "", 2);
1905             }
1906              
1907             # check parameter exists
1908 0 0         if(!defined $concept) {
1909 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
1910             }
1911             # check if valid concept
1912 0 0         if(! ($errorhandler->_validCui($concept)) ) {
1913 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
1914             }
1915            
1916             # set the count
1917 0           my %visited = ();
1918            
1919             # set the stack with the roots children
1920 0           my @paths = ();
1921 0           my $rstack = $cuifinder->_getChildren($root);
1922 0           my @stack = @{$rstack};
  0            
1923              
1924 0           foreach my $element (@stack) {
1925 0           my @array = ();
1926 0           push @paths, \@array;
1927             }
1928            
1929             # now loop through the stack
1930 0           while($#stack >= 0) {
1931            
1932 0           my $cui = pop @stack;
1933 0           my $path = pop @paths;
1934            
1935             # set up the new path
1936 0           my @intermediate = @{$path};
  0            
1937 0           my $series = join " ", @intermediate;
1938 0           push @intermediate, $cui;
1939 0           my $distance = $#intermediate;
1940              
1941             # check that the concept is not one of the forbidden concepts
1942 0 0         if($cuifinder->_forbiddenConcept($cui)) { next; }
  0            
1943              
1944             # check if concept has been visited already through that path
1945 0 0         if(exists $visited{$cui}) { next; }
  0            
1946 0           else { $visited{$cui}++; }
1947            
1948             # check if it is our concept2
1949 0 0         if($cui eq $concept) {
1950 0           my $path_length = $distance + 2;
1951 0           return $path_length;
1952             }
1953            
1954             # now search through the children
1955 0           my $children = $cuifinder->_getChildren($cui);
1956 0           foreach my $child (@{$children}) {
  0            
1957             # check if child cui has already in the path
1958 0 0         if($series=~/$child/) { next; }
  0            
1959 0 0         if($child eq $cui) { next; }
  0            
1960              
1961             # if it isn't add it to the stack
1962 0           unshift @stack, $child;
1963 0           unshift @paths, \@intermediate;
1964             }
1965             }
1966             # no path was found return -1
1967 0           return -1;
1968             }
1969              
1970              
1971             # method that finds the maximum depth
1972             # input : $concept <- the first concept
1973             # output: $int <- the minimum depth
1974             sub _findMaximumDepthInRealTime {
1975              
1976 0     0     my $self = shift;
1977 0           my $concept = shift;
1978              
1979 0 0 0       return () if(!defined $self || !ref $self);
1980              
1981 0           my $function = "_findMaximumDepthInRealtime($concept)";
1982 0           &_debug($function);
1983            
1984             # set the storage
1985 0           my $maximum_path_length = -1;
1986              
1987             # set the stack
1988 0           my @stack = ();
1989 0           push @stack, $concept;
1990              
1991             # set the count
1992 0           my %visited = ();
1993              
1994             # set the paths
1995 0           my @paths = ();
1996 0           my @empty = ();
1997 0           push @paths, \@empty;
1998              
1999             # now loop through the stack
2000 0           while($#stack >= 0) {
2001            
2002 0           my $cui = $stack[$#stack];
2003 0           my $path = $paths[$#paths];
2004              
2005             # set up the new path
2006 0           my @intermediate = @{$path};
  0            
2007 0           my $series = join " ", @intermediate;
2008 0           push @intermediate, $cui;
2009            
2010             # print information into the file if debugpath option is set
2011 0 0         if($option_debugpath) {
2012 0           my $d = $#intermediate+1;
2013 0           print DEBUG_FILE "$cui\t$d\t@intermediate\n";
2014             }
2015            
2016             # check that the cui is not one of the forbidden concepts
2017 0 0         if($cuifinder->_forbiddenConcept($cui)) {
2018 0           pop @stack; pop @paths;
  0            
2019 0           next;
2020             }
2021              
2022             # check if concept has been visited already
2023 0 0         if(exists $visited{$cui}{$series}) {
2024 0           pop @stack; pop @paths;
  0            
2025 0           next;
2026             }
2027 0           else { $visited{$cui}{$series}++; }
2028            
2029             # if the concept is the umls root - we are done
2030 0 0         if($cui eq $root) {
2031 0           my $length = $#intermediate + 1;
2032 0 0         if($length > $maximum_path_length) {
2033 0           $maximum_path_length = $length;
2034             }
2035 0           next;
2036             }
2037            
2038             # get all the parents
2039 0           my $parents = $cuifinder->_getParents($cui);
2040              
2041             # if there are no children we are finished with this concept
2042 0 0         if($#{$parents} < 0) {
  0            
2043 0           pop @stack; pop @paths;
  0            
2044 0           next;
2045             }
2046              
2047             # search through the children
2048 0           my $stackflag = 0;
2049 0           foreach my $parent (@{$parents}) {
  0            
2050            
2051             # check if concept has already in the path
2052 0 0         if($series=~/$parent/) { next; }
  0            
2053 0 0         if($cui eq $parent) { next; }
  0            
2054              
2055             # if it isn't continue on with the depth first search
2056 0           push @stack, $parent;
2057 0           push @paths, \@intermediate;
2058 0           $stackflag++;
2059             }
2060            
2061             # check to make certain there were actually children
2062 0 0         if($stackflag == 0) { pop @stack; pop @paths; }
  0            
  0            
2063             }
2064              
2065 0           return $maximum_path_length;
2066             }
2067              
2068             # method that finds all the ancestors of a given concept
2069             # input : $concept1 <- the concept
2070             # output: %ancestors <- hash containing the ancestors
2071             sub _findAncestors {
2072 0     0     my $self = shift;
2073 0           my $concept = shift;
2074            
2075 0           my $function = "_findAncestors";
2076 0           &_debug($function);
2077            
2078             # check self
2079 0 0 0       if(!defined $self || !ref $self) {
2080 0           $errorhandler->_error($pkg, $function, "", 2);
2081             }
2082              
2083 0           my @paths = $self->_pathsToRoot($concept);
2084            
2085 0           my %ancestors = ();
2086 0           foreach my $path (@paths) {
2087 0           foreach my $string (@{$path}) {
  0            
2088 0           my @cuis = split/\s+/, $string;
2089 0           foreach my $cui (@cuis) {
2090 0           $ancestors{$cui} = 1;
2091             }
2092             }
2093             }
2094            
2095 0           return \%ancestors;
2096              
2097             }
2098             # method that finds the closeness centrality of a concept
2099             # input : $concept1 <- the concept
2100             # output: $double <- the closeness
2101             sub _findClosenessCentrality {
2102 0     0     my $self = shift;
2103 0           my $concept = shift;
2104            
2105 0           my $function = "_findClosenessCentrality";
2106 0           &_debug($function);
2107            
2108             # check self
2109 0 0 0       if(!defined $self || !ref $self) {
2110 0           $errorhandler->_error($pkg, $function, "", 2);
2111             }
2112              
2113             # get the cuis associated with the config file
2114 0           my $hashref= $cuifinder->_getCuiList();
2115              
2116             # calculate the length of the shortest path for each cui
2117 0           my $sum = 0;
2118 0           foreach my $cui (sort keys %{$hashref}) {
  0            
2119 0 0         if($cui eq $concept) { next; }
  0            
2120 0           my $d = $self->_findShortestPathLength($concept, $cui);
2121 0 0         if($d > 0) {
2122 0           $sum += $d;
2123             }
2124             }
2125            
2126             # return closeness
2127 0           return (1/$sum);
2128             }
2129              
2130             # method that finds the length of the shortest path
2131             # input : $concept1 <- the first concept
2132             # $concept2 <- the second concept
2133             # output: $int <- the length of the shortest path between them
2134             sub _findShortestPathLength {
2135              
2136 0     0     my $self = shift;
2137 0           my $concept1 = shift;
2138 0           my $concept2 = shift;
2139            
2140 0           my $function = "_findShortestPathLength";
2141 0           &_debug($function);
2142            
2143             # check self
2144 0 0 0       if(!defined $self || !ref $self) {
2145 0           $errorhandler->_error($pkg, $function, "", 2);
2146             }
2147              
2148 0 0         if($option_realtime) {
2149              
2150             #my $length = $self->_findShortestPathLengthInCache($concept1, $concept2);
2151             #if(defined $length) { return $length; }
2152             #else {
2153 0           my $length = $self->_findShortestPathLengthInRealTime($concept1, $concept2);
2154             #if(!$option_undirected) {
2155             #$self->_storeShortestPathLengthInCache($concept1, $concept2, $length);
2156             #}
2157 0           return $length;
2158             #}
2159             }
2160             else {
2161 0           my $paths = $self->_findShortestPathThroughLCS($concept1, $concept2);
2162 0           my $path = shift @{$paths};
  0            
2163 0 0         if(defined $path) {
2164 0           my @cuis = split/\s+/, $path;
2165 0           my $length = $#cuis + 1;
2166 0           return $length;
2167             }
2168 0           else { return -1; }
2169             }
2170             }
2171              
2172              
2173             sub _storeShortestPathLengthInCache
2174             {
2175 0     0     my $self = shift;
2176 0           my $concept1 = shift;
2177 0           my $concept2 = shift;
2178 0           my $length = shift;
2179              
2180 0           my $function = "_storeShortestPathLengthInCache";
2181 0           &_debug($function);
2182            
2183             # check self
2184 0 0 0       if(!defined $self || !ref $self) {
2185 0           $errorhandler->_error($pkg, $function, "", 2);
2186             }
2187              
2188             # get the info table name
2189 0           my $cacheTableName = $cuifinder->_getCacheTableName();
2190            
2191             # set the index DB handler
2192 0           my $sdb = $self->{'sdb'};
2193 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
2194              
2195             # store the length in the cache table
2196 0           my $arrRef = $sdb->do("INSERT INTO $cacheTableName (CUI1, CUI2, LENGTH) VALUES ('$concept1', '$concept2', '$length')");
2197 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
2198             }
2199              
2200             sub _findShortestPathLengthInCache
2201             {
2202 0     0     my $self = shift;
2203 0           my $concept1 = shift;
2204 0           my $concept2 = shift;
2205            
2206 0           my $function = "_findShortestPathLengthInCache";
2207 0           &_debug($function);
2208            
2209             # check self
2210 0 0 0       if(!defined $self || !ref $self) {
2211 0           $errorhandler->_error($pkg, $function, "", 2);
2212             }
2213             # check parameter exists
2214 0 0         if(!defined $concept1) {
2215 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
2216             }
2217 0 0         if(!defined $concept2) {
2218 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
2219             }
2220              
2221             # check if valid concept
2222 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
2223 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
2224             }
2225 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
2226 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
2227             }
2228              
2229             # get the info table name
2230 0           my $cacheTableName = $cuifinder->_getCacheTableName();
2231            
2232             # set the index DB handler
2233 0           my $sdb = $self->{'sdb'};
2234 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
2235              
2236             # get length from the cache table if it exists
2237 0           my $arrRef = $sdb->selectcol_arrayref("select LENGTH from $cacheTableName where CUI1=\'$concept1\' and CUI2=\'$concept2\'");
2238 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
2239              
2240             # get the depth from the array
2241 0           my $length = shift @{$arrRef};
  0            
2242            
2243 0           return $length;
2244             }
2245              
2246             # method that finds the length of the shortest path
2247             # input : $concept1 <- the first concept
2248             # $concept2 <- the second concept
2249             # output: $length <- the length of the shortest path between them
2250             sub _findShortestPathLengthInRealTime {
2251              
2252 0     0     my $self = shift;
2253 0           my $concept1 = shift;
2254 0           my $concept2 = shift;
2255            
2256 0           my $function = "_findShortestPathLengthInRealTime";
2257 0           &_debug($function);
2258            
2259             # check self
2260 0 0 0       if(!defined $self || !ref $self) {
2261 0           $errorhandler->_error($pkg, $function, "", 2);
2262             }
2263              
2264             # check parameter exists
2265 0 0         if(!defined $concept1) {
2266 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
2267             }
2268 0 0         if(!defined $concept2) {
2269 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
2270             }
2271              
2272             # check if valid concept
2273 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
2274 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
2275             }
2276 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
2277 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
2278             }
2279            
2280             # we need to check this in both directions because the BFS
2281             # the direction matters and with the undirected option
2282             # we always want to go up and the problem arrises in the
2283             # cases in which we continue down in a straight line such
2284             # that CUI1 is the LCS. Maybe there is a better way to
2285             # do this but I am not certain quite yet
2286             #my $l1 = $self->_findShortestPathLengthInRealTimeBFS($concept1, $concept2, -1);
2287              
2288             # now swap
2289             #my $l2 = $self->_findShortestPathLengthInRealTimeBFS($concept2, $concept1, $l1);
2290            
2291             # return the other if it is -1
2292             #if($l1 < 0) { return $l2; }
2293             #if($l2 < 0) { return $l1; }
2294            
2295             # return the lowest
2296             #return $l1 < $l2 ? $l1 : $l2;
2297              
2298 0           my $length = $self->_findShortestPathLengthInRealTimeBFS2($concept1, $concept2);
2299              
2300 0           return $length;
2301             }
2302            
2303              
2304             # method that finds the length of the shortest path
2305             # input : $concept1 <- the first concept
2306             # $concept2 <- the second concept
2307             # output: $length <- the length of the shortest path between them
2308             sub _findShortestPathLengthInRealTimeBFS2 {
2309              
2310 0     0     my $self = shift;
2311 0           my $concept1 = shift;
2312 0           my $concept2 = shift;
2313            
2314 0           my $function = "_findShortestPathLengthInRealTimeBFS2($concept1, $concept2)";
2315 0           &_debug($function);
2316            
2317             # check self
2318 0 0 0       if(!defined $self || !ref $self) {
2319 0           $errorhandler->_error($pkg, $function, "", 2);
2320             }
2321              
2322             # base case
2323 0 0         if($concept1 eq $concept2) { return 2; }
  0            
2324              
2325             # set the count
2326 0           my %visited1 = (); my %visited2 = ();
  0            
2327            
2328             # set the stack
2329 0           my $rstack1 = $cuifinder->_getParents($concept1);
2330 0           my $rstack2 = $cuifinder->_getParents($concept2);
2331 0           my @stack1 = @{$rstack1}; my @stack2 = @{$rstack2};
  0            
  0            
  0            
2332              
2333 0           my @directions1 = (); my @directions2 = ();
  0            
2334 0           my @relations1 = (); my @relations2 = ();
  0            
2335 0           my @paths1 = (); my @paths2 = ();
  0            
2336 0           my $path_length1 = -1; my $path_length2 = -1;
  0            
2337              
2338             # get the parents
2339 0           foreach my $element (@stack1) {
2340 0           my @array1 = ();
2341 0           push @paths1, \@array1;
2342 0           push @directions1, 0;
2343 0           push @relations1, "PAR";
2344             }
2345 0           foreach my $element (@stack2) {
2346 0           my @array2 = ();
2347 0           push @paths2, \@array2;
2348 0           push @directions2, 0;
2349 0           push @relations2, "PAR";
2350             }
2351              
2352             # now loop through the stack
2353 0   0       while($#stack1 >= 0 || $#stack2 >= 0) {
2354            
2355 0           my $c1 = ""; my $c2 = "";
  0            
2356 0           my $path1 = ""; my $path2 = "";
  0            
2357 0           my $direction1 = ""; my $direction2 = "";
  0            
2358 0           my $relation1 = ""; my $relation2 = "";
  0            
2359 0           my @intermediate1 = (); my @intermediate2 = ();
  0            
2360 0           my $series1 = ""; my $series2 = "";
  0            
2361 0           my $distance1 = -1; my $distance2 = -1;
  0            
2362 0           my $cui1flag = 0; my $cui2flag = 0;
  0            
2363              
2364 0 0         if($#stack1 >=0) {
2365 0           $c1 = pop @stack1;
2366 0           $path1 = pop @paths1;
2367 0           $direction1 = pop @directions1;
2368 0           $relation1 = pop @relations1;
2369              
2370 0           @intermediate1 = @{$path1};
  0            
2371 0           $series1 = join " ", @intermediate1;
2372 0           push @intermediate1, $c1;
2373 0           $distance1 = $#intermediate1;
2374 0           $cui1flag++;
2375             }
2376            
2377 0 0         if($#stack2 >=0) {
2378 0           $c2 = pop @stack2;
2379 0           $path2 = pop @paths2;
2380 0           $direction2 = pop @directions2;
2381 0           $relation2 = pop @relations2;
2382            
2383 0           @intermediate2 = @{$path2};
  0            
2384 0           $series2 = join " ", @intermediate2;
2385 0           push @intermediate2, $c2;
2386 0           $distance2 = $#intermediate2;
2387 0           $cui2flag++;
2388             }
2389              
2390             # check if it is our concept2
2391 0 0         if($c1 eq $concept2) {
2392 0           $path_length1 = $distance1 + 2;
2393 0 0         if($#stack2 < 0) { return $path_length1; }
  0            
2394             }
2395              
2396            
2397             # check if it is our concept2
2398 0 0         if($c2 eq $concept1) {
2399 0           $path_length2 = $distance2 + 2;
2400 0 0         if($#stack1 < 0) { return $path_length2; }
  0            
2401             }
2402              
2403             # if both paths have been set return the shortest
2404 0 0 0       if($path_length1 > -1 && $path_length2 > -1) {
2405 0 0         return $path_length1 < $path_length2 ? $path_length1 : $path_length2;
2406             }
2407              
2408             # if path length1 is set and is distance2 is greater then what
2409             # ever path we find for distance2 is going to be more than
2410             # for pathlength1 so return (this also works for pathlength2)
2411 0 0 0       if($path_length1 > -1 && $path_length1 <= ($distance2+2)) { return $path_length1; }
  0            
2412 0 0 0       if($path_length2 > -1 && $path_length2 <= ($distance1+2)) { return $path_length2; }
  0            
2413            
2414              
2415             # check if concept has been visited already through that path
2416 0           my $flag1 = 0; my $flag2 = 0;
  0            
2417 0 0         if(exists $visited1{$c1}) { $flag1++; }
  0            
2418 0           else { $visited1{$c1}++; }
2419              
2420 0 0         if(exists $visited2{$c2}) { $flag2++; }
  0            
2421 0           else { $visited2{$c2}++; }
2422              
2423             # set the flags if nothing exists
2424 0 0         if($cui1flag == 0) { $flag1++; }
  0            
2425 0 0         if($cui2flag == 0) { $flag2++; }
  0            
2426              
2427             # check that the concept is not one of the forbidden concepts
2428 0 0 0       if($cui1flag > 0 && $cuifinder->_forbiddenConcept($c1)) { $flag1++; }
  0            
2429 0 0 0       if($cui2flag > 0 && $cuifinder->_forbiddenConcept($c2)) { $flag2++; }
  0            
2430              
2431             # if both concepts have been flagged - next
2432 0 0 0       if($flag1 > 0 && $flag2 > 0) { next; }
  0            
2433              
2434             # if the previous direction was a child we have a change in direction
2435 0           my $dchange1 = $direction1;
2436 0           my $dchange2 = $direction2;
2437            
2438             # if the undirected option is set the dchange doesn't matter
2439             # otherwise we need to check
2440 0 0         if(!$option_undirected) {
2441 0 0         if($relation1 eq "CHD") { $dchange1 = $direction1 + 1; }
  0            
2442 0 0         if($relation2 eq "CHD") { $dchange2 = $direction2 + 1; }
  0            
2443             }
2444            
2445             # if we have not had more than a single direction change
2446 0           my $parents1; my $parents2;
2447 0 0 0       if($flag1 == 0 && $dchange1 < 2) {
2448 0           $parents1 = $cuifinder->_getParents($c1);
2449             }
2450 0 0 0       if($flag2 == 0 && $dchange2 < 2) {
2451 0           $parents2 = $cuifinder->_getParents($c2);
2452             }
2453            
2454 0           foreach my $parent1 (@{$parents1}) {
  0            
2455             # check if concept has already in the path
2456 0 0         if($series1=~/$parent1/) { next; }
  0            
2457 0 0         if($parent1 eq $c1) { next; }
  0            
2458 0           unshift @stack1, $parent1;
2459 0           unshift @paths1, \@intermediate1;
2460 0           unshift @relations1, "PAR";
2461 0           unshift @directions1, $dchange1;
2462             }
2463              
2464 0           foreach my $parent2 (@{$parents2}) {
  0            
2465             # check if concept has already in the path
2466 0 0         if($series2=~/$parent2/) { next; }
  0            
2467 0 0         if($parent2 eq $c2) { next; }
  0            
2468              
2469 0           unshift @stack2, $parent2;
2470 0           unshift @paths2, \@intermediate2;
2471 0           unshift @relations2, "PAR";
2472 0           unshift @directions2, $dchange2;
2473             }
2474            
2475            
2476             # now with the chilcren if the previous direction was a parent we have
2477             # have to change the direction
2478 0           $dchange1 = $direction1;
2479 0           $dchange2 = $direction2;
2480              
2481             # if the undirected option is set the dchange doesn't matter
2482             # otherwise we need to check
2483 0 0         if(!$option_undirected) {
2484 0 0         if($relation1 eq "PAR") { $dchange1 = $direction1 + 1; }
  0            
2485 0 0         if($relation2 eq "PAR") { $dchange2 = $direction2 + 1; }
  0            
2486             }
2487              
2488            
2489              
2490             # if we have not had more than a single direction change
2491             # now search through the children
2492 0           my $children1 = undef; my $children2 = undef;
  0            
2493 0 0 0       if($flag1 == 0 && $dchange1 < 2) {
2494 0           $children1 = $cuifinder->_getChildren($c1);
2495             }
2496              
2497 0 0 0       if($flag2 == 0 && $dchange2 < 2) {
2498 0           $children2 = $cuifinder->_getChildren($c2);
2499             }
2500            
2501 0           foreach my $child1 (@{$children1}) {
  0            
2502             # check if child cui has already in the path
2503 0 0         if($series1=~/$child1/) { next; }
  0            
2504 0 0         if($child1 eq $c1) { next; }
  0            
2505            
2506             # if not continue
2507 0           unshift @stack1, $child1;
2508 0           unshift @paths1, \@intermediate1;
2509 0           unshift @relations1, "CHD";
2510 0           unshift @directions1, $dchange1;
2511             }
2512              
2513 0           foreach my $child2 (@{$children2}) {
  0            
2514             # check if child cui has already in the path
2515 0 0         if($series2=~/$child2/) { next; }
  0            
2516 0 0         if($child2 eq $c2) { next; }
  0            
2517            
2518             # if not continue
2519 0           unshift @stack2, $child2;
2520 0           unshift @paths2, \@intermediate2;
2521 0           unshift @relations2, "CHD";
2522 0           unshift @directions2, $dchange2;
2523             }
2524             }
2525             # no path was found return -1
2526 0           return -1;
2527             }
2528              
2529              
2530             # method that finds the length of the shortest path
2531             # input : $concept1 <- the first concept
2532             # $concept2 <- the second concept
2533             # output: $length <- the length of the shortest path between them
2534             sub _findShortestPathLengthInRealTimeBFS {
2535              
2536 0     0     my $self = shift;
2537 0           my $concept1 = shift;
2538 0           my $concept2 = shift;
2539 0           my $length = shift;
2540            
2541 0           my $function = "_findShortestPathLengthInRealTimeBFS($concept1, $concept2, $length)";
2542 0           &_debug($function);
2543            
2544             # check self
2545 0 0 0       if(!defined $self || !ref $self) {
2546 0           $errorhandler->_error($pkg, $function, "", 2);
2547             }
2548              
2549             # set the count
2550 0           my %visited = ();
2551            
2552             # set the stack
2553 0           my $rstack = $cuifinder->_getParents($concept1);
2554 0           my @stack = @{$rstack};
  0            
2555 0           my @directions = ();
2556 0           my @relations = ();
2557 0           my @paths = ();
2558            
2559             # get the parents
2560 0           foreach my $element (@stack) {
2561 0           my @array = ();
2562 0           push @paths, \@array;
2563 0           push @directions, 0;
2564 0           push @relations, "PAR";
2565             }
2566            
2567             # now loop through the stack
2568 0           while($#stack >= 0) {
2569            
2570 0           my $concept = pop @stack;
2571 0           my $path = pop @paths;
2572 0           my $direction = pop @directions;
2573 0           my $relation = pop @relations;
2574            
2575             # set up the new path
2576 0           my @intermediate = @{$path};
  0            
2577 0           my $series = join " ", @intermediate;
2578 0           push @intermediate, $concept;
2579 0           my $distance = $#intermediate;
2580              
2581             # if we are going in the other direction and we
2582             # have already found a shorter path the other way
2583 0 0 0       if( ($length) > 0 && ( ($distance+2) >= $length) ) {
2584 0           return $length;
2585             }
2586              
2587             # check if it is our concept2
2588 0 0         if($concept eq $concept2) {
2589 0           my $path_length = $distance + 2;
2590 0           return $path_length;
2591             }
2592              
2593             # check if concept has been visited already through that path
2594 0 0         if(exists $visited{$concept}) { next; }
  0            
2595 0           else { $visited{$concept}++; }
2596              
2597             # check that the concept is not one of the forbidden concepts
2598 0 0         if($cuifinder->_forbiddenConcept($concept)) { next; }
  0            
2599            
2600             # print information into the file if debugpath option is set
2601 0 0         if($option_debugpath) {
2602 0           my $d = $#intermediate+1;
2603 0           print DEBUG_FILE "$concept\t$d\t@intermediate\n";
2604             }
2605            
2606             # if the previous direction was a child we have a change in direction
2607 0           my $dchange = $direction;
2608            
2609             # if the undirected option is set the dchange doesn't matter
2610             # otherwise we need to check
2611 0 0         if(!$option_undirected) {
2612 0 0         if($relation eq "CHD") { $dchange = $direction + 1; }
  0            
2613             }
2614            
2615             # if we have not had more than a single direction change
2616 0 0         if($dchange < 2) {
2617             # search through the parents
2618 0           my $parents = $cuifinder->_getParents($concept);
2619 0           foreach my $parent (@{$parents}) {
  0            
2620             # check if concept has already in the path
2621 0 0         if($series=~/$parent/) { next; }
  0            
2622 0 0         if($parent eq $concept) { next; }
  0            
2623 0           unshift @stack, $parent;
2624 0           unshift @paths, \@intermediate;
2625 0           unshift @relations, "PAR";
2626 0           unshift @directions, $dchange;
2627             }
2628             }
2629            
2630             # now with the chilcren if the previous direction was a parent we have
2631             # have to change the direction
2632 0           $dchange = $direction;
2633             # if the undirected option is set the dchange doesn't matter
2634             # otherwise we need to check
2635 0 0         if(!$option_undirected) {
2636 0 0         if($relation eq "PAR") { $dchange = $direction + 1; }
  0            
2637             }
2638            
2639             # if we have not had more than a single direction change
2640 0 0         if($dchange < 2) {
2641             # now search through the children
2642 0           my $children = $cuifinder->_getChildren($concept);
2643 0           foreach my $child (@{$children}) {
  0            
2644            
2645             # check if child cui has already in the path
2646 0 0         if($series=~/$child/) { next; }
  0            
2647 0 0         if($child eq $concept) { next; }
  0            
2648            
2649             # if not continue
2650 0           unshift @stack, $child;
2651 0           unshift @paths, \@intermediate;
2652 0           unshift @relations, "CHD";
2653 0           unshift @directions, $dchange;
2654             }
2655             }
2656             }
2657            
2658             # no path was found return -1
2659 0           return -1;
2660             }
2661              
2662              
2663             # this function finds the shortest path between
2664             # two concepts and returns the path. in the process
2665             # it determines the least common subsumer for that
2666             # path so it returns both
2667             # input : $concept1 <- string containing the first cui
2668             # $concept2 <- string containing the second
2669             # output: $hash <- reference to a hash containing the
2670             # lcs as the key and the path as the
2671             # value
2672             sub _shortestPath {
2673              
2674 0     0     my $self = shift;
2675 0           my $concept1 = shift;
2676 0           my $concept2 = shift;
2677              
2678 0           my $function = "_shortestPath";
2679 0           &_debug($function);
2680            
2681             # check self
2682 0 0 0       if(!defined $self || !ref $self) {
2683 0           $errorhandler->_error($pkg, $function, "", 2);
2684             }
2685              
2686             # check parameter exists
2687 0 0         if(!defined $concept1) {
2688 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
2689             }
2690 0 0         if(!defined $concept2) {
2691 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
2692             }
2693              
2694             # check if valid concept
2695 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
2696 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
2697             }
2698 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
2699 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
2700             }
2701              
2702             # Get the paths to root for each ofhte concepts
2703 0           my $lTrees = $self->_pathsToRoot($concept1);
2704              
2705 0           my $rTrees = $self->_pathsToRoot($concept2);
2706            
2707             # Find the shortest path in these trees.
2708 0           my %lcsLengths = ();
2709 0           my %lcsPaths = ();
2710 0           my $lcs = "";
2711 0           foreach my $lTree (@{$lTrees}) {
  0            
2712 0           foreach my $rTree (@{$rTrees}) {
  0            
2713 0           $lcs = $self->_getLCSfromTrees($lTree, $rTree);
2714 0 0         if(defined $lcs) {
2715            
2716 0           my $lCount = 0;
2717 0           my $rCount = 0;
2718 0           my $length = 0;
2719 0           my $concept = "";
2720            
2721 0           my @lArray = ();
2722 0           my @rArray = ();
2723            
2724 0           my @lTreeArray = split/\s+/, $lTree;
2725 0           my @rTreeArray = split/\s+/, $rTree;
2726            
2727 0           foreach $concept (reverse @lTreeArray) {
2728 0           $lCount++;
2729 0           push @lArray, $concept;
2730 0 0         last if($concept eq $lcs);
2731              
2732             }
2733 0           foreach $concept (reverse @rTreeArray) {
2734 0           $rCount++;
2735 0 0         last if($concept eq $lcs);
2736 0           push @rArray, $concept;
2737            
2738             }
2739              
2740             # length of the path
2741 0 0         if(exists $lcsLengths{$lcs}) {
2742 0 0         if($lcsLengths{$lcs} >= ($rCount + $lCount - 1)) {
2743 0           $lcsLengths{$lcs} = $rCount + $lCount - 1;
2744 0           my @fullpath = (@lArray, (reverse @rArray));
2745 0           push @{$lcsPaths{$lcs}}, \@fullpath;
  0            
2746             }
2747             }
2748             else {
2749 0           $lcsLengths{$lcs} = $rCount + $lCount - 1;
2750 0           my @fullpath = (@lArray, (reverse @rArray));
2751 0           push @{$lcsPaths{$lcs}}, \@fullpath;
  0            
2752             }
2753             }
2754             }
2755             }
2756            
2757             # If no paths exist
2758 0 0         if(!scalar(keys(%lcsPaths))) {
2759 0           return undef;
2760             }
2761              
2762             # get the lcses and their associated path(s)
2763 0           my %rhash = ();
2764 0           my $prev_len = -1;
2765 0           foreach my $lcs (sort {$lcsLengths{$a} <=> $lcsLengths{$b}} keys(%lcsLengths)) {
  0            
2766 0 0 0       if( ($prev_len == -1) or ($prev_len == $lcsLengths{$lcs}) ) {
2767 0           foreach my $pathref (@{$lcsPaths{$lcs}}) {
  0            
2768 0 0         if( ($#{$pathref}+1) == $lcsLengths{$lcs}) {
  0            
2769 0           my $path = join " ", @{$pathref};
  0            
2770 0           $rhash{$path} = $lcs;
2771             }
2772             }
2773             }
2774 0           else { last; }
2775 0           $prev_len = $lcsLengths{$lcs};
2776             }
2777            
2778             # return a reference to the hash containing the lcses and their path(s)
2779 0           return \%rhash;
2780             }
2781              
2782              
2783             # method that finds the length of the shortest path
2784             # input : $concept1 <- the first concept
2785             # $concept2 <- the second concept
2786             # output: $int <- number cuis closer to concept1 than concept2
2787             sub _findNumberOfCloserConcepts {
2788              
2789 0     0     my $self = shift;
2790 0           my $concept1 = shift;
2791 0           my $concept2 = shift;
2792            
2793 0           my $function = "_findNumberOfCloserConcepts($concept1, $concept2)";
2794 0           &_debug($function);
2795            
2796             # check self
2797 0 0 0       if(!defined $self || !ref $self) {
2798 0           $errorhandler->_error($pkg, $function, "", 2);
2799             }
2800              
2801 0 0         if($concept1 eq $concept2) { return 0; }
  0            
2802              
2803 0           my %closerConceptHash = ();
2804              
2805             # set the count
2806 0           my %visited1 = (); my %visited2 = ();
  0            
2807            
2808             # set the stack
2809 0           my $rstack1 = $cuifinder->_getParents($concept1);
2810 0           my $rstack2 = $cuifinder->_getParents($concept2);
2811 0           my @stack1 = @{$rstack1}; my @stack2 = @{$rstack2};
  0            
  0            
  0            
2812              
2813 0           my @directions1 = (); my @directions2 = ();
  0            
2814 0           my @relations1 = (); my @relations2 = ();
  0            
2815 0           my @paths1 = (); my @paths2 = ();
  0            
2816 0           my $path_length1 = -1; my $path_length2 = -1;
  0            
2817              
2818             # get the parents
2819 0           foreach my $element (@stack1) {
2820 0           my @array1 = ();
2821 0           push @paths1, \@array1;
2822 0           push @directions1, 0;
2823 0           push @relations1, "PAR";
2824             }
2825 0           foreach my $element (@stack2) {
2826 0           my @array2 = ();
2827 0           push @paths2, \@array2;
2828 0           push @directions2, 0;
2829 0           push @relations2, "PAR";
2830             }
2831            
2832             # now loop through the stack
2833 0   0       while($#stack1 >= 0 || $#stack2 >= 0) {
2834            
2835 0           my $c1 = ""; my $c2 = "";
  0            
2836 0           my $path1 = ""; my $path2 = "";
  0            
2837 0           my $direction1 = ""; my $direction2 = "";
  0            
2838 0           my $relation1 = ""; my $relation2 = "";
  0            
2839 0           my @intermediate1 = (); my @intermediate2 = ();
  0            
2840 0           my $series1 = ""; my $series2 = "";
  0            
2841 0           my $distance1 = -1; my $distance2 = -1;
  0            
2842 0           my $cui1flag = 0; my $cui2flag = 0;
  0            
2843              
2844 0 0         if($#stack1 >=0) {
2845 0           $c1 = pop @stack1;
2846 0           $path1 = pop @paths1;
2847 0           $direction1 = pop @directions1;
2848 0           $relation1 = pop @relations1;
2849              
2850 0           @intermediate1 = @{$path1};
  0            
2851 0           $series1 = join " ", @intermediate1;
2852 0           push @intermediate1, $c1;
2853 0           $distance1 = $#intermediate1;
2854 0           $cui1flag++;
2855             }
2856            
2857 0 0         if($#stack2 >=0) {
2858 0           $c2 = pop @stack2;
2859 0           $path2 = pop @paths2;
2860 0           $direction2 = pop @directions2;
2861 0           $relation2 = pop @relations2;
2862            
2863 0           @intermediate2 = @{$path2};
  0            
2864 0           $series2 = join " ", @intermediate2;
2865 0           push @intermediate2, $c2;
2866 0           $distance2 = $#intermediate2;
2867 0           $cui2flag++;
2868             }
2869            
2870              
2871            
2872             # check if it is our concept2
2873 0 0         if($c1 eq $concept2) {
2874 0           $path_length1 = $distance1 + 2;
2875 0 0         if($#stack2 < 0) { last; }
  0            
2876             }
2877              
2878            
2879             # check if it is our concept2
2880 0 0         if($c2 eq $concept1) {
2881 0           $path_length2 = $distance2 + 2;
2882 0 0         if($#stack1 < 0) { last; }
  0            
2883             }
2884              
2885             # if both paths have been set return the shortest
2886 0 0 0       if($path_length1 > -1 && $path_length2 > -1) { last; }
  0            
2887              
2888             # if path length1 is set and is distance2 is greater then what
2889             # ever path we find for distance2 is going to be more than
2890             # for pathlength1 so return (this also works for pathlength2)
2891 0 0 0       if($path_length1 > -1 && $path_length1 <= ($distance2+2)) { last; }
  0            
2892 0 0 0       if($path_length2 > -1 && $path_length2 <= ($distance1+2)) { last; }
  0            
2893            
2894              
2895             # check if concept has been visited already through that path
2896 0           my $flag1 = 0; my $flag2 = 0;
  0            
2897 0 0         if(exists $visited1{$c1}) { $flag1++; }
  0            
2898 0           else { $visited1{$c1}++; }
2899              
2900 0 0         if(exists $visited2{$c2}) { $flag2++; }
  0            
2901 0           else { $visited2{$c2}++; }
2902              
2903             # set the flags if nothing exists
2904 0 0         if($cui1flag == 0) { $flag1++; }
  0            
2905 0 0         if($cui2flag == 0) { $flag2++; }
  0            
2906              
2907             # check that the concept is not one of the forbidden concepts
2908 0 0 0       if($cui1flag > 0 && $cuifinder->_forbiddenConcept($c1)) { $flag1++; }
  0            
2909 0 0 0       if($cui2flag > 0 && $cuifinder->_forbiddenConcept($c2)) { $flag2++; }
  0            
2910              
2911             # if both concepts have been flagged - next
2912 0 0 0       if($flag1 > 0 && $flag2 > 0) { next; }
  0            
2913              
2914             # add concepts to the closest hash if closer
2915 0 0         if($flag1 <= 0) {
2916 0 0         if(! (exists $closerConceptHash{$c1}) ) {
2917 0           $closerConceptHash{$c1} = $distance1 + 2;
2918             }
2919             }
2920 0 0         if($flag2 <= 0) {
2921 0 0         if(! (exists $closerConceptHash{$c2}) ) {
2922 0           $closerConceptHash{$c2} = $distance2 + 2;
2923             }
2924             }
2925              
2926             # if the previous direction was a child we have a change in direction
2927 0           my $dchange1 = $direction1;
2928 0           my $dchange2 = $direction2;
2929            
2930             # if the undirected option is set the dchange doesn't matter
2931             # otherwise we need to check
2932 0 0         if(!$option_undirected) {
2933 0 0         if($relation1 eq "CHD") { $dchange1 = $direction1 + 1; }
  0            
2934 0 0         if($relation2 eq "CHD") { $dchange2 = $direction2 + 1; }
  0            
2935             }
2936            
2937             # if we have not had more than a single direction change
2938 0           my $parents1 = undef; my $parents2 = undef;
  0            
2939 0 0 0       if($flag1 == 0 && $dchange1 < 2) {
2940 0           $parents1 = $cuifinder->_getParents($c1);
2941             }
2942 0 0 0       if($flag2 == 0 && $dchange2 < 2) {
2943 0           $parents2 = $cuifinder->_getParents($c2);
2944             }
2945            
2946 0           foreach my $parent1 (@{$parents1}) {
  0            
2947             # check if concept has already in the path
2948 0 0         if($series1=~/$parent1/) { next; }
  0            
2949 0 0         if($parent1 eq $c1) { next; }
  0            
2950 0           unshift @stack1, $parent1;
2951 0           unshift @paths1, \@intermediate1;
2952 0           unshift @relations1, "PAR";
2953 0           unshift @directions1, $dchange1;
2954             }
2955              
2956 0           foreach my $parent2 (@{$parents2}) {
  0            
2957             # check if concept has already in the path
2958 0 0         if($series2=~/$parent2/) { next; }
  0            
2959 0 0         if($parent2 eq $c2) { next; }
  0            
2960              
2961 0           unshift @stack2, $parent2;
2962 0           unshift @paths2, \@intermediate2;
2963 0           unshift @relations2, "PAR";
2964 0           unshift @directions2, $dchange2;
2965             }
2966            
2967            
2968             # now with the chilcren if the previous direction was a parent we have
2969             # have to change the direction
2970 0           $dchange1 = $direction1;
2971 0           $dchange2 = $direction2;
2972              
2973             # if the undirected option is set the dchange doesn't matter
2974             # otherwise we need to check
2975 0 0         if(!$option_undirected) {
2976 0 0         if($relation1 eq "PAR") { $dchange1 = $direction1 + 1; }
  0            
2977 0 0         if($relation2 eq "PAR") { $dchange2 = $direction2 + 1; }
  0            
2978             }
2979              
2980            
2981              
2982             # if we have not had more than a single direction change
2983             # now search through the children
2984 0           my $children1 = undef; my $children2 = undef;
  0            
2985 0 0 0       if($flag1 == 0 && $dchange1 < 2) {
2986 0           $children1 = $cuifinder->_getChildren($c1);
2987             }
2988              
2989 0 0 0       if($flag2 == 0 && $dchange2 < 2) {
2990 0           $children2 = $cuifinder->_getChildren($c2);
2991             }
2992            
2993 0           foreach my $child1 (@{$children1}) {
  0            
2994             # check if child cui has already in the path
2995 0 0         if($series1=~/$child1/) { next; }
  0            
2996 0 0         if($child1 eq $c1) { next; }
  0            
2997            
2998             # if not continue
2999 0           unshift @stack1, $child1;
3000 0           unshift @paths1, \@intermediate1;
3001 0           unshift @relations1, "CHD";
3002 0           unshift @directions1, $dchange1;
3003             }
3004              
3005 0           foreach my $child2 (@{$children2}) {
  0            
3006             # check if child cui has already in the path
3007 0 0         if($series2=~/$child2/) { next; }
  0            
3008 0 0         if($child2 eq $c2) { next; }
  0            
3009            
3010             # if not continue
3011 0           unshift @stack2, $child2;
3012 0           unshift @paths2, \@intermediate2;
3013 0           unshift @relations2, "CHD";
3014 0           unshift @directions2, $dchange2;
3015             }
3016             }
3017              
3018 0 0 0       if($path_length1 < 0 && $path_length2 < 0) { return -1; }
  0            
3019            
3020 0 0         my $length = $path_length1 < $path_length2 ? $path_length1 : $path_length2;
3021            
3022 0 0         if($path_length1 < 0) { $length = $path_length2; }
  0            
3023 0 0         if($path_length2 < 0) { $length = $path_length1; }
  0            
3024              
3025 0           my $counter = 0;
3026 0           foreach my $cui (sort keys %closerConceptHash) {
3027 0 0         if($closerConceptHash{$cui} < $length) {
3028 0           $counter++;
3029             }
3030             }
3031              
3032             # no path was found return -1
3033 0           return $counter;
3034             }
3035              
3036             1;
3037              
3038             __END__