File Coverage

blib/lib/UMLS/Interface/PathFinder.pm
Criterion Covered Total %
statement 12 1644 0.7
branch 0 688 0.0
condition 0 246 0.0
subroutine 4 46 8.7
pod 0 1 0.0
total 16 2625 0.6


line stmt bran cond sub pod time code
1             # UMLS::Interface::PathFinder
2             # (Last Updated $Id: PathFinder.pm,v 1.68 2016/10/18 16:10:06 btmcinnes Exp $)
3             #
4             # Perl module that provides a perl interface to the
5             # Unified Medical Language System (UMLS)
6             #
7             # Copyright (c) 2004-2010,
8             #
9             # Bridget T. McInnes, University of Minnesota Twin Cities
10             # bthomson at cs.umn.edu
11             #
12             # Siddharth Patwardhan, University of Utah, Salt Lake City
13             # sidd at cs.utah.edu
14             #
15             # Serguei Pakhomov, University of Minnesota Twin Cities
16             # pakh0002 at umn.edu
17             #
18             # Ted Pedersen, University of Minnesota, Duluth
19             # tpederse at d.umn.edu
20             #
21             # Ying Liu, University of Minnesota
22             # liux0935 at umn.edu
23             #
24             # This program is free software; you can redistribute it and/or
25             # modify it under the terms of the GNU General Public License
26             # as published by the Free Software Foundation; either version 2
27             # of the License, or (at your option) any later version.
28             #
29             # This program is distributed in the hope that it will be useful,
30             # but WITHOUT ANY WARRANTY; without even the implied warranty of
31             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32             # GNU General Public License for more details.
33             #
34             # You should have received a copy of the GNU General Public License
35             # along with this program; if not, write to
36             #
37             # The Free Software Foundation, Inc.,
38             # 59 Temple Place - Suite 330,
39             # Boston, MA 02111-1307, USA.
40              
41             package UMLS::Interface::PathFinder;
42              
43 24     24   89 use Fcntl;
  24         29  
  24         4832  
44 24     24   115 use strict;
  24         27  
  24         500  
45 24     24   73 use warnings;
  24         29  
  24         689  
46 24     24   75 use bytes;
  24         28  
  24         136  
47              
48             my $pkg = "UMLS::Interface::PathFinder";
49              
50             my $debug = 0;
51              
52             my $max_depth = -1;
53              
54             my $root = "";
55              
56             my $option_verbose = 0;
57             my $option_forcerun = 0;
58             my $option_realtime = 0;
59             my $option_t = 0;
60             my $option_debugpath = 0;
61             my $option_cuilist = 0;
62             my $option_undirected = 0;
63              
64             my $errorhandler = "";
65             my $cuifinder = "";
66              
67             my %maximumDepths = ();
68              
69             local(*DEBUG_FILE);
70              
71             # UMLS-specific stuff ends ----------
72              
73             # -------------------- Class methods start here --------------------
74              
75             # method to create a new UMLS::Interface::PathFinder object
76             sub new {
77              
78 0     0 0   my $self = {};
79 0           my $className = shift;
80 0           my $params = shift;
81 0           my $handler = shift;
82            
83             # bless the object.
84 0           bless($self, $className);
85              
86             # initialize the global variables
87 0           $self->_initializeGlobalVariables();
88              
89             # initialize error handler
90 0           $errorhandler = UMLS::Interface::ErrorHandler->new();
91 0 0         if(! defined $errorhandler) {
92 0           print STDERR "The error handler did not get passed properly.\n";
93 0           exit;
94             }
95              
96             # initialize the cuifinder
97 0           $cuifinder = $handler;
98 0 0         if(! (defined $handler)) {
99 0           $errorhandler->_error($pkg,
100             "new",
101             "The CuiFinder handler did not get passed properly",
102             8);
103             }
104              
105             #iInitialize the object.
106 0           $self->_initialize($params);
107              
108 0           return $self;
109             }
110              
111             sub _initializeGlobalVariables {
112              
113 0     0     $debug = 0;
114            
115 0           $max_depth = -1;
116            
117 0           $root = "";
118            
119 0           $option_verbose = 0;
120 0           $option_forcerun = 0;
121 0           $option_realtime = 0;
122 0           $option_t = 0;
123 0           $option_debugpath = 0;
124 0           $option_cuilist = 0;
125 0           $option_undirected = 0;
126            
127 0           $errorhandler = "";
128 0           $cuifinder = "";
129            
130 0           %maximumDepths = ();
131             }
132              
133             # Method to initialize the UMLS::Interface::PathFinder object.
134             sub _initialize {
135              
136 0     0     my $self = shift;
137 0           my $params = shift;
138              
139             # set function name
140 0           my $function = "_initialize";
141 0           &_debug($function);
142            
143             # check self
144 0 0 0       if(!defined $self || !ref $self) {
145 0           $errorhandler->_error($pkg, $function, "", 2);
146             }
147            
148             # get the umlsinterfaceindex database from CuiFinder
149 0           my $sdb = $cuifinder->_getIndexDB();
150 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
151 0           $self->{'sdb'} = $sdb;
152            
153             # get the root
154 0           $root = $cuifinder->_root();
155              
156             # set up the options
157 0           $self->_setOptions($params);
158             }
159              
160             # method to set the realtime global parameter options
161             # input : bool <- 1 (turn on) 0 (turn off)
162             # output:
163             sub _setRealtimeOption {
164              
165 0     0     my $self = shift;
166 0           my $option = shift;
167              
168 0           my $function = "_setRealtimeOption";
169 0           &_debug($function);
170              
171             # check self
172 0 0 0       if(!defined $self || !ref $self) {
173 0           $errorhandler->_error($pkg, $function, "", 2);
174             }
175              
176 0 0         if($option == 1) {
177 0           $option_realtime = 1;
178             }
179             else {
180 0           $option_realtime = 0;
181             }
182             }
183              
184             # method to set the undirected global parameter options
185             # input : bool <- 1 (turn on) 0 (turn off)
186             # output:
187             sub _setUndirectedOption {
188              
189 0     0     my $self = shift;
190 0           my $option = shift;
191              
192 0           my $function = "_setUndirectedOption";
193 0           &_debug($function);
194              
195             # check self
196 0 0 0       if(!defined $self || !ref $self) {
197 0           $errorhandler->_error($pkg, $function, "", 2);
198             }
199              
200 0 0         if($option == 1) {
201 0           $option_undirected = 1;
202             }
203             else {
204 0           $option_undirected = 0;
205             }
206             }
207              
208             # method to set the global parameter options
209             # input : $params <- reference to a hash
210             # output:
211             sub _setOptions {
212              
213 0     0     my $self = shift;
214 0           my $params = shift;
215              
216 0           my $function = "_setOptions";
217 0           &_debug($function);
218              
219             # check self
220 0 0 0       if(!defined $self || !ref $self) {
221 0           $errorhandler->_error($pkg, $function, "", 2);
222             }
223              
224             # get all the parameters
225 0           my $forcerun = $params->{'forcerun'};
226 0           my $verbose = $params->{'verbose'};
227 0           my $realtime = $params->{'realtime'};
228 0           my $debugoption = $params->{'debug'};
229 0           my $t = $params->{'t'};
230 0           my $debugpath = $params->{'debugpath'};
231 0           my $cuilist = $params->{'cuilist'};
232 0           my $undirected = $params->{'undirected'};
233              
234 0           my $output = "";
235 0 0 0       if(defined $forcerun || defined $verbose || defined $realtime ||
      0        
      0        
      0        
      0        
236             defined $debugoption || defined $debugpath || defined $cuilist) {
237 0           $output .= "\nPathFinder User Options:\n";
238             }
239              
240             # check if the debug option has been been defined
241 0 0         if(defined $debugoption) {
242 0           $debug = 1;
243 0           $output .= " --debug option set\n";
244             }
245            
246             # print debug if it has been set
247 0           &_debug($function);
248              
249 0 0         if(defined $t) {
250 0           $option_t = 1;
251             }
252              
253             # check if the undirected option is set for shortest path
254 0 0         if(defined $undirected) {
255 0           $option_undirected = 1;
256 0           $output .= " --undirected option set\n";
257             }
258            
259             # check if the cuilist option has been defined
260 0 0         if(defined $cuilist) {
261 0           $option_cuilist = 1;
262 0           $output .= " --cuilist option set\n";
263             }
264            
265            
266             # check if debugpath option
267 0 0         if(defined $debugpath) {
268 0           $option_debugpath = 1;
269 0           $output .= " --debugpath $debugpath\n";
270 0 0         open(DEBUG_FILE, ">$debugpath") ||
271             die "Could not open depthpath file $debugpath\n";
272             }
273              
274             # check if the realtime option has been identified
275 0 0         if(defined $realtime) {
276 0           $option_realtime = 1;
277            
278 0           $output .= " --realtime option set\n";
279             }
280              
281             # check if verbose run has been identified
282 0 0         if(defined $verbose) {
283 0           $option_verbose = 1;
284            
285 0           $output .= " --verbose option set\n";
286             }
287              
288             # check if a forced run has been identified
289 0 0         if(defined $forcerun) {
290 0           $option_forcerun = 1;
291            
292 0           $output .= " --forcerun option set\n";
293             }
294              
295 0 0         if($option_t == 0) {
296 0           print STDERR "$output\n";
297             }
298             }
299              
300             # method to return the maximum depth of a taxonomy.
301             # input :
302             # output: $int <- string containing the max depth
303             sub _depth {
304              
305 0     0     my $self = shift;
306            
307 0           my $function = "_depth";
308 0           &_debug($function);
309              
310             # check self
311 0 0 0       if(!defined $self || !ref $self) {
312 0           $errorhandler->_error($pkg, $function, "", 2);
313             }
314              
315             # get the depth and set the path information
316 0 0         if($max_depth >= 0) {
317 0           return $max_depth;
318             }
319              
320             # check if it is in the info table and if it is return
321             # that otherwise we need to find the maximum depth and
322             # then store it here
323              
324             # get the info table name
325 0           my $infoTableName = $cuifinder->_getInfoTableName();
326            
327             # set the index DB handler
328 0           my $sdb = $self->{'sdb'};
329 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
330              
331             # get maximum depth from the info table
332 0           my $arrRef = $sdb->selectcol_arrayref("select INFO from $infoTableName where ITEM=\'DEPTH\'");
333 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
334              
335             # get the depth from the array
336 0           my $depth = shift @{$arrRef};
  0            
337              
338             # if the depth was there set the maximum depth and return it
339             # otherwise we are off to find it either in realtime or through
340             # the database depending on the user options
341 0 0         if(defined $depth) {
342 0           $max_depth = $depth;
343 0           return $max_depth;
344             }
345              
346             # find the depth in realtime
347 0 0         if($option_realtime) {
348 0           my @array = ();
349 0           my %visited = ();
350              
351 0           $self->_getMaxDepth($root, 0, \@array, \%visited);
352            
353             # the _getMaxDepth method does a DFS over the entire
354             # heirarchy - I am not certain a way around this yet
355             # but while we were add I stored the maximum depth
356             # of each of the CUIs in a hash since there is not a
357             # quick way of determining this in realtime as of yet
358            
359             # we are going to store them in the info table. This is
360             # hopefully a temporary solution until I can figure out
361             # a way to speed up getMaximumDepthInRealTime - if we
362             # run out of room with this, I will just keep the hash
363             # and then have to go through the
364 0           foreach my $cui (sort keys %maximumDepths) {
365 0           my $d = $maximumDepths{$cui};
366 0           $sdb->do("INSERT INTO $infoTableName (ITEM, INFO) VALUES ('$cui', '$d')");
367 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
368             }
369             }
370             # otherwise find it in the database
371             else {
372 0           $self->_setIndex();
373             }
374              
375             # at this point we have the max depth and the variable has been set
376             # so we are going to insert this into the info table and then return it
377 0           $sdb->do("INSERT INTO $infoTableName (ITEM, INFO) VALUES ('DEPTH', '$max_depth')");
378 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
379              
380             # return the maximum depth
381 0           return $max_depth;
382             }
383              
384             # recursive method to obtain the maximum depth in realtime
385             # input : $concept <- string containing cui
386             # $d <- string containing the depth of the cui
387             # $array <- reference to an array containing the current path
388             # output: $concept <- string containing cui
389             # $int <- string containing the depth of the cui
390             # $array <- reference to an array containing the current path
391             sub _getMaxDepth {
392              
393 0     0     my $self = shift;
394 0           my $concept = shift;
395 0           my $d = shift;
396 0           my $array = shift;
397 0           my $hash = shift;
398              
399 0           my $function = "_getMaxDepth";
400              
401             # check self
402 0 0 0       if(!defined $self || !ref $self) {
403 0           $errorhandler->_error($pkg, $function, "", 2);
404             }
405              
406             # check concept was obtained
407 0 0         if(!$concept) {
408 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
409             }
410            
411             # check if valid concept
412 0 0         if(! ($errorhandler->_validCui($concept)) ) {
413 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
414             }
415              
416             # check that the concept is not a forbidden concept
417 0 0         if($cuifinder->_forbiddenConcept($concept) == 1) { return; }
  0            
418              
419             # set up the new path
420 0           my @path = @{$array};
  0            
421 0           push @path, $concept;
422 0           my $series = join " ", @path;
423              
424             # if have already been here - leave
425 0 0         if(exists ${$hash}{$concept}{$series}) { return; }
  0            
  0            
426 0           else { ${$hash}{$concept}{$series}++; }
  0            
427              
428            
429             # check to see if it is the max depth
430 0 0         if(($d) > $max_depth) { $max_depth = $d; }
  0            
431              
432             # increment the depth
433 0           $d++;
434            
435             # add to the depths hash - if we are going to go through the trouble
436             # of having to do a depth first search - we might as well find the
437             # the maximum depths of all the cuis and store these in the database
438             # I am going to see if I can store them in a hash and then dump the
439             # hash in the database when I am finished. This way we don't have
440             # to continually access the database which is really not an acceptable
441             # solution
442 0 0         if(! (exists $maximumDepths{$concept}) ) { $maximumDepths{$concept} = $d; }
  0 0          
443 0           elsif($maximumDepths{$concept} < $d) { $maximumDepths{$concept} = $d; }
444              
445             # get all the children
446 0           my $children = $cuifinder->_getChildren($concept);
447            
448             # search through the children
449 0           foreach my $child (@{$children}) {
  0            
450            
451             # check if child cui has already in the path
452 0 0         if($series=~/$child/) { next; }
  0            
453 0 0         if($child eq $concept) { next; }
  0            
454            
455             # if it isn't continue on with the depth first search
456 0           $self->_getMaxDepth($child, $d, \@path, $hash);
457             }
458             }
459              
460             # method to find all the paths from a concept to
461             # the root node of the is-a taxonomy.
462             # input : $concept <- string containing cui
463             # output: $array <- array reference containing the paths
464             sub _pathsToRoot {
465              
466 0     0     my $self = shift;
467 0           my $concept = shift;
468              
469 0           my $function = "_pathsToRoot";
470 0           &_debug($function);
471              
472             # check self
473 0 0 0       if(!defined $self || !ref $self) {
474 0           $errorhandler->_error($pkg, $function, "", 2);
475             }
476              
477             # check parameter exists
478 0 0         if(!defined $concept) {
479 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
480             }
481            
482             # check if valid concept
483 0 0         if(! ($errorhandler->_validCui($concept)) ) {
484 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
485             }
486              
487             # get the relations from the configuration file
488 0           my $configrel = $cuifinder->_getRelString();
489 0           $configrel=~/(REL) (\:\:) (include|exclude) (.*?)$/;
490 0           my $relationstring = $4;
491              
492             # check to make certain the configuration file only contains
493             # heirarchical relations (PAR/CHD or RB/RN).
494             #my @relations = split/\s*\,\s*/, $relationstring;
495             #foreach my $rel (@relations) {
496             #if(! ($rel=~/(PAR|CHD|RB|RN)/) ) {
497             #$errorhandler->_error($pkg, $function, "Method only supports heirarhical relations (PAR/CHD or RB/RN).", 10);
498             #}
499             #}
500            
501             # if the realtime option is set get the paths otherwise
502             # they are or should be stored in the database
503 0           my $paths = "";
504 0 0         if($option_realtime) {
505 0           $paths = $self->_getPathsToRootInRealtime($concept);
506             }
507             else {
508 0           $paths = $self->_getPathsToRootFromIndex($concept);
509             }
510            
511 0           return $paths
512             }
513              
514             # returns all the paths to the root from the concept
515             # this information is stored in the index - if it is
516             # not then the index is created
517             # input : $string <- string containing the cui (assumed correct)
518             # output: $array <- reference to an array containing the paths
519             sub _getPathsToRootFromIndex {
520 0     0     my $self = shift;
521 0           my $concept = shift;
522              
523 0           my $function = "_getPathsToRootFromIndex";
524            
525             # check self
526 0 0 0       if(!defined $self || !ref $self) {
527 0           $errorhandler->_error($pkg, $function, "", 2);
528             }
529              
530             # if the concept is the root then return root
531 0 0         if($concept eq $root) {
532 0           my @array = (); push @array, $root;
  0            
533 0           return \@array;
534             }
535              
536             # set the index DB handler
537 0           my $sdb = $self->{'sdb'};
538 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
539            
540             # create the index if it hasn't been created
541 0           $self->_setIndex();
542              
543             # get the table name
544 0           my $tableName = $cuifinder->_getTableName();
545            
546             # get the paths from the database
547 0           my $paths = $sdb->selectcol_arrayref("select PATH from $tableName where CUI=\'$concept\'");
548 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
549            
550 0           return $paths;
551             }
552              
553             # check the index to make certain it is load properly
554             # input :
555             # outupt:
556             sub _checkIndex {
557              
558              
559 0     0     my $self = shift;
560 0           my $tableFile = shift;
561 0           my $tableName = shift;
562 0           my $tableNameHuman = shift;
563              
564 0           my $function = "_checkIndex";
565 0           &_debug($function);
566              
567             # check self
568 0 0 0       if(!defined $self || !ref $self) {
569 0           $errorhandler->_error($pkg, $function, "", 2);
570             }
571            
572             # check the input variables
573 0 0 0       if(!$tableFile || !$tableName || !$tableNameHuman) {
      0        
574 0           $errorhandler->_error($pkg, $function, "Error with input variables.", 4);
575             }
576              
577             # set the auxillary database that holds the path information
578 0           my $sdb = $self->{'sdb'};
579 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
580              
581             # extract the check
582 0           my $arrRef = $sdb->selectcol_arrayref("select CUI from $tableName where CUI=\'CHECK\'");
583            
584 0           my $count = $#{$arrRef};
  0            
585            
586 0 0         if($count != 0) {
587 0           my $str = "Index did not complete. Remove using the removeConfigData.pl program and re-run.";
588 0           $errorhandler->_error($pkg, $function, $str, 9);
589             }
590            
591             }
592              
593             # load the index in realtime
594             # input :
595             # outupt:
596             sub _createIndex {
597              
598              
599 0     0     my $self = shift;
600 0           my $tableFile = shift;
601 0           my $tableName = shift;
602 0           my $tableNameHuman = shift;
603              
604 0           my $function = "_createIndex";
605 0           &_debug($function);
606              
607             # check self
608 0 0 0       if(!defined $self || !ref $self) {
609 0           $errorhandler->_error($pkg, $function, "", 2);
610             }
611            
612             # check the input variables
613 0 0 0       if(!$tableFile || !$tableName || !$tableNameHuman) {
      0        
614 0           $errorhandler->_error($pkg, $function, "Error with input variables.", 4);
615             }
616              
617             # set the auxillary database that holds the path information
618 0           my $sdb = $self->{'sdb'};
619 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
620              
621 0           print STDERR "You have requested path information about a concept. In\n";
622 0           print STDERR "order to obtain this information we need to create an \n";
623 0           print STDERR "index or resubmit this command using --realtime. Creating\n";
624 0           print STDERR "an index can be very time-consuming, but once it is built\n";
625 0           print STDERR "your commands will run faster than with --realtime.\n\n";
626              
627 0 0         if($option_forcerun == 0) {
628 0           print STDERR "Do you want to continue with index creation (y/n)";
629            
630 0           my $answer = ; chomp $answer;
  0            
631            
632 0 0         if($answer=~/(N|n)/) {
633 0           print STDERR "Exiting program now.\n\n";
634 0           exit;
635             }
636             }
637             else {
638 0           print "Running index ... \n";
639             }
640            
641            
642             # create the table in the umls database
643 0           $sdb->do("CREATE TABLE IF NOT EXISTS $tableName (CUI char(8), DEPTH int, PATH varchar(450))");
644 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
645            
646             # insert the name into the index
647 0           $sdb->do("INSERT INTO tableindex (TABLENAME, HEX) VALUES ('$tableNameHuman', '$tableName')");
648 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
649              
650             # for each root - this is for when we allow multiple roots
651             # right now though we only have one - the umlsRoot
652 0           $self->_initializeDepthFirstSearch($root, 0, $root);
653              
654             # add a check that the DFS has finished
655 0           $sdb->do("INSERT INTO $tableName (CUI, DEPTH, PATH) VALUES(\'CHECK\', '0', \'\')");
656 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
657              
658             # create index on the newly formed table
659 0           my $indexname = "$tableName" . "_CUIINDEX";
660 0           my $index = $sdb->do("create index $indexname on $tableName (CUI)");
661 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
662              
663 0           print "Index created.\n\n";
664             }
665              
666             # creates the index containing all of the path to root information
667             # for each concept in the sources and relations specified in the
668             # configuration file
669             # input :
670             # output:
671             sub _setIndex {
672              
673 0     0     my $self = shift;
674              
675 0           my $function = "_setIndex";
676 0           &_debug($function);
677            
678             # check self
679 0 0 0       if(!defined $self || !ref $self) {
680 0           $errorhandler->_error($pkg, $function, "", 2);
681             }
682              
683 0           my $tableName = $cuifinder->_getTableName();
684 0           my $tableFile = $cuifinder->_getTableFile();
685 0           my $tableNameHuman = $cuifinder->_getTableNameHuman();
686              
687             # if the path infomration has not been stored
688 0 0         if(! ($cuifinder->_checkTableExists($tableName))) {
689            
690             # otherwise create the tableFile and put the information in the
691             # file and the database
692 0           $self->_createIndex($tableFile, $tableName, $tableNameHuman);
693            
694             }
695              
696             # check Index
697 0           $self->_checkIndex($tableFile, $tableName, $tableNameHuman);
698              
699             # set the maximum depth
700 0           $self->_setMaximumDepth();
701             }
702              
703             # set the maximum depth variable
704             # input :
705             # output:
706             sub _setMaximumDepth {
707 0     0     my $self = shift;
708              
709 0           my $function = "_setMaximumDepth";
710            
711             # check self
712 0 0 0       if(!defined $self || !ref $self) {
713 0           $errorhandler->_error($pkg, $function, "", 2);
714             }
715              
716             # set the auxillary database that holds the path information
717 0           my $sdb = $self->{'sdb'};
718 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
719              
720             # get the table name
721 0           my $tableName = $cuifinder->_getTableName();
722              
723             # set the maximum depth
724 0           my $d = $sdb->selectcol_arrayref("select max(DEPTH) from $tableName");
725 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
726              
727 0           $max_depth = shift @{$d};
  0            
728             }
729              
730             # print out the function name to standard error
731             # input : $function <- string containing function name
732             # output:
733             sub _debug {
734 0     0     my $function = shift;
735 0 0         if($debug) { print STDERR "In UMLS::Interface::PathFinder::$function\n"; }
  0            
736             }
737              
738             # A Depth First Search (DFS) in order to determine
739             # the maximum depth of the taxonomy and obtain
740             # all of the path information
741             # input :
742             # output:
743             sub _initializeDepthFirstSearch {
744              
745 0     0     my $self = shift;
746 0           my $concept = shift;
747 0           my $d = shift;
748 0           my $root = shift;
749            
750 0           my $function = "_initializeDepthFirstSearch";
751 0           &_debug($function);
752              
753             # check self
754 0 0 0       if(!defined $self || !ref $self) {
755 0           $errorhandler->_error($pkg, $function, "", 2);
756             }
757            
758             # check the parameters are defined
759 0 0 0       if(!(defined $concept) || !(defined $d) || !(defined $root)) {
      0        
760 0           $errorhandler->_error($pkg, $function, "Error with input variables.", 4);
761             }
762              
763             # check valid concept
764 0 0         if(! ($errorhandler->_validCui($concept)) ) {
765 0           $errorhandler->_error($pkg, $function, "Incorrect input value ($concept).", 6);
766             }
767            
768 0           my $tableFile = $cuifinder->_getTableFile();
769              
770             # check if verbose mode
771 0 0         if($option_verbose) {
772 0 0         open(TABLEFILE, ">$tableFile") || die "Could not open $tableFile";
773             }
774            
775             # get the children
776 0           my $children = $cuifinder->_getChildren($concept);
777            
778 0           my $subsumers; my $leafs; my $s = 0; my $l = 0;
  0            
  0            
779             # foreach of the children continue down the taxonomy
780 0           foreach my $child (@{$children}) {
  0            
781 0           my @array = ();
782 0           push @array, $concept;
783 0           my $path = \@array;
784 0           ($subsumers, $leafs) = $self->_depthFirstSearch($child, $d,$path,*TABLEFILE);
785 0           $s += keys %{$subsumers}; $l += keys %{$leafs};
  0            
  0            
  0            
786             }
787            
788             # get the database
789 0           my $sdb = $self->{'sdb'};
790 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
791            
792             # get the table name of the intrinsic index and insert the leaves and subsumers
793 0           my $intrinsicTableName = $cuifinder->_getIntrinsicTableName();
794 0           my $arrRef = $sdb->do("INSERT INTO $intrinsicTableName (CUI, LEAVES, SUBSUMERS) VALUES(\'$concept\', '$l', \'$s\')");
795 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
796              
797             # close the table file if in verbose mode
798 0 0         if($option_verbose) {
799 0           close TABLEFILE;
800            
801             # set the table file permissions
802 0           my $temp = chmod 0777, $tableFile;
803             }
804             }
805              
806             # This is like a reverse DFS only it is not recursive
807             # due to the stack overflow errors I received when it was
808             # input :
809             # output:
810             sub _getPathsToRootInRealtime {
811              
812 0     0     my $self = shift;
813 0           my $concept = shift;
814              
815 0 0 0       return () if(!defined $self || !ref $self);
816              
817 0           my $function = "_getPathsToRootInRealtime($concept)";
818 0           &_debug($function);
819            
820             # check self
821 0 0 0       if(!defined $self || !ref $self) {
822 0           $errorhandler->_error($pkg, $function, "", 2);
823             }
824              
825             # check concept was obtained
826 0 0         if(!$concept) {
827 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
828             }
829            
830             # check if valid concept
831 0 0         if(! ($errorhandler->_validCui($concept)) ) {
832 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
833             }
834              
835             # set the storage
836 0           my @path_storage = ();
837              
838             # set the stack
839 0           my @stack = ();
840 0           push @stack, $concept;
841              
842             # set the count
843 0           my %visited = ();
844              
845             # set the paths
846 0           my @paths = ();
847 0           my @empty = ();
848 0           push @paths, \@empty;
849              
850             # now loop through the stack
851 0           while($#stack >= 0) {
852            
853 0           my $concept = $stack[$#stack];
854 0           my $path = $paths[$#paths];
855              
856             # set up the new path
857 0           my @intermediate = @{$path};
  0            
858 0           push @intermediate, $concept;
859 0           my $series = join " ", @intermediate;
860            
861             # check that the concept is not one of the forbidden concepts
862 0 0         if($cuifinder->_forbiddenConcept($concept)) {
863 0           pop @stack; pop @paths;
  0            
864 0           next;
865             }
866              
867             # check if concept has been visited already
868 0 0         if(exists $visited{$series}) {
869 0           pop @stack; pop @paths;
  0            
870 0           next;
871             }
872 0           else { $visited{$series}++; }
873            
874             # print information into the file if debugpath option is set
875 0 0         if($option_debugpath) {
876 0           my $d = $#intermediate+1;
877 0           print DEBUG_FILE "$concept\t$d\t@intermediate\n";
878             }
879            
880             # if the concept is the umls root - we are done
881 0 0         if($concept eq $root) {
882             # this is a complete path to the root so push it on the paths
883 0           my @reversed = reverse(@intermediate);
884 0           my $rseries = join " ", @reversed;
885 0           push @path_storage, $rseries;
886 0           next;
887             }
888            
889             # get all the parents
890 0           my $parents = $cuifinder->_getParents($concept);
891            
892             # if there are no children we are finished with this concept
893 0 0         if($#{$parents} < 0) {
  0            
894 0           pop @stack; pop @paths;
  0            
895 0           next;
896             }
897              
898             # search through the children
899 0           my $stackflag = 0;
900 0           foreach my $parent (@{$parents}) {
  0            
901            
902             # check if concept is already in the path
903 0 0         if($series=~/$parent/) { next; }
  0            
904 0 0         if($concept eq $parent) { next; }
  0            
905              
906             # if it isn't continue on with the depth first search
907 0           push @stack, $parent;
908 0           push @paths, \@intermediate;
909 0           $stackflag++;
910             }
911            
912             # check to make certain there were actually children
913 0 0         if($stackflag == 0) {
914 0           pop @stack; pop @paths;
  0            
915             }
916             }
917              
918 0           return \@path_storage;
919             }
920              
921             # Depth First Search (DFS) recursive function to collect the path
922             # information and store it in the umlsinterfaceindex database
923             # input : $concept <- string containing the cui
924             # $depth <- depth of the cui
925             # $array <- reference to an array containing the path
926             # output: $concept <- string containing the cui
927             # $depth <- depth of the cui
928             # $array <- reference to an array containing the path
929             sub _depthFirstSearch {
930              
931 0     0     my $self = shift;
932 0           my $concept = shift;
933 0           my $d = shift;
934 0           my $array = shift;
935 0           local(*F) = shift;
936            
937 0           my $function = "_depthFirstSearch";
938            
939             # check self
940 0 0 0       if(!defined $self || !ref $self) {
941 0           $errorhandler->_error($pkg, $function, "", 2);
942             }
943              
944             # check the parameters are defined
945 0 0 0       if(!(defined $concept) || !(defined $d)) {
946 0           $errorhandler->_error($pkg, $function, "Error with input variables.", 4);
947             }
948            
949             # check if valid concept
950 0 0         if(! ($errorhandler->_validCui($concept)) ) {
951 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
952             }
953            
954             # check that the concept is not a forbidden concept
955 0 0         if($cuifinder->_forbiddenConcept($concept)) { return; }
  0            
956            
957             # get the database
958 0           my $sdb = $self->{'sdb'};
959 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
960            
961             # get the table name of the index
962 0           my $tableName = $cuifinder->_getTableName();
963            
964             # increment the depth
965 0           $d++;
966              
967             # set up the new path
968 0           my @path = @{$array};
  0            
969 0           push @path, $concept;
970 0           my $series = join " ", @path;
971            
972             # load path information into the table
973             # check if only a specified set of cui information is required
974 0 0         if($option_cuilist) {
975            
976             # check if it is in the cuilist - and if so insert it the cui
977 0 0         if($cuifinder->_inCuiList($concept)) {
978 0           my $arrRef = $sdb->do("INSERT INTO $tableName (CUI, DEPTH, PATH) VALUES(\'$concept\', '$d', \'$series\')");
979 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
980             }
981             }
982             # otherwise we are loading all of it
983             else {
984 0           my $arrRef = $sdb->do("INSERT INTO $tableName (CUI, DEPTH, PATH) VALUES(\'$concept\', '$d', \'$series\')");
985 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
986             }
987            
988             # print information into the file if verbose option is set
989 0 0         if($option_verbose) {
990 0 0         if($option_cuilist) {
991 0 0         if($cuifinder->_inCuiList($concept)) {
992 0           print F "$concept\t$d\t$series\n";
993             }
994             }
995 0           else { print F "$concept\t$d\t$series\n"; }
996             }
997            
998             # get all the children
999 0           my $children = $cuifinder->_getChildren($concept);
1000              
1001 0           my %totalLeaves = (); my %totalSubsumers = ();
  0            
1002             # search through the children
1003 0           foreach my $child (@{$children}) {
  0            
1004            
1005             # check if child cui has already in the path
1006 0 0         if($series=~/$child/) { next; }
  0            
1007 0 0         if($child eq $concept) { next; }
  0            
1008            
1009             # if it isn't continue on with the depth first search
1010 0           my ($subsumers, $leafs) = $self->_depthFirstSearch($child, $d, \@path,*F);
1011            
1012 0 0         if(defined $leafs) {
1013 0 0         if(%{$leafs}) {
  0            
1014 0           %totalLeaves = (%totalLeaves, %{$leafs});
  0            
1015             }
1016             }
1017 0 0         if(defined $subsumers) {
1018 0 0         if(%{$subsumers}) {
  0            
1019 0           %totalSubsumers = (%totalSubsumers, %{$subsumers});
  0            
1020             }
1021             }
1022             }
1023 0           my $l = keys %totalLeaves; my $s = keys %totalSubsumers;
  0            
1024            
1025             # get the table name of the intrinsic index and insert the leaves and subsumers
1026 0           my $intrinsicTableName = $cuifinder->_getIntrinsicTableName();
1027            
1028             # check if CUI is already there
1029 0           my $ex = $sdb->selectcol_arrayref("select count(*) from $intrinsicTableName where CUI=\'$concept\'");
1030 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
1031            
1032             # return the minimum depth
1033 0           my $count = shift @{$ex};
  0            
1034 0 0         if($count == 0) {
1035 0           my $arrRef = $sdb->do("INSERT INTO $intrinsicTableName (CUI, LEAVES, SUBSUMERS) VALUES(\'$concept\', '$l', \'$s\')");
1036 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
1037             }
1038              
1039 0 0         if($#{$children} < 0) { $totalLeaves{$concept}++; } $totalSubsumers{$concept}++;
  0            
  0            
  0            
1040              
1041 0           return (\%totalSubsumers, \%totalLeaves);
1042             }
1043              
1044             # function returns the minimum depth of a concept
1045             # input : $concept <- string containing the cui
1046             # output: $int <- string containing the depth of the cui
1047             sub _findMinimumDepth {
1048              
1049 0     0     my $self = shift;
1050 0           my $cui = shift;
1051              
1052 0           my $function = "_findMinimumDepth";
1053 0           &_debug($function);
1054              
1055             # check self
1056 0 0 0       if(!defined $self || !ref $self) {
1057 0           $errorhandler->_error($pkg, $function, "", 2);
1058             }
1059              
1060             # check concept was obtained
1061 0 0         if(!$cui) {
1062 0           $errorhandler->_error($pkg, $function, "Error with input variable \$cui.", 4);
1063             }
1064            
1065             # check if valid concept
1066 0 0         if(! ($errorhandler->_validCui($cui)) ) {
1067 0           $errorhandler->_error($pkg, $function, "Concept ($cui) in not valid.", 6);
1068             }
1069            
1070             # get the database
1071 0           my $sdb = $self->{'sdb'};
1072 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
1073            
1074             # if it is in the parent taxonomy
1075 0 0         if($cuifinder->_inParentTaxonomy($cui)) { return 1; }
  0            
1076            
1077 0           my $min = 0;
1078 0 0         if($option_realtime) {
1079 0           $min = $self->_findMinimumDepthInRealTime($cui);
1080             }
1081             else {
1082            
1083             # set the depth
1084 0           $self->_setIndex();
1085              
1086             # get the table name
1087 0           my $tableName = $cuifinder->_getTableName();
1088              
1089             # get the minimum depth from the table
1090 0           my $d = $sdb->selectcol_arrayref("select min(DEPTH) from $tableName where CUI=\'$cui\'");
1091 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
1092            
1093             # return the minimum depth
1094 0           $min = shift @{$d}; $min++;
  0            
  0            
1095             }
1096            
1097 0           return $min;
1098             }
1099              
1100             # function returns maximum depth of a concept
1101             # input : $concept <- string containing the cui
1102             # output: $int <- string containing the depth of the cui
1103             sub _findMaximumDepth {
1104              
1105 0     0     my $self = shift;
1106 0           my $cui = shift;
1107              
1108 0           my $function = "_findMaximumDepth";
1109 0           &_debug($function);
1110            
1111             # check self
1112 0 0 0       if(!defined $self || !ref $self) {
1113 0           $errorhandler->_error($pkg, $function, "", 2);
1114             }
1115              
1116             # check concept was obtained
1117 0 0         if(!$cui) {
1118 0           $errorhandler->_error($pkg, $function, "Error with input variable \$cui.", 4);
1119             }
1120            
1121             # check if valid concept
1122 0 0         if(! ($errorhandler->_validCui($cui)) ) {
1123 0           $errorhandler->_error($pkg, $function, "Concept ($cui) in not valid.", 6);
1124             }
1125            
1126             # get the database
1127 0           my $sdb = $self->{'sdb'};
1128 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
1129            
1130             # initialize max
1131 0           my $max = 0;
1132             # if realtime option is set
1133 0 0         if($option_realtime) {
1134             # get the info table name
1135 0           my $infoTableName = $cuifinder->_getInfoTableName();
1136            
1137             # set the index DB handler
1138 0           my $sdb = $self->{'sdb'};
1139 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
1140              
1141             # get maximum depth from the info table
1142 0           my $arrRef = $sdb->selectcol_arrayref("select INFO from $infoTableName where ITEM=\'$cui\'");
1143 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
1144            
1145             # get the depth from the array
1146 0           my $depth = shift @{$arrRef};
  0            
1147              
1148 0 0         if(defined $depth) {
1149 0           $max = $depth;
1150             }
1151             else {
1152             # get the maximum depth
1153 0           $max = $self->_findMaximumDepthInRealTime($cui);
1154            
1155             # insert it in the info table - this is caching over multipe runs of
1156             # the program. I don't really like this solution but until I can
1157             # figure out how to speed up findMaximumDepthInRealTime then
1158             # this is going to have to do.
1159 0           $sdb->do("INSERT INTO $infoTableName (ITEM, INFO) VALUES ('$cui', '$max')");
1160 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
1161              
1162             }
1163             }
1164            
1165             # otherwise
1166             else {
1167             # set the depth
1168 0           $self->_setIndex();
1169            
1170             # get the table name
1171 0           my $tableName = $cuifinder->_getTableName();
1172            
1173             # get the depth from the table
1174 0           my $d = $sdb->selectcol_arrayref("select max(DEPTH) from $tableName where CUI=\'$cui\'");
1175 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
1176              
1177 0           $max = shift @{$d}; $max++;
  0            
  0            
1178             }
1179              
1180             # return the maximum depth
1181 0           return $max;
1182             }
1183              
1184             # find the shortest path between two concepts
1185             # input : $concept1 <- string containing the first cui
1186             # $concept2 <- string containing the second
1187             # output: $array <- reference to an array containing the shortest path(s)
1188             sub _findShortestPath {
1189              
1190 0     0     my $self = shift;
1191 0           my $concept1 = shift;
1192 0           my $concept2 = shift;
1193              
1194 0           my $function = "_findShortestPath";
1195 0           &_debug($function);
1196            
1197             # check self
1198 0 0 0       if(!defined $self || !ref $self) {
1199 0           $errorhandler->_error($pkg, $function, "", 2);
1200             }
1201              
1202             # check parameter exists
1203 0 0         if(!defined $concept1) {
1204 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
1205             }
1206 0 0         if(!defined $concept2) {
1207 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
1208             }
1209              
1210             # check if valid concept
1211 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
1212 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
1213             }
1214 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
1215 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
1216             }
1217              
1218             # if realtime option is set find the shortest path in realtime
1219 0 0         if($option_realtime) {
1220 0           return $self->_findShortestPathInRealTime($concept1, $concept2);
1221             }
1222             else {
1223 0           return $self->_findShortestPathThroughLCS($concept1, $concept2);
1224             }
1225             }
1226              
1227              
1228             # this function returns the shortest path between two concepts
1229             # input : $concept1 <- string containing the first cui
1230             # $concept2 <- string containing the second
1231             # output: $array <- reference to an array containing the lcs(es)
1232             sub _findShortestPathThroughLCS {
1233            
1234 0     0     my $self = shift;
1235 0           my $concept1 = shift;
1236 0           my $concept2 = shift;
1237            
1238 0           my $function = "_findShortestPathThroughLCS";
1239 0           &_debug($function);
1240            
1241             # check self
1242 0 0 0       if(!defined $self || !ref $self) {
1243 0           $errorhandler->_error($pkg, $function, "", 2);
1244             }
1245              
1246             # check parameter exists
1247 0 0         if(!defined $concept1) {
1248 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
1249             }
1250 0 0         if(!defined $concept2) {
1251 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
1252             }
1253              
1254             # check if valid concept
1255 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
1256 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
1257             }
1258 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
1259 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
1260             }
1261              
1262             # find the shortest path(s) and lcs - there may be more than one
1263 0           my $hash = $self->_shortestPath($concept1, $concept2);
1264            
1265             # remove the blanks from the paths
1266 0           my @paths = (); my $output = "";
  0            
1267 0           foreach my $path (sort keys %{$hash}) {
  0            
1268 0 0         if($path=~/C[0-9]+/) {
1269 0           push @paths, $path;
1270             }
1271             }
1272            
1273             # return the shortest paths (all of them)
1274 0           return \@paths;
1275             }
1276              
1277              
1278             # this function returns the least common subsummer between two concepts
1279             # input : $concept1 <- string containing the first cui
1280             # $concept2 <- string containing the second
1281             # output: $array <- reference to an array containing the lcs(es)
1282             sub _findLeastCommonSubsumer {
1283              
1284 0     0     my $self = shift;
1285 0           my $concept1 = shift;
1286 0           my $concept2 = shift;
1287            
1288 0           my $function = "_findLeastCommonSubsumer";
1289 0           &_debug($function);
1290              
1291             # check self
1292 0 0 0       if(!defined $self || !ref $self) {
1293 0           $errorhandler->_error($pkg, $function, "", 2);
1294             }
1295              
1296             # check parameter exists
1297 0 0         if(!defined $concept1) {
1298 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
1299             }
1300 0 0         if(!defined $concept2) {
1301 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
1302             }
1303              
1304             # check if valid concept
1305 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
1306 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
1307             }
1308 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
1309 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
1310             }
1311            
1312             # get the relations from the configuration file
1313 0           my $configrel = $cuifinder->_getRelString();
1314 0           $configrel=~/(REL) (\:\:) (include|exclude) (.*?)$/;
1315 0           my $relationstring = $4;
1316              
1317             # check to make certain the configuration file only contains
1318             # heirarchical relations (PAR/CHD or RB/RN).
1319 0           my @relations = split/\s*\,\s*/, $relationstring;
1320 0           foreach my $rel (@relations) {
1321 0 0         if(! ($rel=~/(PAR|CHD|RB|RN)/) ) {
1322 0           $errorhandler->_error($pkg, $function, "Method only supports heirarhical relations (PAR/CHD or RB/RN).", 10);
1323             }
1324             }
1325              
1326             # get the LCSes
1327 0 0         if($option_realtime) {
1328 0           return $self->_findLeastCommonSubsumerInRealTime($concept1, $concept2);
1329             }
1330             else {
1331              
1332             # initialize the array that will contain the lcses
1333 0           my @lcses = ();
1334            
1335             # get the lcs using the index
1336 0           my $hash = $self->_shortestPath($concept1, $concept2);
1337 0 0         if($debug) { print STDERR "done with _shortestPath\n"; }
  0            
1338 0           my %lcshash = ();
1339 0 0         if(defined $hash) {
1340 0           foreach my $path (sort keys %{$hash}) {
  0            
1341 0           my $c = ${$hash}{$path};
  0            
1342 0 0         if($c=~/C[0-9]+/) { $lcshash{$c}++; }
  0            
1343             }
1344             }
1345 0           foreach my $lcs (sort keys %lcshash) { push @lcses, $lcs; }
  0            
1346            
1347             # return the lcses
1348 0           return \@lcses;
1349             }
1350             }
1351              
1352             # this function returns the least common subsummer between two concepts
1353             # input : $concept1 <- string containing the first cui
1354             # $concept2 <- string containing the second
1355             # output: $array <- reference to an array containing the lcs(es)
1356             sub _findLeastCommonSubsumerInRealTime {
1357              
1358 0     0     my $self = shift;
1359 0           my $concept1 = shift;
1360 0           my $concept2 = shift;
1361            
1362 0           my $function = "_findLeastCommonSubsumerInRealTime";
1363 0           &_debug($function);
1364              
1365             # check self
1366 0 0 0       if(!defined $self || !ref $self) {
1367 0           $errorhandler->_error($pkg, $function, "", 2);
1368             }
1369              
1370             # check parameter exists
1371 0 0         if(!defined $concept1) {
1372 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
1373             }
1374 0 0         if(!defined $concept2) {
1375 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
1376             }
1377              
1378             # check if valid concept
1379 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
1380 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
1381             }
1382 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
1383 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
1384             }
1385            
1386             # get the shorest paths
1387 0           my $paths = $self->_findShortestPathInRealTime($concept1, $concept2);
1388            
1389             # get the child relations
1390 0           my $childstring = $cuifinder->_getChildRelations();
1391              
1392             # initialize the lcses array
1393 0           my %lcses = ();
1394            
1395             # check for the lcs in each of the paths
1396 0           foreach my $p (@{$paths}) {
  0            
1397             # get the path and the first concept
1398 0           my @path = split/\s+/, $p;
1399 0           my $concept1 = shift @path;
1400 0           my $flag = 0;
1401 0           my $counter = 0;
1402 0           my $children = 0;
1403 0           my $parent = 0;
1404 0           my @lcsarray = ();
1405            
1406 0           my $firstconcept = $concept1;
1407              
1408             # loop through the rest of the concepts looking for the first child relation
1409 0           foreach my $concept2 (@path) {
1410 0           my $relations = $cuifinder->_getRelationsBetweenCuis($concept1, $concept2);
1411 0           foreach my $item (@{$relations}) {
  0            
1412 0           $item=~/([A-Z]+) \([A-Z0-9\.]+\)/;
1413 0           my $rel = $1;
1414              
1415             # if the relation is a child we have the LCS - it is concept1
1416             # this is for the typical case
1417 0 0 0       if($childstring=~/($rel)/ && $flag == 0) {
1418 0           push @lcsarray, $concept1; $flag++;
  0            
1419             }
1420            
1421 0 0         if($childstring=~/($rel)/) { $children++; }
  0            
1422 0           else { $parent++; }
1423 0           $counter++;
1424             }
1425 0           $concept1 = $concept2;
1426             }
1427            
1428             # string of children
1429 0 0         if($counter == $children) { $lcses{$firstconcept}++; }
  0 0          
1430             # string of parents
1431 0           elsif($counter == $parent) { $lcses{$concept1}++; }
1432             # typical case
1433 0           else { foreach my $l (@lcsarray) { $lcses{$l}++; } }
  0            
1434             }
1435            
1436             # get the unique lcses - note a single lcs may have more than one path
1437 0           my @unique = ();
1438 0           foreach my $lcs (sort keys %lcses) { push @unique, $lcs; }
  0            
1439              
1440             # return the unique lcses
1441 0           return \@unique;
1442             }
1443              
1444             # method to get the Least Common Subsumer of two
1445             # paths to the root of a taxonomy
1446             # input : $array1 <- reference to an array containing
1447             # the paths to the root for cui1
1448             # $array2 <- same thing for cui2
1449             # output: $hash <- reference to a hash containing the
1450             # lcs as the key and the path as the hash
1451             sub _getLCSfromTrees {
1452              
1453 0     0     my $self = shift;
1454 0           my $arrayref1 = shift;
1455 0           my $arrayref2 = shift;
1456            
1457 0           my $function = "_getLCSfromTrees";
1458              
1459             # check self
1460 0 0 0       if(!defined $self || !ref $self) {
1461 0           $errorhandler->_error($pkg, $function, "", 2);
1462             }
1463              
1464             # check parameter exists
1465 0 0         if(!defined $arrayref1) {
1466 0           $errorhandler->_error($pkg, $function, "Error with input variable \$arrayref1.", 4);
1467             }
1468 0 0         if(!defined $arrayref2) {
1469 0           $errorhandler->_error($pkg, $function, "Error with input variable \$arrayref2.", 4);
1470             }
1471              
1472             # get the arrays
1473 0           my @array1 = split/\s+/, $arrayref1;
1474 0           my @array2 = split/\s+/, $arrayref2;
1475              
1476             # reverse them
1477 0           my @tree1 = reverse @array1;
1478 0           my @tree2 = reverse @array2;
1479 0           my $tmpString = " ".join(" ", @tree2)." ";
1480              
1481             # find the lcs
1482 0           foreach my $element (@tree1) {
1483 0 0         if($tmpString =~ / $element /) {
1484 0           return $element;
1485             }
1486             }
1487            
1488 0           return undef;
1489             }
1490              
1491             # method to find the shortest path between two concepts in realtime
1492             # input : $concept1 <- first concept
1493             # $concept2 <- second concept
1494             # output: $array <- reference to an array containing the shortest paths
1495             sub _findShortestPathInRealTime {
1496            
1497 0     0     my $self = shift;
1498 0           my $concept1 = shift;
1499 0           my $concept2 = shift;
1500            
1501 0           my $function = "_findShortestPathInRealTime";
1502 0           &_debug($function);
1503            
1504             # check self
1505 0 0 0       if(!defined $self || !ref $self) {
1506 0           $errorhandler->_error($pkg, $function, "", 2);
1507             }
1508              
1509             # check parameter exists
1510 0 0         if(!defined $concept1) {
1511 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
1512             }
1513 0 0         if(!defined $concept2) {
1514 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
1515             }
1516              
1517             # check if valid concept
1518 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
1519 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
1520             }
1521 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
1522 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
1523             }
1524              
1525             # get the length of the shortest path
1526 0           my $length = $self->_findShortestPathLengthInRealTime($concept1, $concept2);
1527            
1528             # initialize the paths array that will be returned
1529 0           my @paths = ();
1530            
1531             # if the length is two then the cuis are related in some way
1532             # so just return them
1533 0 0         if($length == 2) {
1534 0           push @paths, "$concept1 $concept2";
1535             }
1536             else {
1537              
1538             # set split to get the beginning paths
1539 0           my $split1 = int($length/2);
1540              
1541             # we need the cui itself so, if the split is zero setting the
1542             # split to one will just return the cuis
1543 0 0         if($split1 == 0) { $split1 = 1; }
  0            
1544            
1545             # set split to get the last set of paths
1546 0           my $split2 = $length - $split1 -1;
1547            
1548             # initial the hash to hold the ends
1549 0           my %ends = ();
1550            
1551             # get all the paths from concept1 of length split1
1552 0           my @paths1 = $self->_findPathsToCenter($concept1, $split1, 1, \%ends );
1553            
1554 0           my $endkey = keys %ends;
1555              
1556             # get all the paths from concept2 of length split2
1557 0           my @paths2 = $self->_findPathsToCenter($concept2, $split2, 2, \%ends );
1558              
1559             # join the two sets of paths to find all of the full paths
1560 0           @paths = $self->_joinPathsToCenter(\@paths1, \@paths2);
1561             }
1562              
1563            
1564 0           return \@paths;
1565             }
1566              
1567             # method that takes two partial paths nad joins them
1568             # input : $array1 <- reference to paths for first concept
1569             # $arrat2 <- reference to paths for second concept
1570             # output: @paths <- array containing the combined paths
1571             sub _joinPathsToCenter {
1572 0     0     my $self = shift;
1573 0           my $paths1 = shift;
1574 0           my $paths2 = shift;
1575              
1576 0           my $function = "_joinPathsToCenter";
1577 0           &_debug($function);
1578            
1579             # check self
1580 0 0 0       if(!defined $self || !ref $self) {
1581 0           $errorhandler->_error($pkg, $function, "", 2);
1582             }
1583              
1584             # check parameter exists
1585 0 0         if(!defined $paths1) {
1586 0           $errorhandler->_error($pkg, $function, "Error with input variable \$paths1.", 4);
1587             }
1588 0 0         if(!defined $paths2) {
1589 0           $errorhandler->_error($pkg, $function, "Error with input variable \$paths2.", 4);
1590             }
1591              
1592 0           my $childstring = $cuifinder->_getChildRelations();
1593 0           my $parentstring = $cuifinder->_getParentRelations();
1594            
1595 0           my @shortestpaths = ();
1596 0           foreach my $p1 (@{$paths1}) {
  0            
1597            
1598             # get the path to the center, the center and the number
1599             # of direction changes that existed in the path
1600 0           my @array1 = split/\s+/, $p1;
1601 0           my $dchange1 = pop @array1;
1602 0           my $c1 = pop @array1;
1603            
1604 0           foreach my $p2 (@{$paths2}) {
  0            
1605              
1606             # now get the paths to the center coming from the other direction,
1607             # its direction changes and the center
1608 0           my @array2 = split/\s+/, $p2;
1609 0           my $dchange2 = pop @array2;
1610 0           my $c2 = $array2[$#array2];
1611            
1612             # if the two centers are equal we have path
1613 0 0         if($c1 eq $c2) {
1614            
1615             # if undirected make certain that their is at
1616             # most one direction change
1617 0 0         if(!($option_undirected)) {
1618              
1619             # check on basic direction changes
1620 0           my $totalchanges = $dchange1 + $dchange2;
1621 0 0         if($totalchanges > 1) { next; }
  0            
1622 0 0 0       if($dchange1 > 0 && $dchange2 > 0) { next; }
  0            
1623              
1624             # set the path
1625 0           my @rarray2 = reverse @array2;
1626 0           my @path = (@array1, @rarray2);
1627            
1628             # check for complicated embedded direction changes
1629 0           my $direction = 0; my $previous = ""; my $cflag = 0;
  0            
  0            
1630 0           for my $i (0..($#path-1)) {
1631 0           my $cc1 = $path[$i];
1632 0           my $cc2 = $path[$i+1];
1633              
1634             # get the relationships the concepts
1635 0           my $ccr = $cuifinder->_getRelationsBetweenCuis($cc1, $cc2);
1636              
1637             # determine whether that relation is a
1638             # parent or a child relation
1639 0           my $pr = 0; my $cr = 0;
  0            
1640 0           foreach my $item (@{$ccr}) {
  0            
1641 0           $item=~/([A-Z]+) \([A-Za-z0-9\.]+\)/;
1642 0           my $rel = $1;
1643 0 0         if($childstring=~/($rel)/) { $cr++; }
  0            
1644 0 0         if($parentstring=~/($rel)/) { $pr++; }
  0            
1645              
1646              
1647             # determine that the first relationship in the
1648             # in the path is not a child relation
1649 0 0         if($i == 0) {
1650 0 0         if($childstring=~/($rel)/) { $cflag = 1; }
  0            
1651             }
1652             }
1653            
1654             # sometimes there are two directions
1655 0 0 0       if($cr > 0 && $pr > 0) {
1656             # if this is the case we are just going to move on
1657             # and not worry about right now. There isn't a
1658             # loop exactly
1659             }
1660             else {
1661             # determine if there has been a direction change
1662 0 0         if($previous ne "") {
1663 0 0 0       if( ($previous eq "CHD") && ($pr > 0)) { $direction++; }
  0            
1664 0 0 0       if( ($previous eq "PAR") && ($cr > 0)) { $direction++; }
  0            
1665             }
1666              
1667             # set the previous relation
1668 0 0         if($pr > 0){ $previous = "PAR"; }
  0            
1669 0 0         if($cr > 0){ $previous = "CHD"; }
  0            
1670             }
1671             }
1672              
1673             # if the path starts out with a child relation and then
1674             # moves to a parent we don't want it
1675 0 0 0       if($cflag == 1 && $direction >= 1) { next; }
  0            
1676              
1677             # if there is more than a single direction change
1678             # we don't want the path
1679 0 0         if($direction > 1) { next; }
  0            
1680              
1681             # add the path to the list of shortest paths
1682 0           my $string = join " ", @path;
1683 0           push @shortestpaths, $string;
1684             }
1685             else {
1686              
1687             # we have one or less changes if the undirectoption
1688             # was not set so we can add the path to the shortest
1689             # path array
1690 0           my @rarray2 = reverse @array2;
1691 0           my @path = (@array1, @rarray2);
1692 0           my $string = join " ", @path;
1693              
1694 0           push @shortestpaths, $string;
1695             }
1696             }
1697             }
1698             }
1699            
1700 0           return @shortestpaths;
1701             }
1702            
1703              
1704             # method that finds all the paths from a concept of a specified length
1705             # input : $start <- the concept
1706             # $length <- the length of the path
1707             # output: @paths <- array containing the paths
1708             sub _findPathsToCenter {
1709              
1710 0     0     my $self = shift;
1711 0           my $start = shift;
1712 0           my $length = shift;
1713 0           my $flag = shift;
1714 0           my $ends = shift;
1715            
1716 0           my $function = "_findPathsToCenter";
1717 0           &_debug($function);
1718            
1719             # check self
1720 0 0 0       if(!defined $self || !ref $self) {
1721 0           $errorhandler->_error($pkg, $function, "", 2);
1722             }
1723            
1724             # check parameter exists
1725 0 0         if(!defined $start) {
1726 0           $errorhandler->_error($pkg, $function, "Error with input variable \$start.", 4);
1727             }
1728            
1729             # check if valid concept
1730 0 0         if(! ($errorhandler->_validCui($start)) ) {
1731 0           $errorhandler->_error($pkg, $function, "Concept ($start) in not valid.", 6);
1732             }
1733            
1734             # set the storage
1735 0           my @path_storage= ();
1736              
1737             # set the count
1738 0           my %visited = ();
1739            
1740             # set the stack with the parents because
1741             # we want to start going up inorder to
1742             # have an LCS
1743 0           my @directions = ();
1744 0           my @relations = ();
1745 0           my @paths = ();
1746            
1747 0           my $parentstack = $cuifinder->_getParents($start);
1748 0           foreach my $element (@{$parentstack}) {
  0            
1749 0           my @array = ();
1750 0           push @paths, \@array;
1751 0           push @directions, 0;
1752 0           push @relations, "PAR";
1753             }
1754            
1755 0           my $childrenstack = $cuifinder->_getChildren($start);
1756 0           my @stack = (@{$parentstack}, @{$childrenstack});
  0            
  0            
1757 0           foreach my $element (@{$childrenstack}) {
  0            
1758 0           my @array = ();
1759 0           push @paths, \@array;
1760 0           push @directions, 0;
1761 0           push @relations, "CHD";
1762             }
1763            
1764             # now loop through the stack
1765 0           while($#stack >= 0) {
1766            
1767 0           my $concept = pop @stack;
1768 0           my $path = pop @paths;
1769 0           my $direction = pop @directions;
1770 0           my $relation = pop @relations;
1771            
1772              
1773             # set up the new path
1774 0           my @intermediate = @{$path};
  0            
1775 0           my $series = join " ", @intermediate;
1776 0           push @intermediate, $concept;
1777 0           my $distance = $#intermediate + 1;
1778              
1779             # check if the distance is greater than what we
1780             # already have - if so we are done
1781 0 0         if($distance > $length) {
1782 0           @stack = ();
1783 0           next;
1784             }
1785              
1786             # check that the concept is not one of the forbidden concepts
1787 0 0         if($cuifinder->_forbiddenConcept($concept)) { next; }
  0            
1788              
1789             # check if concept has been visited already through that path
1790 0           my $v = "$concept : $series";
1791 0 0         if(exists $visited{$v}) { next; }
  0            
1792 0           else { $visited{$v}++; }
1793              
1794             # check if we have a path of approrpiate length
1795             # if so add it to the storage
1796 0 0         if($distance == $length) {
1797 0           my $element = $intermediate[$#intermediate];
1798            
1799 0           push @intermediate, $direction;
1800              
1801 0 0         if($flag == 1) {
    0          
1802 0           ${$ends}{$element}++;
  0            
1803 0           push @path_storage, \@intermediate;
1804             }
1805             elsif($flag == 2) {
1806 0 0         if(exists ${$ends}{$element}) {
  0            
1807 0           push @path_storage, \@intermediate;
1808             }
1809            
1810             }
1811 0           next;
1812             }
1813            
1814             # print information into the file if debugpath option is set
1815 0 0         if($option_debugpath) {
1816 0           my $d = $#intermediate+1;
1817 0           print DEBUG_FILE "$concept\t$d\t@intermediate\n";
1818             }
1819            
1820              
1821             # we are going to start with the parents here; the code
1822             # for both is similar except for the relation/direction
1823             # which is why I have the seperate right now - currently
1824            
1825             # if the previous direction was a child we have a change in direction
1826 0           my $dchange = $direction;
1827            
1828             # if the undirected option is set the dchange doesn't matter
1829             # otherwise we need to check
1830 0 0         if(!$option_undirected) {
1831 0 0         if($relation eq "CHD") { $dchange = $direction + 1; }
  0            
1832             }
1833              
1834             # if we have not had more than a single direction change
1835 0 0         if($dchange < 2) {
1836             # search through the parents
1837 0           my $parents = $cuifinder->_getParents($concept);
1838 0           foreach my $parent (@{$parents}) {
  0            
1839            
1840             # check if concept is already in the path
1841 0 0         if($series=~/$parent/) { next; }
  0            
1842 0 0         if($parent eq $concept) { next; }
  0            
1843            
1844             # if it isn't add it to the stack
1845 0           unshift @stack, $parent;
1846 0           unshift @paths, \@intermediate;
1847 0           unshift @relations, "PAR";
1848 0           unshift @directions, $dchange;
1849             }
1850             }
1851            
1852             # now with the chilcren if the previous direction was a parent we have
1853             # have to change the direction
1854 0           $dchange = $direction;
1855             # if the undirected option is set the dchange doesn't matter
1856             # otherwise we need to check
1857 0 0         if(!$option_undirected) {
1858 0 0         if($relation eq "PAR") { $dchange = $direction + 1; }
  0            
1859             }
1860              
1861             # if we have not had more than a single direction change
1862 0 0         if($dchange < 2) {
1863             # now search through the children
1864 0           my $children = $cuifinder->_getChildren($concept);
1865 0           foreach my $child (@{$children}) {
  0            
1866            
1867             # check if child cui has already in the path
1868 0 0         if($series=~/$child/) { next; }
  0            
1869 0 0         if($child eq $concept) { next; }
  0            
1870              
1871             # if it isn't add it to the stack
1872 0           unshift @stack, $child;
1873 0           unshift @paths, \@intermediate;
1874 0           unshift @relations, "CHD";
1875 0           unshift @directions, $dchange;
1876             }
1877             }
1878             }
1879             # set the return
1880 0           my @return_paths = ();
1881 0           foreach my $p (@path_storage) {
1882 0           unshift @{$p}, $start;
  0            
1883 0           my $string = join " " , @{$p};
  0            
1884 0           push @return_paths, $string;
1885             }
1886              
1887 0           return @return_paths;
1888             }
1889            
1890              
1891             # method that finds the minimum depth
1892             # input : $concept <- the first concept
1893             # output: $int <- the minimum depth
1894             sub _findMinimumDepthInRealTime {
1895              
1896 0     0     my $self = shift;
1897 0           my $concept = shift;
1898            
1899 0           my $function = "_findMinimumDepthInRealTime";
1900 0           &_debug($function);
1901            
1902             # check self
1903 0 0 0       if(!defined $self || !ref $self) {
1904 0           $errorhandler->_error($pkg, $function, "", 2);
1905             }
1906              
1907             # check parameter exists
1908 0 0         if(!defined $concept) {
1909 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
1910             }
1911             # check if valid concept
1912 0 0         if(! ($errorhandler->_validCui($concept)) ) {
1913 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
1914             }
1915            
1916             # set the count
1917 0           my %visited = ();
1918            
1919             # set the stack with the roots children
1920 0           my @paths = ();
1921 0           my $rstack = $cuifinder->_getChildren($root);
1922 0           my @stack = @{$rstack};
  0            
1923              
1924 0           foreach my $element (@stack) {
1925 0           my @array = ();
1926 0           push @paths, \@array;
1927             }
1928            
1929             # now loop through the stack
1930 0           while($#stack >= 0) {
1931            
1932 0           my $cui = pop @stack;
1933 0           my $path = pop @paths;
1934            
1935             # set up the new path
1936 0           my @intermediate = @{$path};
  0            
1937 0           my $series = join " ", @intermediate;
1938 0           push @intermediate, $cui;
1939 0           my $distance = $#intermediate;
1940              
1941             # check that the concept is not one of the forbidden concepts
1942 0 0         if($cuifinder->_forbiddenConcept($cui)) { next; }
  0            
1943              
1944             # check if concept has been visited already through that path
1945 0 0         if(exists $visited{$cui}) { next; }
  0            
1946 0           else { $visited{$cui}++; }
1947            
1948             # check if it is our concept2
1949 0 0         if($cui eq $concept) {
1950 0           my $path_length = $distance + 2;
1951 0           return $path_length;
1952             }
1953            
1954             # now search through the children
1955 0           my $children = $cuifinder->_getChildren($cui);
1956 0           foreach my $child (@{$children}) {
  0            
1957             # check if child cui has already in the path
1958 0 0         if($series=~/$child/) { next; }
  0            
1959 0 0         if($child eq $cui) { next; }
  0            
1960              
1961             # if it isn't add it to the stack
1962 0           unshift @stack, $child;
1963 0           unshift @paths, \@intermediate;
1964             }
1965             }
1966             # no path was found return -1
1967 0           return -1;
1968             }
1969              
1970              
1971             # method that finds the maximum depth
1972             # input : $concept <- the first concept
1973             # output: $int <- the minimum depth
1974             sub _findMaximumDepthInRealTime {
1975              
1976 0     0     my $self = shift;
1977 0           my $concept = shift;
1978              
1979 0 0 0       return () if(!defined $self || !ref $self);
1980              
1981 0           my $function = "_findMaximumDepthInRealtime($concept)";
1982 0           &_debug($function);
1983            
1984             # set the storage
1985 0           my $maximum_path_length = -1;
1986              
1987             # set the stack
1988 0           my @stack = ();
1989 0           push @stack, $concept;
1990              
1991             # set the count
1992 0           my %visited = ();
1993              
1994             # set the paths
1995 0           my @paths = ();
1996 0           my @empty = ();
1997 0           push @paths, \@empty;
1998              
1999             # now loop through the stack
2000 0           while($#stack >= 0) {
2001            
2002 0           my $cui = $stack[$#stack];
2003 0           my $path = $paths[$#paths];
2004              
2005             # set up the new path
2006 0           my @intermediate = @{$path};
  0            
2007 0           my $series = join " ", @intermediate;
2008 0           push @intermediate, $cui;
2009            
2010             # print information into the file if debugpath option is set
2011 0 0         if($option_debugpath) {
2012 0           my $d = $#intermediate+1;
2013 0           print DEBUG_FILE "$cui\t$d\t@intermediate\n";
2014             }
2015            
2016             # check that the cui is not one of the forbidden concepts
2017 0 0         if($cuifinder->_forbiddenConcept($cui)) {
2018 0           pop @stack; pop @paths;
  0            
2019 0           next;
2020             }
2021              
2022             # check if concept has been visited already
2023 0 0         if(exists $visited{$cui}{$series}) {
2024 0           pop @stack; pop @paths;
  0            
2025 0           next;
2026             }
2027 0           else { $visited{$cui}{$series}++; }
2028            
2029             # if the concept is the umls root - we are done
2030 0 0         if($cui eq $root) {
2031 0           my $length = $#intermediate + 1;
2032 0 0         if($length > $maximum_path_length) {
2033 0           $maximum_path_length = $length;
2034             }
2035 0           next;
2036             }
2037            
2038             # get all the parents
2039 0           my $parents = $cuifinder->_getParents($cui);
2040              
2041             # if there are no children we are finished with this concept
2042 0 0         if($#{$parents} < 0) {
  0            
2043 0           pop @stack; pop @paths;
  0            
2044 0           next;
2045             }
2046              
2047             # search through the children
2048 0           my $stackflag = 0;
2049 0           foreach my $parent (@{$parents}) {
  0            
2050            
2051             # check if concept has already in the path
2052 0 0         if($series=~/$parent/) { next; }
  0            
2053 0 0         if($cui eq $parent) { next; }
  0            
2054              
2055             # if it isn't continue on with the depth first search
2056 0           push @stack, $parent;
2057 0           push @paths, \@intermediate;
2058 0           $stackflag++;
2059             }
2060            
2061             # check to make certain there were actually children
2062 0 0         if($stackflag == 0) { pop @stack; pop @paths; }
  0            
  0            
2063             }
2064              
2065 0           return $maximum_path_length;
2066             }
2067              
2068             # method that finds all the ancestors of a given concept
2069             # input : $concept1 <- the concept
2070             # output: %ancestors <- hash containing the ancestors
2071             sub _findAncestors {
2072 0     0     my $self = shift;
2073 0           my $concept = shift;
2074            
2075 0           my $function = "_findAncestors";
2076 0           &_debug($function);
2077            
2078             # check self
2079 0 0 0       if(!defined $self || !ref $self) {
2080 0           $errorhandler->_error($pkg, $function, "", 2);
2081             }
2082 0           my @paths = $self->_pathsToRoot($concept);
2083            
2084 0           my %ancestors = ();
2085 0           foreach my $path (@paths) {
2086 0           foreach my $string (@{$path}) {
  0            
2087 0           my @cuis = split/\s+/, $string;
2088 0           foreach my $cui (@cuis) {
2089 0           $ancestors{$cui} = 1;
2090             }
2091             }
2092             }
2093            
2094 0           return \%ancestors;
2095              
2096             }
2097             # method that finds the closeness centrality of a concept
2098             # input : $concept1 <- the concept
2099             # output: $double <- the closeness
2100             sub _findClosenessCentrality {
2101 0     0     my $self = shift;
2102 0           my $concept = shift;
2103            
2104 0           my $function = "_findClosenessCentrality";
2105 0           &_debug($function);
2106            
2107             # check self
2108 0 0 0       if(!defined $self || !ref $self) {
2109 0           $errorhandler->_error($pkg, $function, "", 2);
2110             }
2111              
2112             # get the cuis associated with the config file
2113 0           my $hashref= $cuifinder->_getCuiList();
2114              
2115             # calculate the length of the shortest path for each cui
2116 0           my $sum = 0;
2117 0           foreach my $cui (sort keys %{$hashref}) {
  0            
2118 0 0         if($cui eq $concept) { next; }
  0            
2119 0           my $d = $self->_findShortestPathLength($concept, $cui);
2120 0 0         if($d > 0) {
2121 0           $sum += $d;
2122             }
2123             }
2124            
2125             # return closeness
2126 0           return (1/$sum);
2127             }
2128              
2129             # method that finds the length of the shortest path
2130             # input : $concept1 <- the first concept
2131             # $concept2 <- the second concept
2132             # output: $int <- the length of the shortest path between them
2133             sub _findShortestPathLength {
2134              
2135 0     0     my $self = shift;
2136 0           my $concept1 = shift;
2137 0           my $concept2 = shift;
2138            
2139 0           my $function = "_findShortestPathLength";
2140 0           &_debug($function);
2141            
2142             # check self
2143 0 0 0       if(!defined $self || !ref $self) {
2144 0           $errorhandler->_error($pkg, $function, "", 2);
2145             }
2146              
2147 0 0         if($option_realtime) {
2148              
2149             #my $length = $self->_findShortestPathLengthInCache($concept1, $concept2);
2150             #if(defined $length) { return $length; }
2151             #else {
2152 0           my $length = $self->_findShortestPathLengthInRealTime($concept1, $concept2);
2153             #if(!$option_undirected) {
2154             #$self->_storeShortestPathLengthInCache($concept1, $concept2, $length);
2155             #}
2156 0           return $length;
2157             #}
2158             }
2159             else {
2160 0           my $paths = $self->_findShortestPathThroughLCS($concept1, $concept2);
2161 0           my $path = shift @{$paths};
  0            
2162 0 0         if(defined $path) {
2163 0           my @cuis = split/\s+/, $path;
2164 0           my $length = $#cuis + 1;
2165 0           return $length;
2166             }
2167 0           else { return -1; }
2168             }
2169             }
2170              
2171              
2172             sub _storeShortestPathLengthInCache
2173             {
2174 0     0     my $self = shift;
2175 0           my $concept1 = shift;
2176 0           my $concept2 = shift;
2177 0           my $length = shift;
2178              
2179 0           my $function = "_storeShortestPathLengthInCache";
2180 0           &_debug($function);
2181            
2182             # check self
2183 0 0 0       if(!defined $self || !ref $self) {
2184 0           $errorhandler->_error($pkg, $function, "", 2);
2185             }
2186              
2187             # get the info table name
2188 0           my $cacheTableName = $cuifinder->_getCacheTableName();
2189            
2190             # set the index DB handler
2191 0           my $sdb = $self->{'sdb'};
2192 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
2193              
2194             # store the length in the cache table
2195 0           my $arrRef = $sdb->do("INSERT INTO $cacheTableName (CUI1, CUI2, LENGTH) VALUES ('$concept1', '$concept2', '$length')");
2196 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
2197             }
2198              
2199             sub _findShortestPathLengthInCache
2200             {
2201 0     0     my $self = shift;
2202 0           my $concept1 = shift;
2203 0           my $concept2 = shift;
2204            
2205 0           my $function = "_findShortestPathLengthInCache";
2206 0           &_debug($function);
2207            
2208             # check self
2209 0 0 0       if(!defined $self || !ref $self) {
2210 0           $errorhandler->_error($pkg, $function, "", 2);
2211             }
2212             # check parameter exists
2213 0 0         if(!defined $concept1) {
2214 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
2215             }
2216 0 0         if(!defined $concept2) {
2217 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
2218             }
2219              
2220             # check if valid concept
2221 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
2222 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
2223             }
2224 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
2225 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
2226             }
2227              
2228             # get the info table name
2229 0           my $cacheTableName = $cuifinder->_getCacheTableName();
2230            
2231             # set the index DB handler
2232 0           my $sdb = $self->{'sdb'};
2233 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
2234              
2235             # get length from the cache table if it exists
2236 0           my $arrRef = $sdb->selectcol_arrayref("select LENGTH from $cacheTableName where CUI1=\'$concept1\' and CUI2=\'$concept2\'");
2237 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
2238              
2239             # get the depth from the array
2240 0           my $length = shift @{$arrRef};
  0            
2241            
2242 0           return $length;
2243             }
2244              
2245             # method that finds the length of the shortest path
2246             # input : $concept1 <- the first concept
2247             # $concept2 <- the second concept
2248             # output: $length <- the length of the shortest path between them
2249             sub _findShortestPathLengthInRealTime {
2250              
2251 0     0     my $self = shift;
2252 0           my $concept1 = shift;
2253 0           my $concept2 = shift;
2254            
2255 0           my $function = "_findShortestPathLengthInRealTime";
2256 0           &_debug($function);
2257            
2258             # check self
2259 0 0 0       if(!defined $self || !ref $self) {
2260 0           $errorhandler->_error($pkg, $function, "", 2);
2261             }
2262              
2263             # check parameter exists
2264 0 0         if(!defined $concept1) {
2265 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
2266             }
2267 0 0         if(!defined $concept2) {
2268 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
2269             }
2270              
2271             # check if valid concept
2272 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
2273 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
2274             }
2275 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
2276 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
2277             }
2278            
2279             # we need to check this in both directions because the BFS
2280             # the direction matters and with the undirected option
2281             # we always want to go up and the problem arrises in the
2282             # cases in which we continue down in a straight line such
2283             # that CUI1 is the LCS. Maybe there is a better way to
2284             # do this but I am not certain quite yet
2285             #my $l1 = $self->_findShortestPathLengthInRealTimeBFS($concept1, $concept2, -1);
2286              
2287             # now swap
2288             #my $l2 = $self->_findShortestPathLengthInRealTimeBFS($concept2, $concept1, $l1);
2289            
2290             # return the other if it is -1
2291             #if($l1 < 0) { return $l2; }
2292             #if($l2 < 0) { return $l1; }
2293            
2294             # return the lowest
2295             #return $l1 < $l2 ? $l1 : $l2;
2296              
2297 0           my $length = $self->_findShortestPathLengthInRealTimeBFS2($concept1, $concept2);
2298              
2299 0           return $length;
2300             }
2301            
2302              
2303             # method that finds the length of the shortest path
2304             # input : $concept1 <- the first concept
2305             # $concept2 <- the second concept
2306             # output: $length <- the length of the shortest path between them
2307             sub _findShortestPathLengthInRealTimeBFS2 {
2308              
2309 0     0     my $self = shift;
2310 0           my $concept1 = shift;
2311 0           my $concept2 = shift;
2312            
2313 0           my $function = "_findShortestPathLengthInRealTimeBFS2($concept1, $concept2)";
2314 0           &_debug($function);
2315            
2316             # check self
2317 0 0 0       if(!defined $self || !ref $self) {
2318 0           $errorhandler->_error($pkg, $function, "", 2);
2319             }
2320              
2321             # base case
2322 0 0         if($concept1 eq $concept2) { return 2; }
  0            
2323              
2324             # set the count
2325 0           my %visited1 = (); my %visited2 = ();
  0            
2326            
2327             # set the stack
2328 0           my $rstack1 = $cuifinder->_getParents($concept1);
2329 0           my $rstack2 = $cuifinder->_getParents($concept2);
2330 0           my @stack1 = @{$rstack1}; my @stack2 = @{$rstack2};
  0            
  0            
  0            
2331              
2332 0           my @directions1 = (); my @directions2 = ();
  0            
2333 0           my @relations1 = (); my @relations2 = ();
  0            
2334 0           my @paths1 = (); my @paths2 = ();
  0            
2335 0           my $path_length1 = -1; my $path_length2 = -1;
  0            
2336              
2337             # get the parents
2338 0           foreach my $element (@stack1) {
2339 0           my @array1 = ();
2340 0           push @paths1, \@array1;
2341 0           push @directions1, 0;
2342 0           push @relations1, "PAR";
2343             }
2344 0           foreach my $element (@stack2) {
2345 0           my @array2 = ();
2346 0           push @paths2, \@array2;
2347 0           push @directions2, 0;
2348 0           push @relations2, "PAR";
2349             }
2350              
2351             # now loop through the stack
2352 0   0       while($#stack1 >= 0 || $#stack2 >= 0) {
2353            
2354 0           my $c1 = ""; my $c2 = "";
  0            
2355 0           my $path1 = ""; my $path2 = "";
  0            
2356 0           my $direction1 = ""; my $direction2 = "";
  0            
2357 0           my $relation1 = ""; my $relation2 = "";
  0            
2358 0           my @intermediate1 = (); my @intermediate2 = ();
  0            
2359 0           my $series1 = ""; my $series2 = "";
  0            
2360 0           my $distance1 = -1; my $distance2 = -1;
  0            
2361 0           my $cui1flag = 0; my $cui2flag = 0;
  0            
2362              
2363 0 0         if($#stack1 >=0) {
2364 0           $c1 = pop @stack1;
2365 0           $path1 = pop @paths1;
2366 0           $direction1 = pop @directions1;
2367 0           $relation1 = pop @relations1;
2368              
2369 0           @intermediate1 = @{$path1};
  0            
2370 0           $series1 = join " ", @intermediate1;
2371 0           push @intermediate1, $c1;
2372 0           $distance1 = $#intermediate1;
2373 0           $cui1flag++;
2374             }
2375            
2376 0 0         if($#stack2 >=0) {
2377 0           $c2 = pop @stack2;
2378 0           $path2 = pop @paths2;
2379 0           $direction2 = pop @directions2;
2380 0           $relation2 = pop @relations2;
2381            
2382 0           @intermediate2 = @{$path2};
  0            
2383 0           $series2 = join " ", @intermediate2;
2384 0           push @intermediate2, $c2;
2385 0           $distance2 = $#intermediate2;
2386 0           $cui2flag++;
2387             }
2388              
2389             # check if it is our concept2
2390 0 0         if($c1 eq $concept2) {
2391 0           $path_length1 = $distance1 + 2;
2392 0 0         if($#stack2 < 0) { return $path_length1; }
  0            
2393             }
2394              
2395            
2396             # check if it is our concept2
2397 0 0         if($c2 eq $concept1) {
2398 0           $path_length2 = $distance2 + 2;
2399 0 0         if($#stack1 < 0) { return $path_length2; }
  0            
2400             }
2401              
2402             # if both paths have been set return the shortest
2403 0 0 0       if($path_length1 > -1 && $path_length2 > -1) {
2404 0 0         return $path_length1 < $path_length2 ? $path_length1 : $path_length2;
2405             }
2406              
2407             # if path length1 is set and is distance2 is greater then what
2408             # ever path we find for distance2 is going to be more than
2409             # for pathlength1 so return (this also works for pathlength2)
2410 0 0 0       if($path_length1 > -1 && $path_length1 <= ($distance2+2)) { return $path_length1; }
  0            
2411 0 0 0       if($path_length2 > -1 && $path_length2 <= ($distance1+2)) { return $path_length2; }
  0            
2412            
2413              
2414             # check if concept has been visited already through that path
2415 0           my $flag1 = 0; my $flag2 = 0;
  0            
2416 0 0         if(exists $visited1{$c1}) { $flag1++; }
  0            
2417 0           else { $visited1{$c1}++; }
2418              
2419 0 0         if(exists $visited2{$c2}) { $flag2++; }
  0            
2420 0           else { $visited2{$c2}++; }
2421              
2422             # set the flags if nothing exists
2423 0 0         if($cui1flag == 0) { $flag1++; }
  0            
2424 0 0         if($cui2flag == 0) { $flag2++; }
  0            
2425              
2426             # check that the concept is not one of the forbidden concepts
2427 0 0 0       if($cui1flag > 0 && $cuifinder->_forbiddenConcept($c1)) { $flag1++; }
  0            
2428 0 0 0       if($cui2flag > 0 && $cuifinder->_forbiddenConcept($c2)) { $flag2++; }
  0            
2429              
2430             # if both concepts have been flagged - next
2431 0 0 0       if($flag1 > 0 && $flag2 > 0) { next; }
  0            
2432              
2433             # if the previous direction was a child we have a change in direction
2434 0           my $dchange1 = $direction1;
2435 0           my $dchange2 = $direction2;
2436            
2437             # if the undirected option is set the dchange doesn't matter
2438             # otherwise we need to check
2439 0 0         if(!$option_undirected) {
2440 0 0         if($relation1 eq "CHD") { $dchange1 = $direction1 + 1; }
  0            
2441 0 0         if($relation2 eq "CHD") { $dchange2 = $direction2 + 1; }
  0            
2442             }
2443            
2444             # if we have not had more than a single direction change
2445 0           my $parents1; my $parents2;
2446 0 0 0       if($flag1 == 0 && $dchange1 < 2) {
2447 0           $parents1 = $cuifinder->_getParents($c1);
2448             }
2449 0 0 0       if($flag2 == 0 && $dchange2 < 2) {
2450 0           $parents2 = $cuifinder->_getParents($c2);
2451             }
2452            
2453 0           foreach my $parent1 (@{$parents1}) {
  0            
2454             # check if concept has already in the path
2455 0 0         if($series1=~/$parent1/) { next; }
  0            
2456 0 0         if($parent1 eq $c1) { next; }
  0            
2457 0           unshift @stack1, $parent1;
2458 0           unshift @paths1, \@intermediate1;
2459 0           unshift @relations1, "PAR";
2460 0           unshift @directions1, $dchange1;
2461             }
2462              
2463 0           foreach my $parent2 (@{$parents2}) {
  0            
2464             # check if concept has already in the path
2465 0 0         if($series2=~/$parent2/) { next; }
  0            
2466 0 0         if($parent2 eq $c2) { next; }
  0            
2467              
2468 0           unshift @stack2, $parent2;
2469 0           unshift @paths2, \@intermediate2;
2470 0           unshift @relations2, "PAR";
2471 0           unshift @directions2, $dchange2;
2472             }
2473            
2474            
2475             # now with the chilcren if the previous direction was a parent we have
2476             # have to change the direction
2477 0           $dchange1 = $direction1;
2478 0           $dchange2 = $direction2;
2479              
2480             # if the undirected option is set the dchange doesn't matter
2481             # otherwise we need to check
2482 0 0         if(!$option_undirected) {
2483 0 0         if($relation1 eq "PAR") { $dchange1 = $direction1 + 1; }
  0            
2484 0 0         if($relation2 eq "PAR") { $dchange2 = $direction2 + 1; }
  0            
2485             }
2486              
2487            
2488              
2489             # if we have not had more than a single direction change
2490             # now search through the children
2491 0           my $children1 = undef; my $children2 = undef;
  0            
2492 0 0 0       if($flag1 == 0 && $dchange1 < 2) {
2493 0           $children1 = $cuifinder->_getChildren($c1);
2494             }
2495              
2496 0 0 0       if($flag2 == 0 && $dchange2 < 2) {
2497 0           $children2 = $cuifinder->_getChildren($c2);
2498             }
2499            
2500 0           foreach my $child1 (@{$children1}) {
  0            
2501             # check if child cui has already in the path
2502 0 0         if($series1=~/$child1/) { next; }
  0            
2503 0 0         if($child1 eq $c1) { next; }
  0            
2504            
2505             # if not continue
2506 0           unshift @stack1, $child1;
2507 0           unshift @paths1, \@intermediate1;
2508 0           unshift @relations1, "CHD";
2509 0           unshift @directions1, $dchange1;
2510             }
2511              
2512 0           foreach my $child2 (@{$children2}) {
  0            
2513             # check if child cui has already in the path
2514 0 0         if($series2=~/$child2/) { next; }
  0            
2515 0 0         if($child2 eq $c2) { next; }
  0            
2516            
2517             # if not continue
2518 0           unshift @stack2, $child2;
2519 0           unshift @paths2, \@intermediate2;
2520 0           unshift @relations2, "CHD";
2521 0           unshift @directions2, $dchange2;
2522             }
2523             }
2524             # no path was found return -1
2525 0           return -1;
2526             }
2527              
2528              
2529             # method that finds the length of the shortest path
2530             # input : $concept1 <- the first concept
2531             # $concept2 <- the second concept
2532             # output: $length <- the length of the shortest path between them
2533             sub _findShortestPathLengthInRealTimeBFS {
2534              
2535 0     0     my $self = shift;
2536 0           my $concept1 = shift;
2537 0           my $concept2 = shift;
2538 0           my $length = shift;
2539            
2540 0           my $function = "_findShortestPathLengthInRealTimeBFS($concept1, $concept2, $length)";
2541 0           &_debug($function);
2542            
2543             # check self
2544 0 0 0       if(!defined $self || !ref $self) {
2545 0           $errorhandler->_error($pkg, $function, "", 2);
2546             }
2547              
2548             # set the count
2549 0           my %visited = ();
2550            
2551             # set the stack
2552 0           my $rstack = $cuifinder->_getParents($concept1);
2553 0           my @stack = @{$rstack};
  0            
2554 0           my @directions = ();
2555 0           my @relations = ();
2556 0           my @paths = ();
2557            
2558             # get the parents
2559 0           foreach my $element (@stack) {
2560 0           my @array = ();
2561 0           push @paths, \@array;
2562 0           push @directions, 0;
2563 0           push @relations, "PAR";
2564             }
2565            
2566             # now loop through the stack
2567 0           while($#stack >= 0) {
2568            
2569 0           my $concept = pop @stack;
2570 0           my $path = pop @paths;
2571 0           my $direction = pop @directions;
2572 0           my $relation = pop @relations;
2573            
2574             # set up the new path
2575 0           my @intermediate = @{$path};
  0            
2576 0           my $series = join " ", @intermediate;
2577 0           push @intermediate, $concept;
2578 0           my $distance = $#intermediate;
2579              
2580             # if we are going in the other direction and we
2581             # have already found a shorter path the other way
2582 0 0 0       if( ($length) > 0 && ( ($distance+2) >= $length) ) {
2583 0           return $length;
2584             }
2585              
2586             # check if it is our concept2
2587 0 0         if($concept eq $concept2) {
2588 0           my $path_length = $distance + 2;
2589 0           return $path_length;
2590             }
2591              
2592             # check if concept has been visited already through that path
2593 0 0         if(exists $visited{$concept}) { next; }
  0            
2594 0           else { $visited{$concept}++; }
2595              
2596             # check that the concept is not one of the forbidden concepts
2597 0 0         if($cuifinder->_forbiddenConcept($concept)) { next; }
  0            
2598            
2599             # print information into the file if debugpath option is set
2600 0 0         if($option_debugpath) {
2601 0           my $d = $#intermediate+1;
2602 0           print DEBUG_FILE "$concept\t$d\t@intermediate\n";
2603             }
2604            
2605             # if the previous direction was a child we have a change in direction
2606 0           my $dchange = $direction;
2607            
2608             # if the undirected option is set the dchange doesn't matter
2609             # otherwise we need to check
2610 0 0         if(!$option_undirected) {
2611 0 0         if($relation eq "CHD") { $dchange = $direction + 1; }
  0            
2612             }
2613            
2614             # if we have not had more than a single direction change
2615 0 0         if($dchange < 2) {
2616             # search through the parents
2617 0           my $parents = $cuifinder->_getParents($concept);
2618 0           foreach my $parent (@{$parents}) {
  0            
2619             # check if concept has already in the path
2620 0 0         if($series=~/$parent/) { next; }
  0            
2621 0 0         if($parent eq $concept) { next; }
  0            
2622 0           unshift @stack, $parent;
2623 0           unshift @paths, \@intermediate;
2624 0           unshift @relations, "PAR";
2625 0           unshift @directions, $dchange;
2626             }
2627             }
2628            
2629             # now with the chilcren if the previous direction was a parent we have
2630             # have to change the direction
2631 0           $dchange = $direction;
2632             # if the undirected option is set the dchange doesn't matter
2633             # otherwise we need to check
2634 0 0         if(!$option_undirected) {
2635 0 0         if($relation eq "PAR") { $dchange = $direction + 1; }
  0            
2636             }
2637            
2638             # if we have not had more than a single direction change
2639 0 0         if($dchange < 2) {
2640             # now search through the children
2641 0           my $children = $cuifinder->_getChildren($concept);
2642 0           foreach my $child (@{$children}) {
  0            
2643            
2644             # check if child cui has already in the path
2645 0 0         if($series=~/$child/) { next; }
  0            
2646 0 0         if($child eq $concept) { next; }
  0            
2647            
2648             # if not continue
2649 0           unshift @stack, $child;
2650 0           unshift @paths, \@intermediate;
2651 0           unshift @relations, "CHD";
2652 0           unshift @directions, $dchange;
2653             }
2654             }
2655             }
2656            
2657             # no path was found return -1
2658 0           return -1;
2659             }
2660              
2661              
2662             # this function finds the shortest path between
2663             # two concepts and returns the path. in the process
2664             # it determines the least common subsumer for that
2665             # path so it returns both
2666             # input : $concept1 <- string containing the first cui
2667             # $concept2 <- string containing the second
2668             # output: $hash <- reference to a hash containing the
2669             # lcs as the key and the path as the
2670             # value
2671             sub _shortestPath {
2672              
2673 0     0     my $self = shift;
2674 0           my $concept1 = shift;
2675 0           my $concept2 = shift;
2676              
2677 0           my $function = "_shortestPath";
2678 0           &_debug($function);
2679            
2680             # check self
2681 0 0 0       if(!defined $self || !ref $self) {
2682 0           $errorhandler->_error($pkg, $function, "", 2);
2683             }
2684              
2685             # check parameter exists
2686 0 0         if(!defined $concept1) {
2687 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
2688             }
2689 0 0         if(!defined $concept2) {
2690 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept2.", 4);
2691             }
2692              
2693             # check if valid concept
2694 0 0         if(! ($errorhandler->_validCui($concept1)) ) {
2695 0           $errorhandler->_error($pkg, $function, "Concept ($concept1) in not valid.", 6);
2696             }
2697 0 0         if(! ($errorhandler->_validCui($concept2)) ) {
2698 0           $errorhandler->_error($pkg, $function, "Concept ($concept2) in not valid.", 6);
2699             }
2700              
2701             # Get the paths to root for each ofhte concepts
2702 0           my $lTrees = $self->_pathsToRoot($concept1);
2703              
2704 0           my $rTrees = $self->_pathsToRoot($concept2);
2705            
2706             # Find the shortest path in these trees.
2707 0           my %lcsLengths = ();
2708 0           my %lcsPaths = ();
2709 0           my $lcs = "";
2710 0           foreach my $lTree (@{$lTrees}) {
  0            
2711 0           foreach my $rTree (@{$rTrees}) {
  0            
2712 0           $lcs = $self->_getLCSfromTrees($lTree, $rTree);
2713 0 0         if(defined $lcs) {
2714            
2715 0           my $lCount = 0;
2716 0           my $rCount = 0;
2717 0           my $length = 0;
2718 0           my $concept = "";
2719            
2720 0           my @lArray = ();
2721 0           my @rArray = ();
2722            
2723 0           my @lTreeArray = split/\s+/, $lTree;
2724 0           my @rTreeArray = split/\s+/, $rTree;
2725            
2726 0           foreach $concept (reverse @lTreeArray) {
2727 0           $lCount++;
2728 0           push @lArray, $concept;
2729 0 0         last if($concept eq $lcs);
2730              
2731             }
2732 0           foreach $concept (reverse @rTreeArray) {
2733 0           $rCount++;
2734 0 0         last if($concept eq $lcs);
2735 0           push @rArray, $concept;
2736            
2737             }
2738              
2739             # length of the path
2740 0 0         if(exists $lcsLengths{$lcs}) {
2741 0 0         if($lcsLengths{$lcs} >= ($rCount + $lCount - 1)) {
2742 0           $lcsLengths{$lcs} = $rCount + $lCount - 1;
2743 0           my @fullpath = (@lArray, (reverse @rArray));
2744 0           push @{$lcsPaths{$lcs}}, \@fullpath;
  0            
2745             }
2746             }
2747             else {
2748 0           $lcsLengths{$lcs} = $rCount + $lCount - 1;
2749 0           my @fullpath = (@lArray, (reverse @rArray));
2750 0           push @{$lcsPaths{$lcs}}, \@fullpath;
  0            
2751             }
2752             }
2753             }
2754             }
2755            
2756             # If no paths exist
2757 0 0         if(!scalar(keys(%lcsPaths))) {
2758 0           return undef;
2759             }
2760              
2761             # get the lcses and their associated path(s)
2762 0           my %rhash = ();
2763 0           my $prev_len = -1;
2764 0           foreach my $lcs (sort {$lcsLengths{$a} <=> $lcsLengths{$b}} keys(%lcsLengths)) {
  0            
2765 0 0 0       if( ($prev_len == -1) or ($prev_len == $lcsLengths{$lcs}) ) {
2766 0           foreach my $pathref (@{$lcsPaths{$lcs}}) {
  0            
2767 0 0         if( ($#{$pathref}+1) == $lcsLengths{$lcs}) {
  0            
2768 0           my $path = join " ", @{$pathref};
  0            
2769 0           $rhash{$path} = $lcs;
2770             }
2771             }
2772             }
2773 0           else { last; }
2774 0           $prev_len = $lcsLengths{$lcs};
2775             }
2776            
2777             # return a reference to the hash containing the lcses and their path(s)
2778 0           return \%rhash;
2779             }
2780              
2781              
2782             # method that finds the length of the shortest path
2783             # input : $concept1 <- the first concept
2784             # $concept2 <- the second concept
2785             # output: $int <- number cuis closer to concept1 than concept2
2786             sub _findNumberOfCloserConcepts {
2787              
2788 0     0     my $self = shift;
2789 0           my $concept1 = shift;
2790 0           my $concept2 = shift;
2791            
2792 0           my $function = "_findNumberOfCloserConcepts($concept1, $concept2)";
2793 0           &_debug($function);
2794            
2795             # check self
2796 0 0 0       if(!defined $self || !ref $self) {
2797 0           $errorhandler->_error($pkg, $function, "", 2);
2798             }
2799              
2800 0 0         if($concept1 eq $concept2) { return 0; }
  0            
2801              
2802 0           my %closerConceptHash = ();
2803              
2804             # set the count
2805 0           my %visited1 = (); my %visited2 = ();
  0            
2806            
2807             # set the stack
2808 0           my $rstack1 = $cuifinder->_getParents($concept1);
2809 0           my $rstack2 = $cuifinder->_getParents($concept2);
2810 0           my @stack1 = @{$rstack1}; my @stack2 = @{$rstack2};
  0            
  0            
  0            
2811              
2812 0           my @directions1 = (); my @directions2 = ();
  0            
2813 0           my @relations1 = (); my @relations2 = ();
  0            
2814 0           my @paths1 = (); my @paths2 = ();
  0            
2815 0           my $path_length1 = -1; my $path_length2 = -1;
  0            
2816              
2817             # get the parents
2818 0           foreach my $element (@stack1) {
2819 0           my @array1 = ();
2820 0           push @paths1, \@array1;
2821 0           push @directions1, 0;
2822 0           push @relations1, "PAR";
2823             }
2824 0           foreach my $element (@stack2) {
2825 0           my @array2 = ();
2826 0           push @paths2, \@array2;
2827 0           push @directions2, 0;
2828 0           push @relations2, "PAR";
2829             }
2830            
2831             # now loop through the stack
2832 0   0       while($#stack1 >= 0 || $#stack2 >= 0) {
2833            
2834 0           my $c1 = ""; my $c2 = "";
  0            
2835 0           my $path1 = ""; my $path2 = "";
  0            
2836 0           my $direction1 = ""; my $direction2 = "";
  0            
2837 0           my $relation1 = ""; my $relation2 = "";
  0            
2838 0           my @intermediate1 = (); my @intermediate2 = ();
  0            
2839 0           my $series1 = ""; my $series2 = "";
  0            
2840 0           my $distance1 = -1; my $distance2 = -1;
  0            
2841 0           my $cui1flag = 0; my $cui2flag = 0;
  0            
2842              
2843 0 0         if($#stack1 >=0) {
2844 0           $c1 = pop @stack1;
2845 0           $path1 = pop @paths1;
2846 0           $direction1 = pop @directions1;
2847 0           $relation1 = pop @relations1;
2848              
2849 0           @intermediate1 = @{$path1};
  0            
2850 0           $series1 = join " ", @intermediate1;
2851 0           push @intermediate1, $c1;
2852 0           $distance1 = $#intermediate1;
2853 0           $cui1flag++;
2854             }
2855            
2856 0 0         if($#stack2 >=0) {
2857 0           $c2 = pop @stack2;
2858 0           $path2 = pop @paths2;
2859 0           $direction2 = pop @directions2;
2860 0           $relation2 = pop @relations2;
2861            
2862 0           @intermediate2 = @{$path2};
  0            
2863 0           $series2 = join " ", @intermediate2;
2864 0           push @intermediate2, $c2;
2865 0           $distance2 = $#intermediate2;
2866 0           $cui2flag++;
2867             }
2868            
2869              
2870            
2871             # check if it is our concept2
2872 0 0         if($c1 eq $concept2) {
2873 0           $path_length1 = $distance1 + 2;
2874 0 0         if($#stack2 < 0) { last; }
  0            
2875             }
2876              
2877            
2878             # check if it is our concept2
2879 0 0         if($c2 eq $concept1) {
2880 0           $path_length2 = $distance2 + 2;
2881 0 0         if($#stack1 < 0) { last; }
  0            
2882             }
2883              
2884             # if both paths have been set return the shortest
2885 0 0 0       if($path_length1 > -1 && $path_length2 > -1) { last; }
  0            
2886              
2887             # if path length1 is set and is distance2 is greater then what
2888             # ever path we find for distance2 is going to be more than
2889             # for pathlength1 so return (this also works for pathlength2)
2890 0 0 0       if($path_length1 > -1 && $path_length1 <= ($distance2+2)) { last; }
  0            
2891 0 0 0       if($path_length2 > -1 && $path_length2 <= ($distance1+2)) { last; }
  0            
2892            
2893              
2894             # check if concept has been visited already through that path
2895 0           my $flag1 = 0; my $flag2 = 0;
  0            
2896 0 0         if(exists $visited1{$c1}) { $flag1++; }
  0            
2897 0           else { $visited1{$c1}++; }
2898              
2899 0 0         if(exists $visited2{$c2}) { $flag2++; }
  0            
2900 0           else { $visited2{$c2}++; }
2901              
2902             # set the flags if nothing exists
2903 0 0         if($cui1flag == 0) { $flag1++; }
  0            
2904 0 0         if($cui2flag == 0) { $flag2++; }
  0            
2905              
2906             # check that the concept is not one of the forbidden concepts
2907 0 0 0       if($cui1flag > 0 && $cuifinder->_forbiddenConcept($c1)) { $flag1++; }
  0            
2908 0 0 0       if($cui2flag > 0 && $cuifinder->_forbiddenConcept($c2)) { $flag2++; }
  0            
2909              
2910             # if both concepts have been flagged - next
2911 0 0 0       if($flag1 > 0 && $flag2 > 0) { next; }
  0            
2912              
2913             # add concepts to the closest hash if closer
2914 0 0         if($flag1 <= 0) {
2915 0 0         if(! (exists $closerConceptHash{$c1}) ) {
2916 0           $closerConceptHash{$c1} = $distance1 + 2;
2917             }
2918             }
2919 0 0         if($flag2 <= 0) {
2920 0 0         if(! (exists $closerConceptHash{$c2}) ) {
2921 0           $closerConceptHash{$c2} = $distance2 + 2;
2922             }
2923             }
2924              
2925             # if the previous direction was a child we have a change in direction
2926 0           my $dchange1 = $direction1;
2927 0           my $dchange2 = $direction2;
2928            
2929             # if the undirected option is set the dchange doesn't matter
2930             # otherwise we need to check
2931 0 0         if(!$option_undirected) {
2932 0 0         if($relation1 eq "CHD") { $dchange1 = $direction1 + 1; }
  0            
2933 0 0         if($relation2 eq "CHD") { $dchange2 = $direction2 + 1; }
  0            
2934             }
2935            
2936             # if we have not had more than a single direction change
2937 0           my $parents1 = undef; my $parents2 = undef;
  0            
2938 0 0 0       if($flag1 == 0 && $dchange1 < 2) {
2939 0           $parents1 = $cuifinder->_getParents($c1);
2940             }
2941 0 0 0       if($flag2 == 0 && $dchange2 < 2) {
2942 0           $parents2 = $cuifinder->_getParents($c2);
2943             }
2944            
2945 0           foreach my $parent1 (@{$parents1}) {
  0            
2946             # check if concept has already in the path
2947 0 0         if($series1=~/$parent1/) { next; }
  0            
2948 0 0         if($parent1 eq $c1) { next; }
  0            
2949 0           unshift@stack1, $parent1;
2950 0           unshift @paths1, \@intermediate1;
2951 0           unshift @relations1, "PAR";
2952 0           unshift @directions1, $dchange1;
2953             }
2954              
2955 0           foreach my $parent2 (@{$parents2}) {
  0            
2956             # check if concept has already in the path
2957 0 0         if($series2=~/$parent2/) { next; }
  0            
2958 0 0         if($parent2 eq $c2) { next; }
  0            
2959              
2960 0           unshift @stack2, $parent2;
2961 0           unshift @paths2, \@intermediate2;
2962 0           unshift @relations2, "PAR";
2963 0           unshift @directions2, $dchange2;
2964             }
2965            
2966            
2967             # now with the chilcren if the previous direction was a parent we have
2968             # have to change the direction
2969 0           $dchange1 = $direction1;
2970 0           $dchange2 = $direction2;
2971              
2972             # if the undirected option is set the dchange doesn't matter
2973             # otherwise we need to check
2974 0 0         if(!$option_undirected) {
2975 0 0         if($relation1 eq "PAR") { $dchange1 = $direction1 + 1; }
  0            
2976 0 0         if($relation2 eq "PAR") { $dchange2 = $direction2 + 1; }
  0            
2977             }
2978              
2979            
2980              
2981             # if we have not had more than a single direction change
2982             # now search through the children
2983 0           my $children1 = undef; my $children2 = undef;
  0            
2984 0 0 0       if($flag1 == 0 && $dchange1 < 2) {
2985 0           $children1 = $cuifinder->_getChildren($c1);
2986             }
2987              
2988 0 0 0       if($flag2 == 0 && $dchange2 < 2) {
2989 0           $children2 = $cuifinder->_getChildren($c2);
2990             }
2991            
2992 0           foreach my $child1 (@{$children1}) {
  0            
2993             # check if child cui has already in the path
2994 0 0         if($series1=~/$child1/) { next; }
  0            
2995 0 0         if($child1 eq $c1) { next; }
  0            
2996            
2997             # if not continue
2998 0           unshift @stack1, $child1;
2999 0           unshift @paths1, \@intermediate1;
3000 0           unshift @relations1, "CHD";
3001 0           unshift @directions1, $dchange1;
3002             }
3003              
3004 0           foreach my $child2 (@{$children2}) {
  0            
3005             # check if child cui has already in the path
3006 0 0         if($series2=~/$child2/) { next; }
  0            
3007 0 0         if($child2 eq $c2) { next; }
  0            
3008            
3009             # if not continue
3010 0           unshift @stack2, $child2;
3011 0           unshift @paths2, \@intermediate2;
3012 0           unshift @relations2, "CHD";
3013 0           unshift @directions2, $dchange2;
3014             }
3015             }
3016              
3017 0 0 0       if($path_length1 < 0 && $path_length2 < 0) { return -1; }
  0            
3018            
3019 0 0         my $length = $path_length1 < $path_length2 ? $path_length1 : $path_length2;
3020            
3021 0 0         if($path_length1 < 0) { $length = $path_length2; }
  0            
3022 0 0         if($path_length2 < 0) { $length = $path_length1; }
  0            
3023              
3024 0           my $counter = 0;
3025 0           foreach my $cui (sort keys %closerConceptHash) {
3026 0 0         if($closerConceptHash{$cui} < $length) {
3027 0           $counter++;
3028             }
3029             }
3030              
3031             # no path was found return -1
3032 0           return $counter;
3033             }
3034              
3035             sub _findDescendants {
3036 0     0     my $self = shift;
3037 0           my $concept = shift;
3038            
3039 0           my $function = "_findDescendants";
3040 0           &_debug($function);
3041            
3042             # check self
3043 0 0 0       if(!defined $self || !ref $self) {
3044 0           $errorhandler->_error($pkg, $function, "", 2);
3045             }
3046             # check parameter exists
3047 0 0         if(!defined $concept) {
3048 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept1.", 4);
3049             }
3050             # check if valid concept
3051 0 0         if(! ($errorhandler->_validCui($concept)) ) {
3052 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
3053             }
3054            
3055 0           my %decendents = ();
3056            
3057 0           my $children = $cuifinder->_getChildren($concept);
3058              
3059 0           foreach my $child (@{$children}) {
  0            
3060 0           my @array = ();
3061 0           push @array, $root;
3062 0           my $path = \@array;
3063              
3064 0           &_descendentSearch($child, $path, \%decendents, *F);
3065            
3066             }
3067            
3068 0           return \%decendents;
3069             }
3070              
3071             sub _descendentSearch
3072             {
3073 0     0     my $concept = shift;
3074 0           my $array = shift;
3075 0           my $decendents = shift;
3076 0           local(*F) = shift;
3077              
3078             # set the new path
3079 0           my @path = @{$array};
  0            
3080 0           push @path, $concept;
3081            
3082 0           my $series = join " ", @path;
3083            
3084             # print information into the file if debugpath option is set
3085 0 0         if($option_debugpath) { print F "$concept\t$series\n"; }
  0            
3086            
3087             # get all the children
3088 0           my $children = $cuifinder->_getChildren($concept);
3089              
3090             # search through the children
3091 0           foreach my $child (@{$children}) {
  0            
3092            
3093             # check if child cui has already in the path
3094 0           my $flag = 0;
3095 0           foreach my $cui (@path) {
3096 0 0         if($cui eq $child) {
3097 0           $flag = 1;
3098             }
3099             }
3100              
3101             # if it isn't continue on with the depth first search
3102 0 0         if($flag == 0) {
3103             # check that the concept is not a forbidden concept
3104 0 0         if($cuifinder->_forbiddenConcept($child) == 1) { return; }
  0            
3105              
3106             # store the decendents
3107 0           $decendents->{$child} = 1;
3108            
3109 0           &_descendentSearch($child, \@path, $decendents, *F);
3110             }
3111             }
3112             }
3113              
3114             1;
3115              
3116             __END__