File Coverage

blib/lib/UMLS/Interface/ICFinder.pm
Criterion Covered Total %
statement 15 561 2.6
branch 0 226 0.0
condition 0 84 0.0
subroutine 5 30 16.6
pod 0 1 0.0
total 20 902 2.2


line stmt bran cond sub pod time code
1             # UMLS::Interface::ICFinder
2             # (Last Updated $Id: ICFinder.pm,v 1.37 2014/06/27 13:23:47 btmcinnes Exp $)
3             #
4             # Perl module that provides a perl interface to the
5             # Unified Medical Language System (UMLS)
6             #
7             # Copyright (c) 2004-2010,
8             #
9             # Bridget T. McInnes, University of Minnesota Twin Cities
10             # bthomson at cs.umn.edu
11             #
12             # Siddharth Patwardhan, University of Utah, Salt Lake City
13             # sidd at cs.utah.edu
14             #
15             # Serguei Pakhomov, University of Minnesota Twin Cities
16             # pakh0002 at umn.edu
17             #
18             # Ted Pedersen, University of Minnesota, Duluth
19             # tpederse at d.umn.edu
20             #
21             # Ying Liu, University of Minnesota
22             # liux0935 at umn.edu
23             #
24             # This program is free software; you can redistribute it and/or
25             # modify it under the terms of the GNU General Public License
26             # as published by the Free Software Foundation; either version 2
27             # of the License, or (at your option) any later version.
28             #
29             # This program is distributed in the hope that it will be useful,
30             # but WITHOUT ANY WARRANTY; without even the implied warranty of
31             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32             # GNU General Public License for more details.
33             #
34             # You should have received a copy of the GNU General Public License
35             # along with this program; if not, write to
36             #
37             # The Free Software Foundation, Inc.,
38             # 59 Temple Place - Suite 330,
39             # Boston, MA 02111-1307, USA.
40              
41             package UMLS::Interface::ICFinder;
42              
43 24     24   80 use Fcntl;
  24         30  
  24         4073  
44 24     24   94 use strict;
  24         25  
  24         389  
45 24     24   68 use warnings;
  24         23  
  24         533  
46 24     24   73 use DBI;
  24         22  
  24         651  
47 24     24   73 use bytes;
  24         21  
  24         102  
48              
49             my $pkg = "UMLS::Interface::ICFinder";
50              
51             my $root = "";
52              
53             my $debug = 0;
54              
55             my %propagationFreq = ();
56             my %propagationHash = ();
57              
58             my $propagationFile = "";
59             my $frequencyFile = "";
60              
61             my %frequencyHash = ();
62              
63             my $option_realtime = undef;
64             my $option_icpropagation = undef;
65             my $option_icfrequency = undef;
66             my $option_t = undef;
67             my $smooth = 0;
68             my $configN = 0;
69              
70             my $errorhandler = "";
71             my $cuifinder = "";
72             my $pathfinder = "";
73              
74             my $max_leaves = 0;
75              
76             # UMLS-specific stuff ends ----------
77              
78             # -------------------- Class methods start here --------------------
79              
80             # method to create a new UMLS::Interface::PathFinder object
81             sub new {
82 0     0 0   my $self = {};
83 0           my $className = shift;
84 0           my $params = shift;
85 0           my $chandler = shift;
86 0           my $phandler = shift;
87              
88             # initialize error handler
89 0           $errorhandler = UMLS::Interface::ErrorHandler->new();
90 0 0         if(! defined $errorhandler) {
91 0           print STDERR "The error handler did not get passed properly.\n";
92 0           exit;
93             }
94            
95             # initialize the cuifinder
96 0           $cuifinder = $chandler;
97 0 0         if(! (defined $chandler)) {
98 0           $errorhandler->_error($pkg,
99             "new",
100             "The CuiFinder handler did not get passed properly",
101             8);
102             }
103              
104             # initialize the pathfinder
105 0           $pathfinder = $phandler;
106 0 0         if(! (defined $phandler)) {
107 0           $errorhandler->_error($pkg,
108             "new",
109             "The PathFinder handler did not get passed properly",
110             8);
111             }
112              
113             # bless the object.
114 0           bless($self, $className);
115              
116 0           return $self;
117             }
118              
119             # Method to initialize the UMLS::Interface::ICFinder object.
120             sub _setPropagationParameters
121             {
122 0     0     my $self = shift;
123 0           my $params = shift;
124              
125             # set function name
126 0           my $function = "_setPropagationParameters";
127 0           &_debug($function);
128            
129             # check self
130 0 0 0       if(!defined $self || !ref $self) {
131 0           $errorhandler->_error($pkg, $function, "", 2);
132             }
133            
134             # get the umlsinterfaceindex database from CuiFinder
135 0           my $sdb = $cuifinder->_getIndexDB();
136 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
137 0           $self->{'sdb'} = $sdb;
138            
139             # get the root
140 0           $root = $cuifinder->_root();
141              
142             # set up the options
143 0           $self->_setOptions($params);
144              
145             # load the propagation hash if the option is specified
146 0 0         if($option_icpropagation) {
147 0           $self->_loadPropagationHashFromFile();
148             }
149              
150             # load the frequency hash if hte option is specified
151 0 0         if($option_icfrequency) {
152 0           $self->_loadFrequencyHash();
153             }
154             }
155              
156              
157             # print out the function name to standard error
158             # input : $function <- string containing function name
159             # output:
160             sub _debug {
161 0     0     my $function = shift;
162 0 0         if($debug) { print STDERR "In UMLS::Interface::ICFinder::$function\n"; }
  0            
163             }
164              
165             # method to set the global parameter options
166             # input : $params <- reference to a hash
167             # output:
168             sub _setOptions
169             {
170 0     0     my $self = shift;
171 0           my $params = shift;
172            
173 0           my $function = "_setOptions";
174            
175             # check self
176 0 0 0       if(!defined $self || !ref $self) {
177 0           $errorhandler->_error($pkg, $function, "", 2);
178             }
179            
180             # get all the parameters
181 0           my $debugoption = $params->{'debug'};
182 0           my $t = $params->{'t'};
183 0           my $icpropagation = $params->{'icpropagation'};
184 0           my $icfrequency = $params->{'icfrequency'};
185 0           my $icsmooth = $params->{'smooth'};
186 0           my $realtime = $params->{'realtime'};
187              
188 0           my $output = "";
189              
190             # check if options have been defined
191 0 0 0       if(defined $icpropagation || defined $icfrequency || defined $realtime ||
      0        
      0        
      0        
192             defined $debugoption || defined $icsmooth) {
193 0           $output .= "\nICFinder User Options:\n";
194             }
195              
196             # check if the debug option has been been defined
197 0 0         if(defined $debugoption) {
198 0           $debug = 1;
199 0           $output .= " --debug option set\n";
200             }
201            
202 0 0         if(defined $icsmooth) {
203 0           $smooth = $icsmooth;
204 0           $output .= " --smooth\n";
205             }
206              
207             # check if the propagation option has been identified
208 0 0         if(defined $icpropagation) {
209 0           $option_icpropagation = 1;
210 0           $propagationFile = $icpropagation;
211 0           $output .= " --icpropagation $icpropagation\n";
212             }
213              
214             # check if the frequency option has been identified
215 0 0         if(defined $icfrequency) {
216 0           $option_icfrequency = 1;
217 0           $frequencyFile = $icfrequency;
218 0           $output .= " --icfrequency $icfrequency\n";
219             }
220              
221             # check if the realtime option has been identified
222 0 0         if(defined $realtime) {
223 0           $option_realtime = 1;
224 0           $output .= " --realtime option set\n";
225             }
226              
227 0           &_debug($function);
228            
229 0 0         if(defined $t) {
230 0           $option_t = 1;
231             }
232             else {
233 0           print STDERR "$output\n";
234             }
235             }
236              
237              
238             # method to set the realtime global parameter options
239             # input : bool <- 1 (turn on) 0 (turn off)
240             # output:
241             sub _setRealtimeOption {
242              
243 0     0     my $self = shift;
244 0           my $option = shift;
245              
246 0           my $function = "_setRealtimeOption";
247 0           &_debug($function);
248              
249             # check self
250 0 0 0       if(!defined $self || !ref $self) {
251 0           $errorhandler->_error($pkg, $function, "", 2);
252             }
253              
254 0 0         if($option == 1) {
255 0           $option_realtime = 1;
256             }
257             else {
258 0           $option_realtime = 0;
259             }
260             }
261              
262             # method returns the configN - the total number of CUIs
263             # input:
264             # output: int
265             sub _getN
266             {
267 0     0     my $self = shift;
268            
269 0           my $function = "_getN";
270 0           &_debug($function);
271              
272 0 0         if($configN == 0) {
273 0           my $hash = $cuifinder->_getCuiList();
274 0           $configN = keys %{$hash};
  0            
275             }
276 0           return $configN;
277             }
278              
279            
280             # returns the intrinsic information content (IC) of a cui
281             # input : $concept <- string containing a cui
282             # output: $double <- double containing its IC
283             sub _getSecoIntrinsicIC
284             {
285 0     0     my $self = shift;
286 0           my $concept = shift;
287              
288 0           my $function = "_getSecoInrinsicIC";
289 0           &_debug($function);
290              
291             # check self
292 0 0 0       if(!defined $self || !ref $self) {
293 0           $errorhandler->_error($pkg, $function, "", 2);
294             }
295            
296             # check concept was obtained
297 0 0         if(!$concept) {
298 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
299             }
300            
301             # check if valid concept
302 0 0         if(! ($errorhandler->_validCui($concept)) ) {
303 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
304             }
305            
306 0           my $children = $cuifinder->_getChildren($concept);
307              
308 0           my $n = _getN();
309              
310 0           my $children_num = ($#{$children}) + 2;
  0            
311 0           my $ic = 1 - ( (log($children_num)/log(10)) / (log($n)/log(10)) );
312            
313 0           return $ic;
314             }
315              
316              
317             sub _getDecendents
318             {
319 0     0     my $concept = shift;
320 0           my $array = shift;
321            
322 0 0         if($concept=~/^\s*$/) { return; }
  0            
323              
324             # if concept is one of the following just return
325             #C1274012|Ambiguous concept (inactive concept)
326 0 0         if($concept=~/C1274012/) { return; }
  0            
327             #C1274013|Duplicate concept (inactive concept)
328 0 0         if($concept=~/C1274013/) { return; }
  0            
329             #C1276325|Reason not stated concept (inactive concept)
330 0 0         if($concept=~/C1276325/) { return; }
  0            
331             #C1274014|Outdated concept (inactive concept)
332 0 0         if($concept=~/C1274014/) { return; }
  0            
333             #C1274015|Erroneous concept (inactive concept)
334 0 0         if($concept=~/C1274015/) { return; }
  0            
335             #C1274021|Moved elsewhere (inactive concept)
336 0 0         if($concept=~/C1274021/) { return; }
  0            
337             #C2733115|limited status concept
338 0 0         if($concept=~/C2733115/) { return; }
  0            
339             #C1443286|
340 0 0         if($concept=~/C1443286/) { return; }
  0            
341            
342             # set the new path
343 0           my @path = @{$array};
  0            
344 0           push @path, $concept;
345            
346 0           my $series = join " ", @path;
347              
348             # get all the children
349 0           my $children = $cuifinder->_getChildren($concept);
350            
351 0           my %subsumers = (); my %leaves = ();
  0            
352              
353             # search through the children
354 0           foreach my $child (@{$children}) {
  0            
355            
356             # check if child cui has already in the path
357 0           my $flag = 0;
358 0           foreach my $cui (@path) {
359 0 0         if($cui eq $child) {
360 0           $flag = 1;
361             }
362             }
363            
364             # if it isn't continue on with the depth first search
365 0 0         if($flag == 0) {
366 0           my ($s, $l) = &_getDecendents($child, \@path);
367 0           %subsumers = (%subsumers, %{$s}); %leaves = (%leaves, %{$l});
  0            
  0            
  0            
368             }
369             }
370            
371 0 0         if($#{$children} < 0) { $leaves{$concept}++; } $subsumers{$concept}++;
  0            
  0            
  0            
372            
373 0           return (\%subsumers, \%leaves);
374             }
375              
376             # returns the intrinsic information content (IC) of a cui
377             # input : $concept <- string containing a cui
378             # output: $double <- double containing its IC
379             sub _getSanchezIntrinsicIC
380             {
381 0     0     my $self = shift;
382 0           my $concept = shift;
383              
384 0           my $function = "_getSanchezIntrinsicIC";
385 0           &_debug($function);
386              
387             # check self
388 0 0 0       if(!defined $self || !ref $self) {
389 0           $errorhandler->_error($pkg, $function, "", 2);
390             }
391            
392             # check concept was obtained
393 0 0         if(!$concept) {
394 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
395             }
396            
397             # check if valid concept
398 0 0         if(! ($errorhandler->_validCui($concept)) ) {
399 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
400             }
401            
402             # get the leaves
403 0           my $leaves = 0; my $maxleaves = 0;
  0            
404 0 0         if($option_realtime) {
405 0           my @path = ();
406 0           my ($d, $l) = _getDecendents($concept, \@path);
407 0           $leaves = keys %{$l};
  0            
408 0           $maxleaves = _getMaxLeaves();
409             }
410             else {
411             # get the umlsinterfaceindex database from CuiFinder
412 0           my $sdb = $cuifinder->_getIndexDB();
413 0 0         if(!$sdb) { $errorhandler->_error($pkg, $function, "Error with sdb.", 3); }
  0            
414 0           $self->{'sdb'} = $sdb;
415            
416             # get the intrinsic table name
417 0           my $intrinsicTableName = $cuifinder->_getIntrinsicTableName();
418              
419             # check that it exists in the index
420 0           my $arrRefCheck = $sdb->selectcol_arrayref("select count(*) from tableindex where HEX=\'$intrinsicTableName\'");
421 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
422 0           my $check = shift @{$arrRefCheck};
  0            
423            
424 0 0         if($check != 1) {
425 0           print STDERR "The index does not contain the intrinsic table for the\n";
426 0           print STDERR "sources/relations in the configration file. It must have\n";
427 0           print STDERR "been created with an earlier version of UMLS-Interface.\n";
428 0           print STDERR "Please either recreate the index by removing it using the\n";
429 0           print STDERR "removeConfigData.pl or run with the --realtime option.\n\n";
430 0           exit;
431             }
432              
433             # get subsumers and leaves from the intrinsic table
434 0           my $arrRefLeaves = $sdb->selectcol_arrayref("select LEAVES from $intrinsicTableName where CUI=\'$concept\'");
435 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
436              
437 0           my $arrRefMaxLeaves = $sdb->selectcol_arrayref("select LEAVES from $intrinsicTableName where CUI=\'$root\'");
438 0           $errorhandler->_checkDbError($pkg, $function, $sdb);
439            
440 0           $maxleaves = shift @{$arrRefMaxLeaves};
  0            
441 0           $leaves = shift @{$arrRefLeaves};
  0            
442             }
443            
444             # get the subsumers
445 0           my $paths = $pathfinder->_pathsToRoot($concept);
446 0           my %subhash = ();
447 0           foreach my $path (@{$paths}) {
  0            
448 0           my @array = split/\s+/, $path;
449 0           foreach my $element (@array) { $subhash{$element}++; }
  0            
450             }
451 0           my $subsumers = keys %subhash;
452              
453 0           my $a = 0;
454 0 0         if(defined $leaves) {
455 0 0 0       if($leaves != 0 && $subsumers != 0) {
456 0           $a = $leaves/$subsumers;
457             }
458 0           }$a++;
459              
460 0           my $b = $maxleaves; $b++;
  0            
461            
462 0           my $ic = -1 * ( (log( $a/$b )/log(2)) );
463            
464 0           return $ic;
465             }
466              
467             sub _getMaxLeaves {
468            
469 0 0   0     if($max_leaves == 0) {
470              
471 0           my @path = ();
472 0           my ($s, $l) = _getDecendents($cuifinder->_root(), \@path);
473            
474 0           $max_leaves = keys %{$l};
  0            
475             }
476            
477 0           return $max_leaves;
478             }
479            
480             # returns the information content (IC) of a cui
481             # input : $concept <- string containing a cui
482             # output: $double <- double containing its IC
483             sub _getIC
484             {
485 0     0     my $self = shift;
486 0           my $concept = shift;
487              
488 0           my $function = "_getIC";
489 0           &_debug($function);
490              
491             # check self
492 0 0 0       if(!defined $self || !ref $self) {
493 0           $errorhandler->_error($pkg, $function, "", 2);
494             }
495            
496             # check concept was obtained
497 0 0         if(!$concept) {
498 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
499             }
500            
501             # check if valid concept
502 0 0         if(! ($errorhandler->_validCui($concept)) ) {
503 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
504             }
505            
506             # if option frequency then the propagation hash
507             # hash has not been loaded and we should determine
508             # the information content of the concept using the
509             # frequency information in the file in realtime
510 0 0         if($option_icfrequency) {
511            
512             # initialize the propagation hash
513 0           $self->_initializePropagationHash();
514            
515             # load the propagation frequency hash
516 0           $self->_loadPropagationFreq(\%frequencyHash);
517            
518             # propogate the counts
519 0           &_debug("_propagation");
520 0           my @array = ();
521 0           $self->_propagation($concept, \@array);
522            
523             # tally up the propagation counts
524 0           $self->_tallyCounts();
525             }
526            
527 0           my $prob = $propagationHash{$concept};
528              
529 0 0         if(!defined $prob) { return 0; }
  0            
530              
531 0           my $ic = 0;
532 0 0 0       if($prob > 0 and $prob < 1) { $ic = -1 * (log($prob) / log(10)); }
  0            
533              
534 0           return $ic;
535             }
536              
537            
538             # returns the probability
539             # input : $concept <- string containing a cui
540             # output: $double <- double containing its probability
541             sub _getProbability
542             {
543 0     0     my $self = shift;
544 0           my $concept = shift;
545              
546 0           my $function = "_getProbability";
547 0           &_debug($function);
548              
549             # check self
550 0 0 0       if(!defined $self || !ref $self) {
551 0           $errorhandler->_error($pkg, $function, "", 2);
552             }
553            
554             # check concept was obtained
555 0 0         if(!$concept) {
556 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
557             }
558            
559             # check if valid concept
560 0 0         if(! ($errorhandler->_validCui($concept)) ) {
561 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
562             }
563            
564             # if option frequency then the propagation hash
565             # hash has not been loaded and we should determine
566             # the information content of the concept using the
567             # frequency information in the file in realtime
568 0 0         if($option_icfrequency) {
569            
570             # initialize the propagation hash
571 0           $self->_initializePropagationHash();
572            
573             # load the propagation frequency hash
574 0           $self->_loadPropagationFreq(\%frequencyHash);
575            
576             # propogate the counts
577 0           &_debug("_propagation");
578 0           my @array = ();
579 0           $self->_propagation($concept, \@array);
580            
581             # tally up the propagation counts
582 0           $self->_tallyCounts();
583             }
584            
585 0           my $prob = $propagationHash{$concept};
586              
587            
588 0 0         if(!defined $prob) { return 0; }
  0            
589              
590 0           return $prob;
591             }
592              
593             # returns the propagation count (frequency) of a cui
594             # input : $concept <- string containing a cui
595             # output: $double <- frequency
596             sub _getFrequency
597             {
598 0     0     my $self = shift;
599 0           my $concept = shift;
600              
601 0           my $function = "_getFrequency";
602 0           &_debug($function);
603              
604             # check self
605 0 0 0       if(!defined $self || !ref $self) {
606 0           $errorhandler->_error($pkg, $function, "", 2);
607             }
608            
609             # check concept was obtained
610 0 0         if(!$concept) {
611 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
612             }
613            
614             # check if valid concept
615 0 0         if(! ($errorhandler->_validCui($concept)) ) {
616 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
617             }
618            
619             # if option frequency then the propagation hash
620             # hash has not been loaded and we should determine
621             # the information content of the concept using the
622             # frequency information in the file in realtime
623 0 0         if($option_icfrequency) {
624            
625             # initialize the propagation hash
626 0           $self->_initializePropagationHash();
627            
628             # load the propagation frequency hash
629 0           $self->_loadPropagationFreq(\%frequencyHash);
630            
631             # propogate the counts
632 0           &_debug("_propagation");
633 0           my @array = ();
634 0           $self->_propagation($concept, \@array);
635            
636             # tally up the propagation counts
637 0           $self->_tallyCounts();
638             }
639              
640 0           my $freq = int($propagationHash{$concept} * $configN);
641              
642 0           return $freq;
643             }
644              
645             # this method obtains the CUIs in the sources which
646             # are going to be propagated
647             # input :
648             # output: $hash <- reference to hash containing the cuis
649             sub _loadFrequencyHash {
650              
651 0     0     my $self = shift;
652            
653 0           my $function = "_loadFrequencyHash";
654 0           &_debug($function);
655              
656            
657             # check self
658 0 0 0       if(!defined $self || !ref $self) {
659 0           $errorhandler->_error($pkg, $function, "", 2);
660             }
661              
662             # open the frequency file
663 0 0         open(FILE, $frequencyFile) || die "Could not open file $frequencyFile\n";
664              
665             # get the source and relations associated with the propagation file
666 0           my $sab = ; chomp $sab;
  0            
667 0           my $rel = ; chomp $rel;
  0            
668              
669             # get the rela realtions associated with the propagation file if one exists
670 0           my $rela = ; chomp $rela;
  0            
671              
672             # if it does exist in then get N otherwise we got N already.
673 0           my $ninfo = $rela;
674 0 0         if($rela=~/RELA/) {
675 0           $ninfo = ; chomp $ninfo;
  0            
676             }
677             else {
678 0           $rela = "";
679             }
680              
681 0           $ninfo=~/N\s*\:\:\s*([0-9]+)/;
682 0           $configN = $1;
683              
684             # get the source and relations from config file or the defaults
685 0           my $configsab = $cuifinder->_getSabString();
686 0           my $configrel = $cuifinder->_getRelString();
687            
688             # check the source information is correct
689 0 0         if(! ($self->_checkParameters($configsab, $sab)) ) {
690 0           my $str = "SAB information ($sab) does not match the config file ($configsab).";
691 0           $errorhandler->_error($pkg, $function, $str, 5);
692             }
693            
694             # check that that the relation information is correct
695 0 0         if(! ($self->_checkParameters($configrel, $rel)) ) {
696 0           my $str = "REL information ($rel) does not match the config file ($configrel).";
697 0           $errorhandler->_error($pkg, $function, $str, 5);
698             }
699            
700             # check if rela information was used
701 0 0         if($rela ne "") {
702 0 0         if(!($self->_checkParameters($_, $cuifinder->_getRelaString()))) {
703 0           my $str = "RELA information does not match the config file ($_).";
704 0           $errorhandler->_error($pkg, $function, $str, 5);
705             }
706             }
707             # check that the relations used are acceptable for propagation
708             # the only acceptable relations are RB/RN and PAR/CHD
709 0 0         if(! ($self->_checkHierarchicalRelations ($configrel)) ) {
710 0           my $str = "REL information ($rel) contains relations other than RB/RN and PAR/CHD.";
711 0           $errorhandler->_error($pkg, $function, $str, 11);
712             }
713              
714             # obtain the frequency counts storing them in the frequency hash table
715 0           while() {
716 0           chomp;
717            
718             # if blank line more on
719 0 0         if($_=~/^\s*$/) { next; }
  0            
720              
721             # get the cui and its frequency count
722 0           my($cui, $freq) = split/<>/;
723            
724             # make certain that it is a cui and a frequency
725             # and if it is load it into the frequency hash
726 0 0 0       if( ($cui=~/C[0-9]/) && ($freq=~/[0-9]+/) ) {
727 0 0         if(exists $frequencyHash{$cui}) {
728 0           $frequencyHash{$cui} += $freq;
729             }
730             else {
731 0           $frequencyHash{$cui} = $freq;
732             }
733             }
734             }
735            
736 0           close(FILE);
737             }
738              
739             # this method obtains the CUIs in the sources which
740             # are going to be propagated
741             # input :
742             # output: $hash <- reference to hash containing the cuis
743             sub _getPropagationCuis {
744              
745 0     0     my $self = shift;
746            
747 0           my $function = "_getPropagationCuis";
748 0           &_debug($function);
749              
750             # check self
751 0 0 0       if(!defined $self || !ref $self) {
752 0           $errorhandler->_error($pkg, $function, "", 2);
753             }
754              
755             # return the reference to a hash
756 0           return $cuifinder->_getCuiList();
757             }
758              
759             # initialize the propgation hash
760             # input :
761             # output:
762             sub _initializePropagationHash {
763              
764 0     0     my $self = shift;
765              
766 0           my $function = "_initializePropagationHash";
767 0           &_debug($function);
768              
769             # check self
770 0 0 0       if(!defined $self || !ref $self) {
771 0           $errorhandler->_error($pkg, $function, "", 2);
772             }
773            
774             # clear out the hash just in case
775 0           my $hash = $self->_getPropagationCuis();
776            
777             # add the cuis to the propagation hash
778 0           foreach my $cui (sort keys %{$hash}) {
  0            
779 0           $propagationHash{$cui} = "";
780 0           $propagationFreq{$cui} = $smooth;
781             }
782             }
783              
784             # load the propagation frequency has with the frequency counts
785             # input : $hash <- reference to hash containing frequency counts
786             # output:
787             sub _loadPropagationFreq {
788              
789 0     0     my $self = shift;
790 0           my $fhash = shift;
791            
792 0           my $function = "_loadPropagationFreq";
793 0           &_debug($function);
794              
795             # check self
796 0 0 0       if(!defined $self || !ref $self) {
797 0           $errorhandler->_error($pkg, $function, "", 2);
798             }
799              
800             # loop through and set the frequency count
801 0           my $N = 0;
802 0           foreach my $cui (sort keys %{$fhash}) {
  0            
803 0 0         if($cui=~/^\s*$/) { next; }
  0            
804            
805 0           my $freq = ${$fhash}{$cui};
  0            
806 0 0         if(exists $propagationFreq{$cui}) {
807 0           $propagationFreq{$cui} += $freq;
808             }
809 0           $N = $N + $freq;
810             }
811            
812             # check if something has been set
813 0 0         if($smooth == 1) {
814 0           my $pkeys = keys %propagationFreq;
815 0           $N += $pkeys;
816             }
817            
818             # set N for the config file
819 0           $configN = $N;
820              
821             # loop through again and set the probability
822 0           foreach my $cui (sort keys %propagationFreq) {
823 0           $propagationFreq{$cui} = ($propagationFreq{$cui}) / $N;
824            
825             }
826             }
827              
828             # check that the parameters in config file match
829             # input : $string1 <- string containing parameter
830             # $string2 <- string containing configuratation parameter
831             # output: 0|1 <- true or false
832             sub _checkParameters {
833 0     0     my $self = shift;
834 0           my $string1 = shift;
835 0           my $string2 = shift;
836              
837 0           my $function = "_checkParameters";
838 0           &_debug($function);
839              
840 0 0 0       if( !(defined $string1) && !(defined $string2) ) { return 1; }
  0            
841 0 0 0       if( ($string1=~/^\s*$/) && ($string2=~/^\s*$/) ) { return 1; }
  0            
842 0 0 0       if( ($string1=~/^\s*$/) && !($string2=~/^\s*$/) ) { return 0; }
  0            
843 0 0 0       if( !($string1=~/^\s*$/) && ($string2=~/^\s*$/) ) { return 0; }
  0            
844              
845 0 0         if(!($string1=~/([A-Z]+) :: (include|exclude) (.*?)$/)) { return 0; }
  0            
846 0 0         if(!($string2=~/([A-Z]+) :: (include|exclude) (.*?)$/)) { return 0; }
  0            
847              
848 0           $string1=~/([A-Z]+) :: (include|exclude) (.*?)$/;
849 0           my $option1 = $1;
850 0           my $type1 = $2;
851 0           my $param1 = $3;
852            
853 0           $string2=~/([A-Z]+) :: (include|exclude) (.*?)$/;
854 0           my $option2 = $1;
855 0           my $type2 = $2;
856 0           my $param2 = $3;
857              
858 0 0         if($option1 ne $option2) { return 0; }
  0            
859 0 0         if($type1 ne $type2) { return 0; }
  0            
860              
861 0           my @array1 = split/\,/, $param1;
862 0           my @array2 = split/\,/, $param2;
863            
864              
865 0           my %hash = ();
866              
867 0           foreach my $element (@array1) { $element=~s/\s+//g; $hash{$element}++; }
  0            
  0            
868 0           foreach my $element (@array2) { $element=~s/\s+//g; $hash{$element}++; }
  0            
  0            
869            
870 0           foreach my $element (sort keys %hash) {
871 0 0         if($hash{$element} != 2) { return 0; }
  0            
872             }
873              
874 0           return 1;
875             }
876              
877             # check that the relations used are only RB/RN and/or PAR/CHD relations
878             # input : string <- contain the relation line from config file
879             # output: 1|0 <- indicating if the string contains relations other
880             # than RB/RN or PAR/CHD relations
881             sub _checkHierarchicalRelations {
882            
883 0     0     my $self = shift;
884 0           my $string = shift;
885              
886 0           my $function = "_checkHierarchicalRelations";
887 0           &_debug($function);
888              
889 0           $string=~/([A-Z]+) :: (include|exclude) (.*?)$/;
890 0           my $option = $1;
891 0           my $type = $2;
892 0           my $param = $3;
893            
894 0           $param=~s/\s+//g;
895              
896 0           my @rels = split/\s*\,\s*/, $param;
897            
898 0           foreach my $rel (@rels) {
899 0 0         if( !($rel=~/(PAR|CHD|RB|RN)/) ) {
900 0           return 0;
901             }
902             }
903            
904 0           return 1;
905             }
906              
907             # load the propagation hash
908             # input :
909             # output:
910             sub _loadPropagationHashFromFile {
911              
912 0     0     my $self = shift;
913            
914 0           my $function = "_loadPropagationHashFromFile";
915 0           &_debug($function);
916              
917             # check self
918 0 0 0       if(!defined $self || !ref $self) {
919 0           $errorhandler->_error($pkg, $function, "", 2);
920             }
921              
922             # open the propagation file
923 0 0         open(FILE, $propagationFile) || die "Could not open file $propagationFile\n";
924             # check if smoothing was set
925 0           my $psmooth = ;
926            
927             # get the source and relations associated with the propagation file
928 0           my $sab = ; chomp $sab;
  0            
929 0           my $rel = ; chomp $rel;
  0            
930              
931             # get the rela realtions associated with the propagation file if one exists
932 0           my $rela = ; chomp $rela;
  0            
933              
934             # if it does exist in then get N otherwise we got N already.
935 0           my $ninfo = $rela;
936 0 0         if($rela=~/RELA/) {
937 0           $ninfo = ; chomp $ninfo;
  0            
938             }
939             else {
940 0           $rela = "";
941             }
942              
943 0           $ninfo=~/N\s*\:\:\s*([0-9]+)/;
944 0           $configN = $1;
945              
946             # get the source and relations from config file or the defaults
947 0           my $configsab = $cuifinder->_getSabString();
948 0           my $configrel = $cuifinder->_getRelString();
949            
950             # check the source information is correct
951 0 0         if(! ($self->_checkParameters($configsab, $sab)) ) {
952 0           my $str = "SAB information ($sab) does not match the config file ($configsab).";
953 0           $errorhandler->_error($pkg, $function, $str, 5);
954             }
955            
956             # check that that the relation information is correct
957 0 0         if(! ($self->_checkParameters($configrel, $rel)) ) {
958 0           my $str = "REL information ($rel) does not match the config file ($configrel).";
959 0           $errorhandler->_error($pkg, $function, $str, 5);
960             }
961            
962             # check if rela information was used
963 0 0         if($rela ne "") {
964 0 0         if(!($self->_checkParameters($_, $cuifinder->_getRelaString()))) {
965 0           my $str = "RELA information does not match the config file ($_).";
966 0           $errorhandler->_error($pkg, $function, $str, 5);
967             }
968             }
969             # check that the relations used are acceptable for propagation
970             # the only acceptable relations are RB/RN and PAR/CHD
971 0 0         if(! ($self->_checkHierarchicalRelations ($configrel)) ) {
972 0           my $str = "REL information ($rel) contains relations other than RB/RN and PAR/CHD.";
973 0           $errorhandler->_error($pkg, $function, $str, 11);
974             }
975            
976 0           while() {
977 0           chomp;
978            
979             # if blank line move on
980 0 0         if($_=~/^\s*$/) { next; }
  0            
981            
982             # get the cui and its frequency count
983 0           my ($cui, $freq) = split/<>/;
984              
985             # load it into the propagation hash
986 0           $propagationHash{$cui} = $freq;
987             }
988             }
989              
990             # DEBUNKED FUNCTION? CHECK
991             # get the propagation count for a given cui
992             # input : $concept <- string containing the cui
993             # output: $double|-1 <- the propagation count otherwise
994             # a -1 if none existed for that cui
995             sub _getPropagationCount {
996              
997 0     0     my $self = shift;
998 0           my $concept = shift;
999              
1000 0           my $function = "_getPropagationCount";
1001 0           &_debug($function);
1002              
1003             # check self
1004 0 0 0       if(!defined $self || !ref $self) {
1005 0           $errorhandler->_error($pkg, $function, "", 2);
1006             }
1007              
1008             # check concept was obtained
1009 0 0         if(!$concept) {
1010 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
1011             }
1012            
1013             # check if valid concept
1014 0 0         if(! ($errorhandler->_validCui($concept)) ) {
1015 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
1016             }
1017              
1018             # propagate the counts
1019 0           $self->_propagateCounts();
1020              
1021             # if the concept exists in the propagation hash
1022             # return the probability otherwise return a -1
1023 0 0         if(exists $propagationHash{$concept}) {
1024 0           return $propagationHash{$concept};
1025             }
1026             else {
1027 0           return -1;
1028             }
1029              
1030             }
1031              
1032             # method which actually propagates the counts
1033             # input : $hash <- reference to the hash containing
1034             # the frequency counts
1035             # output:
1036             sub _propagateCounts {
1037              
1038 0     0     my $self = shift;
1039 0           my $fhash = shift;
1040            
1041 0           my $function = "_propagateCounts";
1042 0           &_debug($function);
1043              
1044             # check self
1045 0 0 0       if(!defined $self || !ref $self) {
1046 0           $errorhandler->_error($pkg, $function, "", 2);
1047             }
1048              
1049             # check the parameters
1050 0 0         if(!defined $fhash) {
1051 0           $errorhandler->_error($pkg, $function, "Input variable \%fhash not defined.", 4);
1052             }
1053              
1054             # initialize the propagation hash
1055 0           $self->_initializePropagationHash();
1056            
1057             # load the propagation frequency hash
1058 0           $self->_loadPropagationFreq($fhash);
1059            
1060             # propagate the counts
1061 0           my @array = ();
1062 0           $self->_propagation($root, \@array);
1063            
1064             # tally up the propagation counts
1065 0           $self->_tallyCounts();
1066              
1067 0           my $k = keys %propagationHash;
1068            
1069             # return the propagation counts
1070 0           return \%propagationHash;
1071             }
1072              
1073             # method that tallys up the probability counts of the
1074             # cui and its decendants and then calculates the ic
1075             # input :
1076             # output:
1077             sub _tallyCounts {
1078              
1079 0     0     my $self = shift;
1080            
1081 0           my $function = "_tallyCounts";
1082 0           &_debug($function);
1083              
1084             # check self
1085 0 0 0       if(!defined $self || !ref $self) {
1086 0           $errorhandler->_error($pkg, $function, "", 2);
1087             }
1088            
1089 0           foreach my $cui (sort keys %propagationHash) {
1090 0           my $set = $propagationHash{$cui};
1091 0           my $pcount = $propagationFreq{$cui};
1092            
1093 0 0         if(defined $set) {
1094 0           my %hash = ();
1095 0           while($set=~/(C[0-9][0-9][0-9][0-9][0-9][0-9][0-9])/g) {
1096 0           my $c = $1;
1097 0 0         if(! (exists $hash{$c}) ) {
1098 0           $pcount += $propagationFreq{$c};
1099 0           $hash{$c}++;
1100             }
1101             }
1102             }
1103 0           $propagationHash{$cui} = $pcount;
1104             }
1105             }
1106              
1107             # recursive method that acuatlly performs the propagation
1108             # input : $concept <- string containing the cui
1109             # $array <- reference to the array containing
1110             # the cui's decendants
1111             # output: $concept <- string containing the cui
1112             # $array <- reference to the array containing
1113             # the cui's decendants
1114             sub _propagation {
1115              
1116 0     0     my $self = shift;
1117 0           my $concept = shift;
1118 0           my $array = shift;
1119              
1120 0           my $function = "_propagation";
1121              
1122             # check self
1123 0 0 0       if(!defined $self || !ref $self) {
1124 0           $errorhandler->_error($pkg, $function, "", 2);
1125             }
1126            
1127             # check concept was obtained
1128 0 0         if(!$concept) {
1129 0           $errorhandler->_error($pkg, $function, "Error with input variable \$concept.", 4);
1130             }
1131            
1132             # check if valid concept
1133 0 0         if(! ($errorhandler->_validCui($concept)) ) {
1134 0           $errorhandler->_error($pkg, $function, "Concept ($concept) in not valid.", 6);
1135             }
1136            
1137             # if the concept is inactive
1138 0 0         if($cuifinder->_forbiddenConcept($concept)) { return; }
  0            
1139              
1140             # set up the new path
1141 0           my @intermediate = @{$array};
  0            
1142 0           push @intermediate, $concept;
1143 0           my $series = join " ", @intermediate;
1144              
1145             # initialize the set
1146 0           my $set = $propagationHash{$concept};
1147              
1148             # if the propagation hash already contains a list of CUIs it
1149             # is from its decendants so it has been here before so all we
1150             # have to do is return the list of ancestors with it added
1151 0 0         if(defined $set) {
1152 0 0         if(! ($set=~/^\s*$/)) {
1153 0           $set .= " $concept";
1154 0           return $set;
1155             }
1156             }
1157              
1158             # get all the children
1159 0           my $children = $cuifinder->_getChildren($concept);
1160              
1161             # search through the children
1162 0           foreach my $child (@{$children}) {
  0            
1163              
1164 0           my $flag = 0;
1165            
1166             # check that the concept is not one of the forbidden concepts
1167 0 0         if($cuifinder->_forbiddenConcept($child)) { $flag = 1; }
  0            
1168            
1169             # check if child cui has already in the path
1170 0           foreach my $cui (@intermediate) {
1171 0 0         if($cui eq $child) { $flag = 1; }
  0            
1172             }
1173            
1174             # if it isn't continue on with the depth first search
1175 0 0         if($flag == 0) {
1176 0           $set .= " ";
1177 0           $set .= $self->_propagation($child, \@intermediate);
1178             }
1179             }
1180            
1181             # remove duplicates from the set
1182 0           my $rset;
1183 0 0         if(defined $set) {
1184 0           $rset = _breduce($set);
1185             }
1186             # store the set in the propagation hash
1187 0           $propagationHash{$concept} = $rset;
1188            
1189             # add the concept to the set
1190 0           $rset .= " $concept";
1191            
1192             # return the set
1193 0           return $rset;
1194             }
1195              
1196             # removes duplicates in an array
1197             # input : $array <- reference to an array
1198             # output:
1199             sub _breduce {
1200            
1201 0     0     local($_)= @_;
1202 0           my (@words)= split;
1203 0           my (%newwords);
1204 0           for (@words) { $newwords{$_}=1 }
  0            
1205 0           join ' ', keys(%newwords);
1206             }
1207              
1208              
1209             1;
1210              
1211             __END__