File Coverage

blib/lib/UMLS/Association/Measures/Direct.pm
Criterion Covered Total %
statement 119 119 100.0
branch 24 24 100.0
condition 9 9 100.0
subroutine 5 5 100.0
pod 0 1 0.0
total 157 158 99.3


line stmt bran cond sub pod time code
1             #UMLS::Association::Measures::Direct
2             # Computes the association between two sets of terms
3             # using Direct association, which is the association
4             # between sets A and C using direct co-occurrences
5 1     1   12 use strict;
  1         1  
  1         18  
6 1     1   3 use warnings;
  1         1  
  1         546  
7              
8             package UMLS::Association::Measures::Direct;
9              
10              
11             # Gets stats (n11,n1p,np1,npp) for each pairHash in the pairHashList
12             # using direct association
13             # Input:
14             # $pairHashListRef - ref to an array of pairHashes
15             # $matrixFileName - the fileName of the co-occurrence matrix
16             # $noOrder - 0 if order is enforced, 1 if not
17             # Output:
18             # \@statsList - ref to an array of \@stats, refs to arrays
19             # containing the ordered values: n11, n1p, np1, npp
20             # for each of the pair hashes. The index of the
21             # \@statsList corresponds to the index of the pairHash
22             # in the input $pairHashListRef
23             sub getStats {
24 14     14 0 16 my $pairHashListRef = shift;
25 14         14 my $matrixFileName = shift;
26 14         12 my $noOrder = shift;
27              
28             #read in the matrix of all values needed for all
29             # pair sets in the pair hash list ($vocabSize is returned too, but is not used below)
30 14         21 my ($matrixRef, $vocabSize) = &UMLS::Association::StatFinder::readInMatrix($pairHashListRef, $matrixFileName);
31              
32             #compute n1p, np1, and npp once for the whole matrix so they can be reused for every pair hash
33 14         22 my ($n1pRef, $np1Ref, $npp) = &_getAllCounts($matrixRef);
34              
35             #compute n11,n1p,np1,npp for every pair hash and collect the results in
36             # @statsList, which stays parallel to the input list; $noOrder is passed
37             # through unchanged and is only interpreted inside _statsFromAllCounts
38 14         17 my @statsList = ();
39 14         13 foreach my $pairHashRef (@{$pairHashListRef}) {
  14         15  
40 21         28 push @statsList, &_statsFromAllCounts($matrixRef, $n1pRef, $np1Ref, $npp, $noOrder, $pairHashRef);
41             }
42              
43             #return a ref to the stats list, an array of array refs;
44             # each array ref holds four values, in this order:
45             # n11, n1p, np1, and npp for the pair hash at
46             # the corresponding index in the pairHashList
47 14         68 return \@statsList;
48             }
49              
50              
51             # Computes n1p, np1, and npp for every CUI in the subgraph
52             # Input:
53             # $subGraphRef - ref to the subgraph or matrix read in
54             # Output:
55             # \%n1p - ref to a hash{$cui}=n1p for that cui, order enforced
56             # \%np1 - ref to a hash{$cui}=np1 for that cui, order enforced
57             # $npp - npp for the subGraphRef
58             sub _getAllCounts {
59 16     16   17 my $subGraphRef = shift;
60              
61             #visit every ($key1, $key2) cell of the matrix once, accumulating all three totals in the same pass
62 16         14 my %n1p = ();
63 16         23 my %np1 = ();
64 16         15 my $npp = 0;
65 16         15 foreach my $key1 (keys %{$subGraphRef}) {
  16         40  
66 101         82 foreach my $key2 (keys %{${$subGraphRef}{$key1}}) {
  101         79  
  101         164  
67             #the cell value feeds $key1's n1p (row sum), $key2's np1 (column sum), and the overall npp
68 146         120 my $value = ${${$subGraphRef}{$key1}}{$key2};
  146         113  
  146         148  
69            
70 146         166 $n1p{$key1} += $value;
71 146         131 $np1{$key2} += $value;
72 146         151 $npp += $value;
73             }
74             }
75              
76 16         36 return \%n1p, \%np1, $npp;
77             }
78              
79             # Computes n11, n1p, np1,and npp for the pairHash using
80             # the allCounts calculated from the _getAllCounts function
81             # Input:
82             # $subGraphRef - ref to the subgraph or matrix read in
83             # $n1pRef - ref to a hash{$cui}=n1p for that cui, order enforced
84             # $np1Ref - ref to a hash{$cui}=np1 for that cui, order enforced
85             # $npp - npp for the subGraphRef
86             # $noOrder - 0 if order is enforced, 1 if not
87             # $pairHashRef - ref to a pairHash
88             # Output:
89             # \@stats - ref to an array of (n11,n1p,np1,npp)
90             sub _statsFromAllCounts {
91 23     23   20 my $subGraphRef = shift;
92 23         22 my $n1pRef = shift;
93 23         17 my $np1Ref = shift;
94 23         22 my $npp = shift;
95 23         22 my $noOrder = shift;
96 23         19 my $pairHashRef = shift;
97            
98              
99             #NOTE: computing n11 is the bottleneck (nested set1 x set2 loop), and there is no obvious way to avoid it
100             #sum the matrix cells over every (set1 key, set2 key) pair, adding the reversed cell too when $noOrder is true; NOTE(review): the nested defined/exists lookups in this sub likely autovivify empty rows in the matrix for absent keys - confirm that is harmless
101             ############ calculate n11
102 23         21 my $n11 = 0;
103 23         20 foreach my $key1 (@{${$pairHashRef}{'set1'}}) {
  23         22  
  23         25  
104 48         42 foreach my $key2 (@{${$pairHashRef}{'set2'}}) {
  48         60  
  48         53  
105 99 100       92 if (defined ${${$subGraphRef}{$key1}}{$key2}) {
  99         84  
  99         130  
106 33         28 $n11 += ${${$subGraphRef}{$key1}}{$key2};
  33         28  
  33         36  
107             }
108 99 100 100     126 if ($noOrder && defined ${${$subGraphRef}{$key2}}{$key1}) {
  51         43  
  51         90  
109 10         7 $n11 += ${${$subGraphRef}{$key2}}{$key1};
  10         8  
  10         15  
110             }
111             }
112             }
113             #with $noOrder, a key present in both sets has its self-pair cell added twice above, so subtract it once; NOTE(review): this runs for every set1 key with a self-pair cell, even one that is not in set2 - confirm intended
114 23 100       27 if ($noOrder) {
115 12         13 foreach my $key (@{${$pairHashRef}{'set1'}}) {
  12         13  
  12         12  
116 26 100       23 if (exists ${${$subGraphRef}{$key}}{$key}) {
  26         16  
  26         42  
117 3         2 $n11 -= ${${$subGraphRef}{$key}}{$key};
  3         2  
  3         4  
118             }
119             }
120             }
121            
122             ##################################
123             ############## calculate n1p
124 23         22 my $n1p = 0;
125 23         18 foreach my $key1 (@{${$pairHashRef}{'set1'}}) {
  23         16  
  23         27  
126             #add the row sum for $key1, plus its column sum when order is not enforced
127 48 100       39 if (defined ${$n1pRef}{$key1}) {
  48         63  
128 42         41 $n1p += ${$n1pRef}{$key1};
  42         51  
129             }
130 48 100 100     65 if ($noOrder && defined ${$np1Ref}{$key1}) {
  26         43  
131 21         19 $n1p += ${$np1Ref}{$key1};
  21         22  
132             }
133             }
134             #with $noOrder, a cell whose two keys are both in set1 was counted in a row sum and again in a column sum; subtract each such cell once
135 23 100       27 if ($noOrder) {
136 12         10 foreach my $key1 (@{${$pairHashRef}{'set1'}}) {
  12         9  
  12         15  
137 26         25 foreach my $key2 (@{${$pairHashRef}{'set1'}}) {
  26         20  
  26         29  
138 66 100       56 if (defined ${${$subGraphRef}{$key1}}{$key2}) {
  66         52  
  66         84  
139 7         6 $n1p -= ${${$subGraphRef}{$key1}}{$key2};
  7         5  
  7         9  
140             }
141             }
142             }
143             }
144              
145             #####################################
146             ############## calculate np1
147 23         22 my $np1 = 0;
148 23         19 foreach my $key2 (@{${$pairHashRef}{'set2'}}) {
  23         18  
  23         27  
149             #add the column sum for $key2, plus its row sum when order is not enforced
150 44 100       42 if (defined ${$np1Ref}{$key2}) {
  44         50  
151 37         33 $np1 += ${$np1Ref}{$key2};
  37         35  
152             }
153 44 100 100     51 if ($noOrder && defined ${$n1pRef}{$key2}) {
  22         32  
154 19         17 $np1 += ${$n1pRef}{$key2};
  19         20  
155             }
156             }
157             #with $noOrder, a cell whose two keys are both in set2 was counted in a column sum and again in a row sum; subtract each such cell once
158 23 100       27 if ($noOrder) {
159 12         11 foreach my $key1 (@{${$pairHashRef}{'set2'}}) {
  12         9  
  12         15  
160 22         20 foreach my $key2 (@{${$pairHashRef}{'set2'}}) {
  22         19  
  22         24  
161 52 100       39 if (defined ${${$subGraphRef}{$key1}}{$key2}) {
  52         44  
  52         68  
162 9         7 $np1 -= ${${$subGraphRef}{$key1}}{$key2};
  9         6  
  9         12  
163             }
164             }
165             }
166             }
167             ##############################
168            
169             #pack the four values in the order n11, n1p, np1, npp and return a ref to that array
170 23         33 my @stats = ($n11, $n1p, $np1, $npp);
171 23         44 return \@stats;
172             }
173              
174             1;