| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#UMLS::Association::Measures::LTA |
|
2
|
|
|
|
|
|
|
# Computes the Linking Term Association (LTA) between two sets of terms. |
|
3
|
|
|
|
|
|
|
# |
|
4
|
|
|
|
|
|
|
# LTA works by first finding the sets of linking terms for the A terms |
|
5
|
|
|
|
|
|
|
# and C terms to form sets B_A and B_C. It then uses these sets to |
|
6
|
|
|
|
|
|
|
# compute N11 - the count of unique shared linking terms, N1P, the count |
|
7
|
|
|
|
|
|
|
# of unique terms in B_A, NP1, the count of unique terms in B_C, and NPP, |
|
8
|
|
|
|
|
|
|
# the total number of unique terms in the dataset (the vocabulary size). |
|
9
|
|
|
|
|
|
|
# The association is then found using these counts. |
|
10
|
1
|
|
|
1
|
|
4
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
19
|
|
|
11
|
1
|
|
|
1
|
|
3
|
use warnings; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
227
|
|
|
12
|
|
|
|
|
|
|
package UMLS::Association::Measures::LTA; |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
# Gets stats (n11,n1p,np1,npp) for each pairHash in the pairHashList
# using linking term association (LTA)
# Input:
#  $pairHashListRef - ref to an array of pairHashes
#  $matrixFileName - the fileName of the co-occurrence matrix
#  $noOrder - ordering flag, passed through unchanged to
#             StatFinder::getLinkingTermsPairHashList.
#             NOTE(review): going by the name, presumably 1 means
#             order is NOT enforced and 0 means it is - confirm
#             against StatFinder before relying on this.
# Output:
#  \@statsList - ref to an array of \@stats, refs to arrays
#                containing the ordered values: n11, n1p, np1, npp
#                for each of the pair hashes. The index of the
#                \@statsList corresponds to the index of the pairHash
#                in the input $pairHashListRef
sub getStats {
    my ($pairHashListRef, $matrixFileName, $noOrder) = @_;

    # get the linking term info. StatFinder returns five values; only
    # the third (npp) and the fifth (the list of linking pair hashes,
    # parallel to $pairHashListRef) are needed here, so the others are
    # discarded. The call stays in list context so positions line up.
    # NOTE(review): the meaning of the two trailing 1 flags is defined
    # by StatFinder and is not visible from this file.
    my (undef, undef, $npp, undef, $linkingPairHashListRef)
        = UMLS::Association::StatFinder::getLinkingTermsPairHashList(
            $pairHashListRef, $matrixFileName, $noOrder, 1, 1);

    # compute n11,n1p,np1, and npp for all pair hashes
    # and place into the statsList, a parallel array
    # of stats for that pair hash
    my @statsList = ();
    my $numPairHashes = scalar @{$pairHashListRef};
    foreach my $i (0 .. $numPairHashes - 1) {
        push @statsList, _statsFromAllLinkingInfo(
            $pairHashListRef->[$i],
            $linkingPairHashListRef->[$i],
            $npp);
    }

    # return the stats list, an array of array refs
    # each array ref contains four values:
    # n11, n1p, np1, and npp for the pair hash at
    # the corresponding index in the pairHashList
    return \@statsList;
}
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
# Gets stats (n11,n1p,np1,npp) for a single pairHash using the
# precomputed linkingPairHash (from StatFinder::getLinkingTermsPairHashList)
# Input:
#  $pairHashRef - ref to the pairHash (accepted for interface
#                 compatibility; not read by this sub)
#  $linkingPairHashRef - ref to the linking terms pair hash for this
#                        pairHash. Its 'set1' entry is an array ref of
#                        the A to B linking terms and its 'set2' entry
#                        an array ref of the B to C linking terms. A
#                        missing entry is treated as an empty set.
#  $npp - npp value, copied unchanged into the output
# Output:
#  \@stats - ref to an array of (n11,n1p,np1,npp)
sub _statsFromAllLinkingInfo {
    my ($pairHashRef, $linkingPairHashRef, $npp) = @_;

    # Fetch both linking term lists once. Falling back to an empty
    # list keeps a missing set from being autovivified inside the
    # caller's hash, so the input is never modified.
    my $aToBTermsRef = $linkingPairHashRef->{'set1'} || [];
    my $bToCTermsRef = $linkingPairHashRef->{'set2'} || [];

    ##################################
    ############## calculate n11
    # n11 is the number of A to B linking terms that are also
    # B to C linking terms
    # NOTE: noorder is taken care of when constructing the linking set
    my %isBToCLinkingTerm = ();
    @isBToCLinkingTerm{ @{$bToCTermsRef} } = ();
    my $n11 = grep { exists $isBToCLinkingTerm{$_} } @{$aToBTermsRef};

    ##################################
    ############## calculate n1p and np1
    # the sizes of the two linking term lists
    my $n1p = scalar @{$aToBTermsRef};
    my $np1 = scalar @{$bToCTermsRef};

    ##############################
    # pack and return the stats
    return [$n11, $n1p, $np1, $npp];
}
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
1; |
|
95
|
|
|
|
|
|
|
|