File Coverage

blib/lib/UMLS/Association/Measures/SBC.pm
Criterion Covered Total %
statement 34 34 100.0
branch 2 2 100.0
condition n/a
subroutine 3 3 100.0
pod 0 1 0.0
total 39 40 97.5


line stmt bran cond sub pod time code
1             #UMLS::Association::Measures::SBC
2             # Computes the shared B to C set association (SBC) between two sets of terms
3             #
4             # SBC works by first finding the set of linking terms for the A terms
5             # and C terms to form sets B_A and B_C. It then finds the overlap
6             # between these sets, the set of shared B terms, B_S. It then finds
7             # the direct association between sets B_S and C
8 1     1   4 use strict;
  1         1  
  1         18  
9 1     1   3 use warnings;
  1         4  
  1         167  
10              
11             package UMLS::Association::Measures::SBC;
12              
13             # Gets stats (n11,n1p,np1,npp) for each pairHash in the pairHashList
14             # using shared B to C association (SBC)
15             # Input:
16             # $pairHashListRef - ref to an array of pairHashes
17             # $matrixFileName - the fileName of the co-occurrence matrix
18             # $noOrder - 1 if order is NOT enforced (co-occurrence order is ignored), 0 if it is
19             # Output:
20             # \@statsList - ref to an array of \@stats, refs to arrays
21             # containing the ordered values: n11, n1p, np1, npp
22             # for each of the pair hashes. The index of the
23             # \@statsList corresponds to the index of the pairHash
24             # in the input $pairHashListRef
25             sub getStats {
26 4     4 0 3 my $pairHashListRef = shift;
27 4         4 my $matrixFileName = shift;
28 4         4 my $noOrder = shift;
29              
30             #convert the pairHashes to linking term pairHashes
31 4         12 my $linkingTermsPairHashListRef = &UMLS::Association::StatFinder::getLinkingTermsPairHashList(
32             $pairHashListRef, $matrixFileName, $noOrder);
33            
34             #find the overlapping linking terms, and set
35             # the pairHashes to shared B (overlapping linking terms)
36             # to C (original set 2 of the pair hash)
37 4         5 my @sharedBToCPairHashList = ();
38 4         5 my $start = time();
39 4         4 for (my $i = 0; $i < scalar @{$pairHashListRef}; $i++) {
  10         15  
40            
41             #grab terms from sets 1 and 2 of this pair hash
42 6         5 my %set1Terms = ();
43 6         7 foreach my $cui (@{${${$linkingTermsPairHashListRef}[$i]}{'set1'}}) {
  6         5  
  6         3  
  6         11  
44 16         15 $set1Terms{$cui} = 1;
45             }
46              
47             #find the overlapping B terms and save as an array
48 6         5 my @sharedBTerms = ();
49 6         6 foreach my $cui (@{${${$linkingTermsPairHashListRef}[$i]}{'set2'}}) {
  6         3  
  6         6  
  6         8  
50 20 100       23 if (exists $set1Terms{$cui}) {
51 10         11 push @sharedBTerms, $cui;
52             }
53             }
54              
55             #create and save the pair hash
56 6         5 my %pairHash = ();
57 6         9 $pairHash{'set1'} = \@sharedBTerms;
58 6         5 $pairHash{'set2'} = ${${$pairHashListRef}[$i]}{'set2'};
  6         5  
  6         8  
59 6         7 push @sharedBToCPairHashList, \%pairHash;
60             }
61              
62             #Compute and return the direct association for shared
63             # B to C set associations
64 4         7 return &UMLS::Association::Measures::Direct::getStats(\@sharedBToCPairHashList, $matrixFileName, $noOrder);
65             }
66              
67              
68             1;