File Coverage

blib/lib/SemMed/Interface/DataAccess.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2             #
3             # @File DataAccess.pm
4             # @Author andriy
5             # @Created Jun 27, 2016 3:27:32 PM
6             #
7              
8             package DataAccess;
9 1     1   298 use DBI;
  0            
  0            
10             use UMLS::Association;
11              
12              
13            
14              
15             my $errorhandling = "";
16             my $association = "";
17             my $dbh = "";
18              
19             # method to create a new SemRep::Interface object
20             # input : $SemMedLoginParams <- reference to hash containing SemMed login parameter
21             # $AssociationLoginParams <- reference to hash containing the UMLS::Association login parameters
22             # output:
23             sub new {
24             $class = shift;
25             my $self = {};
26             my $SemMedLoginParams = shift; #hash containing the SemMed login parameters
27             my $AssociationLoginParams = shift; #hash containing the UMLS::Association login parameters
28            
29             my $database = $SemMedLoginParams->{'database'};
30             my $hostname = $SemMedLoginParams->{'hostname'};
31             my $port = $SemMedLoginParams->{'port'};
32             my $userid = $SemMedLoginParams->{'username'};
33             my $password = $SemMedLoginParams->{'password'};
34            
35             my $dsn = "DBI:mysql:database=$database;host=$hostname;port=$port";
36            
37             $association = new UMLS::Association($AssociationLoginParams);
38             $errorhandling = UMLS::Association::ErrorHandler->new();
39            
40             $dbh = DBI->connect($dsn, $userid, $password ) or die $DBI::errstr;
41             $dbh->{InactiveDestroy} = 1; #allows forking of threads containing this DB connect
42            
43             bless $self, $class;
44             return $self;
45             }
46              
47              
48             #given a CUI, this method will return all Predicate-CUI connections leading from the given CUI
49              
50             #INPUT: SOURCE_CUI_OBJECT, WEIGHT_STATISTICAL_MEASURE(string) or 1
51             #OPTIONAL INPUT: , List of predicates to only include, List of Predicates to Ignore
52              
53             #OUTPUT: ARRAY OF PREDICATE objects whos source is SOURCE_CUI_OBJECT and destination is what was matched in DB.
54             sub getPredicateConnections {
55            
56             my $self = shift;
57            
58             my $cui = shift;
59             my $measure = shift;
60             my $includedPredicates = shift;
61             my $excludedPredicates = shift;
62             unless($measure){ #default to tscore
63             $measure = "tscore";
64             }
65            
66             my $cuiid = $cui->getId();
67            
68             my $queryString =
69             "SELECT s_cui, s_name, o_cui, o_name, predicate, $measure FROM SemMedDB.DISTINCT_PREDICATION_AGGREGATE
70             WHERE s_cui = '$cuiid'";
71            
72            
73             ### Will add in query parameters for removing certain predicate types
74             if($includedPredicates){
75             my $perm = " AND (";
76             foreach $pred ( @$includedPredicates){
77             $perm .= "predicate = '$pred' OR ";
78             }
79             substr($perm, -4) = ""; #remove extra OR
80             $perm .= ")";
81             $queryString .= $perm;
82             }
83            
84             if($excludedPredicates){
85             my $perm = " AND NOT (";
86             foreach $pred ( @$excludedPredicates){
87             $perm .= "predicate = '$pred' OR ";
88             }
89             substr($perm, -4) = "";
90             $perm .= ")";
91             $queryString .= $perm;
92             }
93              
94            
95            
96             my $sth = $dbh->prepare($queryString);
97             $sth->execute() or die $DBI::errstr;
98             my @edges = ();
99            
100            
101             while (my @row = $sth->fetchrow_array()) {
102             my $source = new CUI($row[0], $row[1]); #create source vertex
103             my $destination = new CUI($row[2], $row[3]); #create dest vertext
104            
105             my $weight;
106            
107             my $validConcepts = $errorhandling->_validCui($row[0]) && $errorhandling->_validCui($row[2]);
108            
109             if($measure == 1){ #no stats needed, we are just running a BFS
110             $weight = 1;
111             }else{
112            
113             #if value is cached in db, use it
114             if($row[5]){
115             $weight = $row[5];
116            
117             }else{
118             #obtain value from UMLS, then cache it
119             if($validConcepts){
120             # print "calculating stats $row[1] $row[3] \n";
121             $weight = $association->calculateStatistic($source->getId(), $destination->getId(), $measure);
122             }else{
123             $weight = -1;
124             }
125            
126            
127             my $updateString = "UPDATE SemMedDB.DISTINCT_PREDICATION_AGGREGATE SET $measure = $weight WHERE s_cui = '$row[0]' AND predicate = '$row[4]' AND o_cui = '$row[2]' LIMIT 1;";
128             my $update = $dbh->prepare($updateString);
129             $update->execute() or die $DBI::errstr;
130             }
131             #we need to calculate an edge weight, use the measure given to do so from UMLS::Association package
132            
133            
134            
135             }
136            
137             if($weight == -1){next;}#if no information was found, ignore this edge
138            
139             #The weight is now a statistic, a value further from 0 will indicate that the two words are highly associated
140             #Take the multiplicative reciprical of this so that highly associated words will correspond to lower edge weights
141             if($weight){ #make sure its not zero
142             $weight = abs(1/$weight);
143             }
144            
145            
146             my $predicate = new Predicate($source, $row[4], $destination, $weight); #create edge
147             push @edges, $predicate; #push to main array holding all edges
148             }
149             $sth->finish();
150            
151             return @edges;
152             }
153              
154             #
155             #OBTAIN CUI FROM CUI ID
156             #INPUT: CUI | PREFERRED NAME
157             #OUTPUT: CUI_OBJECT with fields complete.
158             sub getCUI{
159             my($self, $cui) = @_;
160             my $sth = $dbh->prepare("SELECT CUI, PREFERRED_NAME FROM CONCEPT
161             WHERE CUI = '$cui' OR PREFERRED_NAME = '$cui'
162             LIMIT 1");
163             $sth->execute() or die $DBI::errstr;
164             my @row = $sth->fetchrow_array();
165             return new CUI($row[0], $row[1]);
166             }
167              
168              
169             #
170             # OBTAIN SEMTYPE FROM CUI ID
171             #INPUT: CUI | PREFERRED NAME
172             #OUTPUT: Semantic Type Associated with that CUI or Term.
173              
174              
175             sub getSemtype{
176             my($self, $cui) = @_;
177             my $sth = $dbh->prepare(
178             "SELECT CS.SEMTYPE FROM CONCEPT C
179             JOIN CONCEPT_SEMTYPE CS ON (C.CONCEPT_ID = CS.CONCEPT_ID)
180             WHERE C.CUI = '$cui' OR C.PREFERRED_NAME LIKE '$cui'
181             LIMIT 1"
182             );
183             $sth->execute() or die $DBI::errstr;
184             return ($sth->fetchrow_array())[0];
185            
186             }
187             1;