File Coverage

blib/lib/SemMed/Interface/GraphTraversal.pm
Criterion Covered Total %
statement 13 15 86.6
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 18 20 90.0


line stmt bran cond sub pod time code
1             #!/usr/bin/perl
2             #
3             # @File GraphTraversal.pm
4             # @Author andriy
5             # @Created Jul 1, 2016 10:53:44 AM
6             #
7 1     1   5 use strict;
  1         1  
  1         21  
8 1     1   2 use warnings;
  1         1  
  1         21  
9 1     1   330 use SemMed::Interface::CUI;
  1         1  
  1         18  
10 1     1   368 use SemMed::Interface::Predicate;
  1         1  
  1         26  
11 1     1   332 use SemMed::Interface::DataAccess;
  0            
  0            
12             use Heap::Priority;
13             package GraphTraversal;
14              
15              
16              
17             my $conn = ""; #used for data access
18             sub new{
19             my $class = shift;
20             $conn = shift;
21             my $self = {};
22             bless $self, $class;
23             return $self;
24             }
25              
26              
27             #given a source CUI-ID and a destination CUI-ID this sub will return the destination CUI containing shortest path data or a
28             #-1 signifying no path was found. Path data in the returned CUI can be access through its methods (ie. $CUI->getPathLength())
29             #Utilizes Dijktras for finding shortest path between CUI's
30              
31             #INPUT: SOURCE_CUI(string), DESTINATION_CUI(String), WEIGHT_STATISTICAL_MEASURE(string)
32             #OPTIONAL INPUT: , List of predicates to only include, List of Predicates to Ignore
33              
34              
35             sub findShortestPath{
36             my $self = shift;
37             my $startCui = $conn->getCUI(shift); #ex. heart arrest C0018790
38             my $endCui = $conn->getCUI(shift); #ex. traffic accidents C0000932
39             my $statistic = shift;
40             my $includedPredicates = shift; #array reference to list of predicates to include
41             my $excludedPredicates = shift; #array reference to list of predicates to ignore
42              
43              
44              
45             my $currentVertex = $startCui; #starting vertex
46             $currentVertex-> setPathLength(0); #mark first vertex as reached
47              
48             my @edges = (); #this array contains all predicate connections found thus far
49              
50             my $fringe = new Heap::Priority; #this PriorityQueue contains all CUI under consideration for the next shortest path.
51             $fringe->lowest_first(); #set priority to the smallest element
52              
53             my @reached = (); #this array contains references to all CUI's that have already been reached.
54              
55             ## load initial set of predicate connections
56             my @query = $conn->getPredicateConnections($startCui, $statistic, $includedPredicates, $excludedPredicates);
57             foreach my $edge (@query){
58             push @edges, $edge;
59             }
60              
61              
62              
63             while($currentVertex->getId() ne $endCui->getId()){ #while we have not reached the vertex we're searching for
64             $currentVertex->_print();
65             push @reached, $currentVertex; #add current vertex to reached vertices
66             # $currentVertex->printPath();
67             foreach my $edge (@edges){
68             if($edge->getSource()->getId() eq $currentVertex->getId() ){
69              
70             my $destVertex = $edge->getDestination();
71              
72              
73             if( not(grep $_->getId() eq $destVertex->getId(), @reached) ){ #if destVertex has not been reached yet
74              
75             if($destVertex->getPathLength() == -1){
76             $destVertex->setPathLength( $currentVertex->getPathLength() + $edge->getWeight() ); #TODO implement own method
77             $destVertex->setPrevCUI($currentVertex); #save the vertex we arrived from
78             $destVertex->setPrevPredicate($edge -> getPredicate);
79             }
80             if($destVertex->getPathLength() >= ($currentVertex->getPathLength() + $edge->getWeight() ) ){ #TODO
81             $destVertex->setPathLength( $currentVertex->getPathLength() + $edge->getWeight() ); #TODO implement own method
82             $destVertex->setPrevCUI($currentVertex); #save the vertex we arrived fromcd
83             $destVertex->setPrevPredicate($edge -> getPredicate);
84             }
85              
86             # if(not(grep $_->getId() eq $destVertex->getId(), @fringe) ){
87             $fringe->add($destVertex, $destVertex->getPathLength());
88             #push onto the queue giving it a priority equal to its edge weight.
89             # }
90             }
91             }
92             }
93              
94              
95             if($fringe->count()==0){ #if fringe is empty,break
96             return -1;
97             }
98              
99             #set current vertex to CUI with smallest aggregate weight
100             $currentVertex = $fringe->pop();
101              
102             ##loads new set of edges from databa$gt = new GraphTraversal();se
103             my @newedges = $conn->getPredicateConnections($currentVertex, $statistic);
104             foreach my $edge (@newedges){
105             push @edges, $edge;
106             }
107              
108             }
109              
110             push @reached, $currentVertex; #push end cui onto reached as we have found it
111              
112             return $currentVertex;
113              
114             }
115              
116              
117             #finds a path between two given CUI's
118             #utilizes BFS
119              
120             #
121             #INPUT: SOURCE_CUI(string), DESTINATION_CUI(string)
122             #OPTIONAL INPUT: List of predicates to only include
123             #
124             #OUTPUT: PathLength from SOURCE_CUI to DESTINATION_CUI
125             #
126             #
127             sub findPath{
128             my $self = shift;
129             my $startCui = shift; #String containing the start cui
130             my $endCui = shift; #String containing the end cui
131             my $includedPredicates = shift; #array reference to list of predicates to include
132              
133              
134              
135              
136             #shift will return head of the queue
137             #push will add element to the queue
138             my @reachedCUI = (); #this array(treated as a queue) will contain the next node we want to go to
139             my @reachedLength = (); #parallel array to hold the length to each cui
140              
141             my $currentCUI = $startCui;
142             my $currentLength = 0;
143              
144             while($currentCUI ne $endCui){
145              
146              
147              
148             if($currentLength == 10){
149             return -1;
150             }
151              
152             my $adjacentedges = $conn->getConnections($currentCUI, $includedPredicates);
153             #push new vertices to end of queue
154             foreach my $edge (@{$adjacentedges}){
155             push @reachedCUI, @{$edge}[1];
156             push @reachedLength, ($currentLength + 1);
157             }
158             #pop next vertex from queue
159             $currentCUI = shift @reachedCUI;
160             $currentLength = shift @reachedLength;
161              
162              
163             }
164              
165             return $currentLength;
166              
167              
168             }
169              
170             #finds the aggregate path score between two given CUI's
171             #utilizes BFS
172              
173             #
174             #INPUT: SOURCE_CUI(string), DESTINATION_CUI(string)
175             #OPTIONAL INPUT: statistical measure, List of predicates to only include, List of Predicates to Ignore
176             #
177             #OUTPUT: Aggregate relatedness score(measure specified in parameters) from SOURCE_CUI to DESTINATION_CUI
178             #
179             #TODO
180             sub findPathScore{
181             my $self = shift;
182             my $startCui = shift; #String containing the start cui
183             my $endCui = shift; #String containing the end cui
184             my $measure = shift;
185             my $includedPredicates = shift; #array reference to list of predicates to include
186             my $excludedPredicates = shift; #array reference to list of predicates to ignore
187              
188              
189              
190              
191             #shift will return head of the queue
192             #push will add element to the queue
193             my @reachedCUI = (); #this array(treated as a queue) will contain the next node we want to go to
194             my @reachedScore = (); #parallel array to hold the score of each cui
195              
196             my $currentCUI = $startCui;
197             my $currentLength = 0;
198             my $iter = 0;
199             while($currentCUI ne $endCui){
200             $iter++;
201             if($iter % 1000 == 0){
202             # print STDERR "Buffered CUI's: ". scalar(@reachedCUI)." ==> $iter \n";
203             }
204              
205             #TODO add threshold
206             # if($currentLength == 10){
207             # return -1;
208             # }
209              
210             my @adjacentedges = $conn->getPredicateConnections($conn->getCUI($currentCUI), $measure, $includedPredicates, $excludedPredicates);
211              
212             #push new vertices to end of queue
213             foreach my $edge (@adjacentedges){
214             push @reachedCUI, $edge->getDestination()->getId();
215             push @reachedScore, ($currentLength + ($edge->getWeight()));
216             }
217              
218             #pop next vertex from queue
219             $currentCUI = shift @reachedCUI;
220             $currentLength = shift @reachedScore;
221              
222              
223             }
224              
225             return $currentLength;
226              
227              
228             }
229              
230              
231             sub findPathString{
232             my $self = shift;
233             my $startCui = shift; #String containing the start cui
234             my $endCui = shift; #String containing the end cui
235             my $measure = shift;
236             my $includedPredicates = shift; #array reference to list of predicates to include
237             my $excludedPredicates = shift; #array reference to list of predicates to ignore
238              
239             my @reachedCUI; #this array(treated as a queue) will contain the next node we want to go to
240             my @reachedString = (); #parallel array to hold the path string
241              
242             my $currentCUI = $startCui;
243             my $currentString = "$startCui ";
244              
245             while($currentCUI ne $endCui){
246              
247             my $adjacentedges = $conn->getConnections($currentCUI);
248              
249             #push new vertices to end of queue
250             foreach my $edge (@{$adjacentedges}){
251             push @reachedCUI, @{$edge}[1];
252             push @reachedString, ($currentString." ".@{$edge}[1]);
253             }
254              
255             #pop next vertex from queue
256             $currentCUI = shift @reachedCUI;
257             $currentString = shift @reachedString;
258              
259              
260             }
261              
262             return $currentString;
263              
264             }
265              
266             #Finds overlap in outgoing concepts from two given concepts
267             #
268             #input: cui, cui
269             #output: score <- integer denoting the number of overlapping concepts
270              
271             sub getOverlappingConcepts{
272             my $self = shift;
273             my $concept_one = shift;
274             my $concept_two = shift;
275             my $includedPredicates = shift;
276              
277             my $concept_one_breadth = $conn->getConnections($concept_one, $includedPredicates);
278             my $concept_two_breadth = $conn->getConnections($concept_two, $includedPredicates);
279              
280             my $overlapping_concepts = 0;
281             foreach my $edge (@{$concept_one_breadth}){
282             my $concept = @{$edge}[1];
283             foreach my $edge2 (@{$concept_two_breadth}){
284             if($concept eq @{$edge2}[1]){
285             $overlapping_concepts++;
286             last;
287             }
288             }
289             }
290             return $overlapping_concepts;
291              
292              
293             }
294              
295             # a random neighboor of a given cui
296              
297             sub getRandomNeighbor{
298             my $self = shift;
299             my $concept = shift;
300             my $includedPredicates = shift;
301             my @neighbors = @{$conn->getBidirectionalConnections($concept, $includedPredicates)};
302             my @randomConnection = @{$neighbors[rand @neighbors]};
303              
304             if($randomConnection[0] eq $concept){#our neighbor is the second element
305             return $randomConnection[1];
306             }else{
307             return $randomConnection[0];
308             }
309              
310             }
311              
312              
313              
314              
315             1;