File Coverage

blib/lib/MetaMap/DataStructures/Phrase.pm
Criterion Covered Total %
statement 145 150 96.6
branch 27 32 84.3
condition n/a
subroutine 10 10 100.0
pod 0 5 0.0
total 182 197 92.3


line stmt bran cond sub pod time code
1             # MetaMap::DataStructures::Phrase
2             # (Last Updated $Id: Phrase.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $)
3             #
4             # Perl module that provides a perl interface to the
5             # Unified Medical Language System (UMLS)
6             #
7             # Copyright (c) 2016
8             #
9             # Sam Henry, Virginia Commonwealth University
10             # henryst at vcu.edu
11             #
12             # Bridget T. McInnes, Virginia Commonwealth University
13             # btmcinnes at vcu.edu
14             #
15             # This program is free software; you can redistribute it and/or
16             # modify it under the terms of the GNU General Public License
17             # as published by the Free Software Foundation; either version 2
18             # of the License, or (at your option) any later version.
19             #
20             # This program is distributed in the hope that it will be useful,
21             # but WITHOUT ANY WARRANTY; without even the implied warranty of
22             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23             # GNU General Public License for more details.
24             #
25             # You should have received a copy of the GNU General Public License
26             # along with this program; if not, write to
27             #
28             # The Free Software Foundation, Inc.,
29             # 59 Temple Place - Suite 330,
30             # Boston, MA 02111-1307, USA.
31              
32             package MetaMap::DataStructures::Phrase;
33 1     1   3 use strict;
  1         1  
  1         20  
34 1     1   2 use warnings;
  1         2  
  1         16  
35              
36 1     1   331 use MetaMap::DataStructures::Token;
  1         1  
  1         23  
37 1     1   511 use MetaMap::DataStructures::Mapping;
  1         2  
  1         67  
38 1     1   400 use MetaMap::DataStructures::Concept;
  1         2  
  1         879  
39              
40             #----------------------------------------
41             # constructor
42             #----------------------------------------
43             # constructor method to create a new Phrase object. It is recommended to
44             # construct Phrase objects from createFromText, since it is easier and
45             # garauntees all data structures are created properly.
46             # input : $text <- the human-readable text of the Phrase
47             # \@mappings <- mapping objects of this Phrase
48             # \@orderedConceptList <- concepts object list in sequential order.
49             # Where there is disambiguation (and therefore
50             # multiple concepts mappings to a single term
51             # a vertical dimension is created. This is
52             # therefore an array of arrays, where each
53             # array contains 1 or more concept objects.
54             # \@concepts <- concept objects ordered as they are read in, not
55             # necassarily sequential.
56             # \@tokens <- tokens objects ordered as read in
57             # output: $self <- a new instance of a Phrase Object
58             sub new {
59             #create and bless self
60 134     134 0 101 my $class = shift;
61 134         113 my $self = {};
62 134         114 bless $self, $class;
63              
64             #grab variables
65 134         155 $self->{text} = shift;
66 134         110 $self->{mappings} = shift; #sequential mappings
67 134         104 $self->{orderedConceptList} = shift; #ordered 2-D array of concepts
68 134         113 $self->{concepts} = shift; #ordered as read in (not sequential)
69             #These are unique concept objects but necassarily unique CUIs
70 134         90 $self->{tokens} = shift;
71              
72 134         453 return $self;
73             }
74              
75             # method creates and returns a concept from text
76             # input : $inputText <- a MetaMap Prolog Machine Output Phrase block or
77             # equivalent.
78             # \@negatedCUIs <- a list of negated CUIs within the phrase, empty
79             # is ok.
80             # output: $self <- a new instance of a Phrase Object
81             sub createFromText {
82             #grab the input
83 134     134 0 124 my $inputText = shift;
84 134         88 my $negatedCUIsRef = shift;
85              
86             #grab the full text
87 134         836516 $inputText =~ /phrase\((.*),\[(.*)\],(\d+)\/\d+,\[.*\]\)\./;
88 134         257 my $text = $1; #the text of the phrase
89 134         142 my $syntaxText = $2; #text containing the syntactic info
90 134         143 my $phraseStartIndex = $3; #character number the phrase begins at
91              
92             #remove trailing and leading quotes
93 134 100       382 if ($text =~ m/'(.*)'/) {
94 117         157 $text = $1;
95             }
96              
97             #----- Token Creation -------------------------
98             #get each token text
99 134         1470 my @tokenTexts = split /(adv|aux|compl|conj|det|head|mod|modal|pastpart|prep|pron|punc|shapes|verb|not_in_lex)\(/, $syntaxText;
100 134         109 shift @tokenTexts; #shift empty position 0 off
101              
102             #loop through each token text
103 134         158 my @tokens = ();
104 134         242 for (my $i = 0; $i < scalar @tokenTexts; $i+=2) {
105              
106             #get the type and token text
107 463         411 my $type = $tokenTexts[$i];
108 463         365 my $tokenText = $tokenTexts[$i+1];
109              
110             #add a new token to the list of tokens
111 463         1101 push @tokens, &MetaMap::DataStructures::Token::createFromText(
112             $type.'('.$tokenText);
113             }
114              
115             #----- Mappings and Concept Creation --------------
116             #gets each mapping text and orders the concepts
117             # iterates through each mapping, and creates a new mapping
118             # for each mapping text. For each new concept that is found
119             # it compares it with existing ordered concepts and sees if it
120             # should add it to a 2-D array of ordered concepts. The second
121             # dimension of the orderedConcepts array indicates multiple
122             # mappings for the token at that index
123             #get each mapping text
124 134         647 $inputText =~ m/mappings\(\[(.*)\)\./;
125 134         743 my @mappingsTexts = split /map\(-/, $1;
126             #remove the first element (text matched before 'map'
127 134         102 shift @mappingsTexts;
128            
129             #loop through each mapping text
130 134         154 my @orderedConcepts = (); #an array of arrays of ordered concepts.
131             # Where multiple mappings occur a new vertical dimension is added
132 134         101 my @mappings = ();
133 134         79 my @concepts = (); #an array of unique concepts
134 134         146 foreach my $mappingText(@mappingsTexts) {
135             #add the map back on (from the split)
136 488         897 $mappingText = 'map(-'.$mappingText;
137              
138             #grab the mapping score
139 488         1332 $mappingText =~ m/map\((-?\d+)/;
140 488         578 my $mappingScore = $1;
141              
142             #grab the concepts associated with the mapping
143 488         1312 my @conceptTexts = split /ev\(-/, $mappingText;
144 488         333 shift @conceptTexts; #shift off the leading text
145              
146             #loop through each concept
147 488         471 my @mappingConcepts = (); # = a list of concepts by mapping
148             # (as they appear when read in)
149 488         422 foreach my $conceptText(@conceptTexts) {
150              
151             #create the concept (put ev back on the front)
152 1583         3760 my $newConcept = &MetaMap::DataStructures::Concept::createFromText(
153             'ev(-'.$conceptText, \@tokens);
154              
155             #see where to place this concept within the context of the
156             # ordered concepts also see if the concept already exists
157             # (mappings repeat concepts) if it exists use the instance of
158             # that concept (oldConcept) in this mapping
159 1583         1232 my $conceptExists = 0;
160 1583         895 my $conceptIndex = -1;
161 1583         2246 for (my $i = 0; $i < scalar @orderedConcepts; $i++) {
162 3409         2266 for (my $j = 0; $j < scalar @{ $orderedConcepts[$i] }; $j++) {
  7878         10018  
163             #grab the existing concept
164 5697         3595 my $oldConcept = $orderedConcepts[$i][$j];
165            
166             #check if an instance of the concept already exists
167             # or if it is a new mapping of an existing concept
168 5697 100       7766 if ($newConcept->equals($oldConcept)) {
    100          
169             #an instance of this concept alread exists, stop looping
170 1228         874 $conceptExists = 1;
171 1228         871 $newConcept = $oldConcept;
172 1228         2406 last;
173             }
174             elsif ($newConcept->mapsToSameTokens($oldConcept)) {
175             #this concept maps to the same tokens as an existing
176             # concept record index, so must continue checking
177             # though to see if the concept already exists
178 1469         1348 $conceptIndex = $i;
179             }
180              
181             } #end inner ordered concept loop
182            
183             #check if you can quit early
184 3409 100       4249 if ($conceptExists) {
185 1228         843 last; #an instance of this concept already exists, done
186             }
187 2181 100       4004 if ($conceptIndex >= 0) {
188             #this concept maps to the same token as an existing
189             # concept, your done
190 136         113 last;
191             }
192             } #end outer ordered concept loop
193              
194             #done searching through existing concepts, update data structures
195 1583 100       2044 if (!$conceptExists) {
196             #update unordered concepts
197 355         301 push @concepts, $newConcept;
198            
199 355 100       373 if ($conceptIndex >= 0) {
200             #add to existing CUI list
201 136         82 push @{ $orderedConcepts[$conceptIndex] }, $newConcept;
  136         166  
202             }
203             else {
204             #create a new CUI list
205             #new concept, append ordered concepts with a new array
206 219         286 my @newArray = [ $newConcept ];
207 219         188 push @orderedConcepts, @newArray;
208            
209 219         259 my $index = (scalar @orderedConcepts)-1;
210             }
211             }
212              
213             #add the concept to the list of mapping concepts
214 1583         1896 push @mappingConcepts, $newConcept;
215              
216             }#end concept text loop
217              
218             #create and save the new mapping
219 488         979 push @mappings, MetaMap::DataStructures::Mapping->new(
220             $mappingScore, \@mappingConcepts);
221             }
222              
223             #----- Concept Negation ---------------------------
224             #TODO possible problem with negations. If a single phrase contains
225             # the same CUI multiple times how do I distinguish between the negated
226             # and non-negated one?
227 134         220 for (my $i = 0; $i < scalar @orderedConcepts; $i++) {
228 219         158 for (my $j = 0; $j < scalar @{ $orderedConcepts[$i] }; $j++) {
  574         923  
229 355         236 my $concept = $orderedConcepts[$i][$j];
230              
231             #see if it is negated
232 355         182 foreach my $negatedCUI(@{$negatedCUIsRef}) {
  355         370  
233 0 0       0 if ($concept->{cui} eq $negatedCUI) {
234 0         0 $concept->{isNegated} = 1;
235             }
236             }
237             }
238             }
239              
240             #create and return the new phrase
241 134         228 return MetaMap::DataStructures::Phrase->new(
242             $text, \@mappings, \@orderedConcepts, \@concepts, \@tokens);
243             }
244              
245              
246             #----------------------------------------
247             # methods
248             #----------------------------------------
249             # method summarizes this phrase as a string
250             # input : -
251             # output: $string <- a string describing $self
252             sub toString {
253 28     28 0 20 my $self = shift;
254            
255             #create head
256 28         17 my $string = "Phrase:\n";
257 28         34 $string .= " $self->{text}\n";
258              
259             #add each token text
260 28         18 $string .= " tokens:*";
261 28         17 foreach my $token(@{$self->{tokens}}) {
  28         38  
262 104         105 $string .= $token->{text}."*";
263             }
264 28         19 $string .= "\n";
265            
266             #add each concept text
267 28         22 $string .= " concepts:*";
268 28         10 foreach my $concept(@{$self->{concepts}}) {
  28         30  
269 80         82 $string .= $concept->{text}."*";
270             }
271 28         23 $string .= "\n";
272            
273             #add each mapping to the string
274 28         22 $string .= " mappings:\n";
275 28         13 foreach my $mapping(@{$self->{mappings}}) {
  28         29  
276 91         128 $string .= " ".$mapping->toString()."\n";
277             }
278              
279 28         191 return $string;
280             }
281              
282             # method compares this phrase to another and returns 1 if the two
283             # contain identical information
284             # input : $other <- the Phrase object to compare against
285             # output: boolean <- 1 if $self and $other are equivalent (equivalent texts,
286             # mappings, concepts, and tokens), else 0
287             sub equals {
288             #grab input
289 758     758 0 466 my $self = shift;
290 758         429 my $other = shift;
291              
292             #compare texts
293 758 100       917 if ($self->{text} ne $other->{text}) {
294 702         1193 return 0;
295             }
296              
297             #compare mappings
298 56         38 foreach my $mappingA(@{$self->{mappings}}){
  56         68  
299              
300             #check each mapping in B
301 182         155 my $match = 0;
302 182         118 foreach my $mappingB(@{$other->{mappings}}) {
  182         180  
303 2078 100       2672 if ($mappingA->equals($mappingB)) {
304 182         109 $match = 1;
305 182         132 last;
306             }
307             }
308              
309             #mappingA has no equivalent mapping in $other
310             # so phrases are not identical
311 182 50       277 if ($match < 1) {
312 0         0 return 0;
313             }
314             }
315              
316             #compare Concepts
317 56         38 foreach my $conceptA(@{$self->{concepts}}){
  56         62  
318              
319             #check each concept in B
320 160         112 my $match = 0;
321 160         90 foreach my $conceptB(@{$other->{concepts}}) {
  160         151  
322 510 100       631 if ($conceptA->equals($conceptB)) {
323 160         106 $match = 1;
324 160         101 last;
325             }
326             }
327              
328             #conceptA has no equivalent concept in $other
329             # so phrases are not identical
330 160 50       213 if ($match < 1) {
331 0         0 return 0;
332             }
333             }
334              
335             #compare Tokens
336 56         44 foreach my $tokenA(@{$self->{tokens}}){
  56         66  
337              
338             #check each token in B
339 208         137 my $match = 0;
340 208         125 foreach my $tokenB(@{$other->{tokens}}) {
  208         183  
341 586 100       732 if ($tokenA->equals($tokenB)) {
342 208         109 $match = 1;
343 208         151 last;
344             }
345             }
346              
347             #tokenA has no equivalent mapping in $other
348             # so phrases are not identical
349 208 50       273 if ($match < 1) {
350 0         0 return 0;
351             }
352             }
353              
354             #all fields are equivalent, return true
355 56         109 return 1;
356             }
357              
358             # method determines if this phrase contains the CUI provided as input
359             # returns 1 if this phrase contains the CUI, else 0
360             # input : $cui <- a string CUI code
361             # output: boolean <- 1 if any $self contains the $cui
362             sub contains {
363             #grab input
364 29     29 0 19 my $self = shift;
365 29         16 my $cui = shift;
366              
367             #check concept to see if it is the CUI
368 29         22 my $containsCUI = 0;
369 29         11 foreach my $concept(@{$self->{concepts}}) {
  29         33  
370 81 100       119 if ($concept->{cui} eq $cui) {
371 1         2 $containsCUI = 1;
372 1         1 last;
373             }
374             }
375            
376             #return the result
377 29         58 return $containsCUI;
378             }
379              
380             1;
381              
382             __END__