File Coverage

blib/lib/MetaMap/DataStructures/Utterance.pm
Criterion Covered Total %
statement 93 116 80.1
branch 7 12 58.3
condition 1 3 33.3
subroutine 12 13 92.3
pod 0 10 0.0
total 113 154 73.3


line stmt bran cond sub pod time code
1             # MetaMap::DataStructures::Utterance
2             # (Last Updated $Id: Utterance.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $)
3             #
4             # Perl module that provides a perl interface to the
5             # Unified Medical Language System (UMLS)
6             #
7             # Copyright (c) 2016
8             #
9             # Sam Henry, Virginia Commonwealth University
10             # henryst at vcu.edu
11             #
12             # Bridget T. McInnes, Virginia Commonwealth University
13             # btmcinnes at vcu.edu
14             #
15             # This program is free software; you can redistribute it and/or
16             # modify it under the terms of the GNU General Public License
17             # as published by the Free Software Foundation; either version 2
18             # of the License, or (at your option) any later version.
19             #
20             # This program is distributed in the hope that it will be useful,
21             # but WITHOUT ANY WARRANTY; without even the implied warranty of
22             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23             # GNU General Public License for more details.
24             #
25             # You should have received a copy of the GNU General Public License
26             # along with this program; if not, write to
27             #
28             # The Free Software Foundation, Inc.,
29             # 59 Temple Place - Suite 330,
30             # Boston, MA 02111-1307, USA.
31              
32             package MetaMap::DataStructures::Utterance;
33              
34 1     1   3 use strict;
  1         1  
  1         20  
35 1     1   3 use warnings;
  1         1  
  1         16  
36              
37 1     1   345 use MetaMap::DataStructures::Phrase;
  1         3  
  1         845  
38              
39             #----------------------------------------
40             # constructors
41             #----------------------------------------
42             # constructor method to create a new Utterance object
43             # input : $inputText <- a MetaMap Prolog Output utterance block
44             # (or equivalent)
45             # $id <- the id of this Utterance of the form: (ab:ti).([\d]+).([\d]+)
46             # (e.g. ab.00000.1)
47             # $text <- the human readable text of this utterance
48             # \@phrases <- an ordered list of phrase objects
49             # output: $self <- an instance of an Utterance object
50             sub new {
51             #create and bless self
52 7     7 0 11 my $class = shift;
53 7         9 my $self = {};
54 7         13 bless $self, $class;
55              
56             #grab input
57 7         24 $self->{inputText} = shift;
58 7         15 $self->{id} = shift;
59 7         13 $self->{text} = shift;
60 7         8 $self->{phrases} = shift;
61            
62 7         69 return $self;
63             }
64              
65             # method creates and returns an utterance from text
66             # (MetaMap Prolog Machine Output Utterance Block)
67             # input : $inputText <- a MetaMap Prolog Output utterance block (or equivalent)
68             # output: $self <- an instance of an Utterance object
69             sub createFromText {
70             #grab the input
71 7     7 0 11 my $self = shift;
72 7         8 my $inputText = shift;
73            
74             #grab negated CUIs
75 7         260 $inputText =~ m/neg_list\((.*)\)./;
76 7         285 my $negationsText = $1;
77 7         12 my @negatedCUIs = ();
78 7 50       17 if (defined $negationsText) {
79 7         182 while ($negationsText =~
80             m/negation\(\w+,[^\[\]]*,\[\d+\/\d+\],\['(C\d+)':/g) {
81 0         0 push @negatedCUIs, $1;
82             }
83             }
84              
85             #grab the id and text
86 7         906 $inputText =~ /utterance\('(.*)',"(.*)",/;
87 7         143 my $id = $1;
88 7         11 my $text = $2;
89              
90             #create the phrases list
91 7         344 my @phraseTexts = split /phrase\(/, $inputText;
92             #shift the first part off (its the part before the first phrase match
93 7         8 shift @phraseTexts;
94              
95             #create a phrase from the phrase texts (and collect the concepts)
96 7         10 my @phrases = ();
97 7         9 foreach my $phraseText(@phraseTexts) {
98             #put 'phrase(' back on
99 134         494 $phraseText = 'phrase('.$phraseText;
100             #create a new phrase from text
101 134         279 my $newPhrase = &MetaMap::DataStructures::Phrase::createFromText(
102             $phraseText, \@negatedCUIs);
103 134         225 push @phrases, $newPhrase;
104             }
105              
106             #create and return the new utterance
107 7         35 return MetaMap::DataStructures::Utterance->new(
108             $inputText, $id, $text, \@phrases);
109             }
110              
111             # method creates and returns an utterance from text
112             # (MetaMap Prolog Machine Output Utterance Block), and uses a custom $id.
113             # This is useful when the $input text has a non-properly formatted $id
114             # (e.g. tx.0000000.1)
115             # input : $inputText <- a MetaMap Prolog Output utterance block
116             # (or equivalent)
117             # $id <- the id to associate with this Utterance. It overrides any id
118             # found within $inputText. $id should be of the form:
119             # (ab:ti).([\d]+).([\d]+) (e.g. ab.00000.1)
120             # output: $self <- an instance of an Utterance Object
121             sub createFromTextWithId {
122 0     0 0 0 my $self = shift;
123              
124             #grab the input
125 0         0 my $inputText = shift;
126 0         0 my $id = shift;
127            
128             #grab negated CUIs
129 0         0 $inputText =~ m/neg_list\((.*)\)./;
130 0         0 my $negationsText = $1;
131 0         0 my @negatedCUIs = ();
132 0 0       0 if (defined $negationsText) {
133 0         0 while ($negationsText =~
134             m/negation\(\w+,[^\[\]]*,\[\d+\/\d+\],\['(C\d+)':/g) {
135 0         0 push @negatedCUIs, $1;
136             }
137             }
138              
139             #grab the id and text
140 0         0 $inputText =~ /utterance\('(.*)',"(.*)",/;
141 0         0 my $aid = $1;
142 0         0 my $text = $2;
143              
144             #create the phrases list
145 0         0 my @phraseTexts = split /phrase\(/, $inputText;
146             #shift the first part off (its the part before the first phrase match
147 0         0 shift @phraseTexts;
148              
149             #create a phrase from the phrase texts (and collect the concepts)
150 0         0 my @phrases = ();
151 0         0 foreach my $phraseText(@phraseTexts) {
152             #put 'phrase(' back on
153 0         0 $phraseText = 'phrase('.$phraseText;
154             #create a new phrase from text
155 0         0 my $newPhrase = &MetaMap::DataStructures::Phrase::createFromText(
156             $phraseText, \@negatedCUIs);
157 0         0 push @phrases, $newPhrase;
158             }
159              
160             #create and return the new utterance
161 0         0 return MetaMap::DataStructures::Utterance->new(
162             $inputText, $id, $text, \@phrases);
163             }
164              
165             #----------------------------------------
166             # methods
167             #----------------------------------------
168             # method summarizes this utterance as a string
169             # input : -
170             # output: $string <- a string describing $self
171             sub toString {
172 1     1 0 8 my $self = shift;
173              
174 1         2 my $string = "utterance:\n";
175 1         103 $string .= " $self->{id}\n";
176 1         6 $string .= " $self->{text}\n";
177            
178             #add each phrase to the string
179 1         3 foreach my $phrase(@{$self->{phrases}}) {
  1         3  
180 28         45 $string .= " ".$phrase->toString()."\n";
181             }
182            
183 1         359 return $string;
184             }
185              
186             # method compares this utterance to another and returns 1 if the two
187             # contain identical information
188             # input : $other <- the utterrance object to compare against
189             # output: boolean <- 1 if $self and $other are equivalent (contain equivalent
190             # IDs, and phrases), else 0
191             sub equals {
192             #grab input
193 2     2 0 2 my $self = shift;
194 2         3 my $other = shift;
195              
196             #compare id's and text
197 2 50 33     15 if ($self->{id} ne $other->{id}
198             || $self->{text} ne $other->{text}) {
199 0         0 return 0;
200             }
201              
202             #compare Utterances
203 2         2 foreach my $phraseA(@{$self->{phrases}}){
  2         4  
204              
205             #check each utterance in B
206 56         41 my $match = 0;
207 56         36 foreach my $phraseB(@{$other->{phrases}}) {
  56         63  
208 758 100       941 if ($phraseA->equals($phraseB)) {
209 56         34 $match = 1;
210 56         41 last;
211             }
212             }
213              
214             #utteranceA has no equivalent phrase in $other
215             # so utterances are not identical
216 56 50       86 if ($match < 1) {
217 0         0 return 0;
218             }
219             }
220              
221             #all tests passed, return true
222 2         10 return 1;
223             }
224              
225             # method determines if this utterance contains the CUI provided as input
226             # returns 1 if this utterance contains the CUI, else 0
227             # input : $cui <- a string CUI code
228             # output: boolean <- 1 if any of $self's phrases contain $cui
229             sub contains {
230             #grab input
231 2     2 0 3 my $self = shift;
232 2         3 my $cui = shift;
233              
234             #check each phrase to see if it contains the CUI
235 2         2 my $containsCUI = 0;
236 2         4 foreach my $phrase(@{$self->{phrases}}) {
  2         4  
237 29 100       39 if ($phrase->contains($cui)) {
238 1         1 $containsCUI = 1;
239 1         1 last;
240             }
241             }
242            
243             #return the result
244 2         6 return $containsCUI;
245             }
246              
247             # method gets the an array of concepts as they appear in the utterance.
248             # Conepts are not necassarily ordered, where ambiguity exists all possible
249             # token->CUI mappings are listed adjacent to one another.
250             # input : -
251             # output: \@concepts <- a list of concept objects
252             sub getConcepts {
253             #initialize
254 2     2 0 3 my $self = shift;
255 2         2 my @concepts = ();
256              
257             #add concepts in sorted order
258 2         2 foreach my $phrase(@{$self->{phrases}}) {
  2         4  
259 56         35 push @concepts, @{$phrase->{concepts}};
  56         67  
260             }
261 2         10 return \@concepts;
262             }
263              
264             # method gets an array list of concepts as they appear in the utterance
265             # input : -
266             # output: \@conceptList <- an array of arrays, where each sub-array contains a
267             # list of 1 or more concept objects. Where more than
268             # one concept object occurrs it means the token to
269             # concept mapping was ambiguous. Arrays are ordered as
270             # the tokens occurr in the utterance.
271             sub getOrderedConcepts {
272             #initialize
273 1     1 0 2 my $self = shift;
274 1         2 my @conceptList = ();
275              
276             #add concepts in sorted order
277 1         1 foreach my $phrase(@{ $self->{phrases} }) {
  1         2  
278 28         18 push @conceptList, @{ $phrase->{orderedConceptList} };
  28         39  
279             }
280 1         8 return \@conceptList;
281             }
282              
283             # method gets the an array of tokens as they appear in the utterance
284             # input : -
285             # output: \@tokens <- a list token objects ordered by their appearance in $self
286             sub getTokens {
287             #initialize
288 2     2 0 3 my $self = shift;
289 2         4 my @tokens = ();
290              
291             #add concepts in sorted order
292 2         1 foreach my $phrase(@{$self->{phrases}}) {
  2         4  
293 56         31 push @tokens, @{$phrase->{tokens}};
  56         90  
294             }
295 2         28 return \@tokens;
296             }
297              
298             # method gets the an array of Mappings as they appear in the utterance
299             # input : -
300             # output: \@mappings <- a list of mapping objects ordered by their appearance
301             # in $self
302             sub getMappings {
303             #initialize
304 1     1 0 2 my $self = shift;
305 1         1 my @mappings = ();
306              
307             #add concepts in sorted order
308 1         2 foreach my $phrase(@{$self->{phrases}}) {
  1         1  
309 28         18 push @mappings, @{$phrase->{mappings}};
  28         44  
310             }
311 1         10 return \@mappings;
312             }
313              
314             1;
315              
316             __END__