File Coverage

blib/lib/MetaMap/DataStructures/Citation.pm

Criterion	Covered	Total	%
statement	114	149	76.5
branch	15	26	57.6
condition	0	6	0.0
subroutine	19	23	82.6
pod	0	17	0.0
total	148	221	66.9

line	stmt	bran	cond	sub	pod	time	code
1							# MetaMap::DataStructures::Citation
2							# (Last Updated $Id: Citation.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $)
3							#
4							# Perl module that provides a perl interface to the
5							# Unified Medical Language System (UMLS)
6							#
7							# Copyright (c) 2016
8							#
9							# Sam Henry, Virginia Commonwealth University
10							# henryst at vcu.edu
11							#
12							# Bridget T. McInnes, Virginia Commonwealth University
13							# btmcinnes at vcu.edu
14							#
15							# This program is free software; you can redistribute it and/or
16							# modify it under the terms of the GNU General Public License
17							# as published by the Free Software Foundation; either version 2
18							# of the License, or (at your option) any later version.
19							#
20							# This program is distributed in the hope that it will be useful,
21							# but WITHOUT ANY WARRANTY; without even the implied warranty of
22							# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23							# GNU General Public License for more details.
24							#
25							# You should have received a copy of the GNU General Public License
26							# along with this program; if not, write to
27							#
28							# The Free Software Foundation, Inc.,
29							# 59 Temple Place - Suite 330,
30							# Boston, MA 02111-1307, USA.
31
32							package MetaMap::DataStructures::Citation;
33	1			1		4	use strict;
	1					1
	1					21
34	1			1		3	use warnings;
	1					1
	1					18
35
36	1			1		3	use MetaMap::DataStructures::Utterance;
	1					1
	1					1139
37
38							#----------------------------------------
39							# constructors
40							#----------------------------------------
41							# constructor method to create a new Citation object
42							# input : -
43							# output: $self <- a instance of a Citation object
44							sub new {
45							#create and bless self
46	5			5	0	6	my $class = shift;
47	5					6	my $self = {};
48	5					9	bless $self, $class;
49
50							#grab input and initialize
51	5					11	$self->{id} = shift;
52	5					6	$self->{utterances} = {};
53
54	5					11	return $self;
55							}
56
57							#-----------------------------------------------------------------
58							# methods
59							#-----------------------------------------------------------------
60
61							# method summarizes this utterance as a string
62							# input : -
63							# output: $string <- a string describing $self
64							sub toString {
65	0			0	0	0	my $self = shift;
66
67							#initiliaze the string
68	0					0	my $string = "citation:\n";
69	0					0	$string .= " $self->{id}\n";
70
71							#add each utterance to the string
72	0					0	my %utterances = %{$self->{utterances}};
	0					0
73	0					0	foreach my $key(keys %utterances) {
74	0					0	$string .= " ".$utterances{$key}->toString()."\n";
75							}
76	0					0	return $string;
77							}
78
79							# method to compare this citation to another and returns 1 if the two
80							# contain identical information
81							# input : $other <- the citation object to compare against
82							# output: boolean <- 1 if $self and $other are equivalent (contain equivalent
83							# ID's and utterances), else 0
84							sub equals {
85							#grab input
86	3			3	0	6	my $self = shift;
87	3					3	my $other = shift;
88
89							#compare id's
90	3	50				10	if ($self->{id} ne $other->{id}) {
91	0					0	return 0;
92							}
93
94							#compare Utterances
95	3					3	foreach my $keyA(sort _by_utterance keys %{$self->{utterances}}){
	3					15
96	3					4	my $utteranceA = $self->{utterances}{$keyA};
97
98							#check each utterance in B
99	3					4	my $match = 0;
100	3					5	foreach my $keyB(sort _by_utterance keys %{$other->{utterances}}) {
	3					6
101	2					3	my $utteranceB = $self->{utterances}{$keyB};
102	2	50				5	if ($utteranceA->equals($utteranceB)) {
103	2					3	$match = 1;
104	2					4	last;
105							}
106							}
107
108							#citationA has no equivalent citation in $other
109							# so citations are not identical
110	3	100				10	if ($match < 1) {
111	1					4	return 0;
112							}
113							}
114
115							#all tests passed, return true
116	2					21	return 1;
117							}
118
119							# method to determine if this citation contains the CUI provided as input
120							# returns 1 if this citation contains the CUI, else 0
121							# input : $cui <- a string CUI code
122							# output: boolean <- 1 if any of $self's utterances contain $cui
123							sub contains {
124							#grab input
125	2			2	0	11	my $self = shift;
126	2					3	my $cui = shift;
127
128							#check each phrase to see if it contains the CUI
129	2					4	my $containsCUI = 0;
130	2					1	foreach my $key(keys %{$self->{utterances}}) {
	2					5
131	2	100				6	if ($self->{utterances}{$key}->contains($cui)) {
132	1					1	$containsCUI = 1;
133	1					2	last;
134							}
135							}
136
137							#return the result
138	2					7	return $containsCUI;
139							}
140
141							# method to add a new utterance to the citation
142							# input : $newUtterance <- the utterance to add to $self
143							# output: -
144							sub addUtterance {
145	9			9	0	12	my $self = shift;
146	9					9	my $newUtterance = shift;
147
148	9	50				42	if($newUtterance->{id} =~ /((ti\|ab)\.[\d]+)/) {
149	9					1417	$self->{utterances}{$1} = $newUtterance;
150							}
151							else {
152	0					0	print STDERR "error adding utterance to citation: $newUtterance->{id}\n";
153							}
154							}
155
156							# method to sort the utterances by order they appear (title followed
157							# by abstract, number ascending)
158							# (e.g. ti.000.1, ti.000.2, ab.000.1, ab.000.2, ab.000.3)
159							# input : $a, $b <- implicit sort variables, the keys in a hash of utterances
160							# which are the utterance IDs (e.g. ti.0000000.1)
161							# output: integer <- -1 if a is before b, 0 if a and b are same order,
162							# 1 if a is after b
163							sub _by_utterance {
164							#get the utterance type
165	0			0		0	my $a_ab = ($a =~ /ab/);
166	0					0	my $b_ab = ($b =~ /ab/);
167
168							#check if both are abstracts or titles
169	0	0				0	if ($a_ab == $b_ab) {
170	0					0	$a =~ /(ti\|ab)\.([\d]+)/;
171	0					0	my $aNum = ($2);
172
173	0					0	$b =~ /(ti\|ab)\.([\d]+)/;
174	0					0	return $aNum <=> $2;
175							}
176
177							#check if one is abstract, the other is title
178	0	0	0			0	if ($a_ab && !$b_ab) {
179	0					0	return 1;
180							}
181	0	0	0			0	if (!$a_ab && $b_ab) {
182	0					0	return -1;
183							}
184							}
185
186
187							#------------------------------ Get Components ------------------------------
188							# method to returns an ordered list of Utterances contained by the Citation.
189							# Utterances are ordered by title, abstract, then number in ascending order
190							# (e.g. ti.000.1, ti.000.2, ab.000.1, ab.000.2, ab.000.3)
191							# input : -
192							# output: \@utterances <- $self's utterances ordered as they appear in the
193							# original text of $self
194							sub getOrderedUtterances {
195							#initialize
196	2			2	0	69	my $self = shift;
197	2					4	my @utterances = ();
198
199							#add concepts in sorted order
200	2					3	foreach my $key(sort _by_utterance keys %{$self->{utterances}}) {
	2					10
201	2					3	push @utterances, $self->{utterances}{$key};
202							}
203	2					4	return \@utterances;
204							}
205
206							# method to get an array of concepts that appear in the citation
207							# (not necassarily ordered). Use this method if order doesn't matter for
208							# increased performance.
209							# input : -
210							# output: \@concepts <- a list of concept objects
211							sub getConcepts {
212							#initialize
213	1			1	0	11	my $self = shift;
214	1					1	my @concepts = ();
215
216							#add concepts in sorted order
217	1					2	foreach my $key(keys %{$self->{utterances}}) {
	1					2
218	1					2	push @concepts, @{ $self->{utterances}{$key}->getConcepts() };
	1					3
219							}
220	1					3	return \@concepts;
221							}
222
223							# method to get the unique concepts and return a hash of
224							# concepts, CUIs are the keys
225							# input : -
226							# output: \%concepts <- $self's unique concepts with the key as the concept's
227							# CUI. CUIs are considered unique by their CUI code only (e.g. C0000000
228							# and C0000000 are considered the same even if there are two different
229							# Concept.pm objects associated with them)
230							sub getUniqueConcepts {
231	1			1	0	182	my $self = shift;
232	1					3	my %concepts = ();
233
234							#update concepts
235	1					1	foreach my $key(keys %{$self->{utterances}}) {
	1					4
236	1					4	my $utteranceConceptsRef = $self->{utterances}{$key}->getConcepts();
237	1					1	foreach my $concept(@{ $utteranceConceptsRef }) {
	1					2
238	80					59	my $cui = $concept->{cui};
239	80	100				96	if (!exists $concepts{$cui}) {
240	75					89	$concepts{$cui} = $concept;
241							}
242							}
243							}
244	1					18	return \%concepts;
245							}
246
247							# method to get the an array of concepts that appear in the citation
248							# concepts are ordered as they appear in the utterance
249							# however where there are multiple mappings for a single
250							# token those two concepts will appear adjacent to one another
251							# input : -
252							# output: \@conceptList <- an array of arrays, where each sub-array contains a
253							# list of 1 or more concept objects. Where more than
254							# one concept object occurrs it means the token to
255							# concept mapping was ambiguous. Arrays are ordered as
256							# the tokens occurr in the utterance.
257							sub getOrderedConcepts {
258							#initialize
259	1			1	0	94	my $self = shift;
260	1					2	my @conceptsList = ();
261
262							#add concepts in sorted order
263	1					2	foreach my $key(sort _by_utterance keys %{$self->{utterances}}) {
	1					3
264	1					2	push @conceptsList, @{ $self->{utterances}{$key}->getOrderedConcepts() };
	1					4
265							}
266	1					2	return \@conceptsList;
267							}
268
269							# method to get a list of ordered mappings. There may be multiple
270							# mappings for a single utterance, but they will appear in correct
271							# utterance order
272							# input : -
273							# output: \@mappings <- a list of mapping objects ordered by their occurence in
274							# $self.
275							sub getOrderedMappings {
276							#initialize
277	1			1	0	100	my $self = shift;
278	1					1	my @mappings = ();
279
280							#add mappings in sorted order
281	1					2	foreach my $key(sort _by_utterance keys %{$self->{utterances}}) {
	1					4
282	1					1	push @mappings, @{ $self->{utterances}{$key}->getMappings() };
	1					4
283							}
284	1					2	return \@mappings;
285							}
286
287							# method to get all the mappings of the citation (not necassarily ordered)
288							# input : -
289							# output: \@mappings <- a list of mapping objects
290							sub getMappings {
291							#initialize
292	0			0	0	0	my $self = shift;
293	0					0	my @mappings = ();
294
295							#add mappings in sorted order
296	0					0	foreach my $key(keys %{$self->{utterances}}) {
	0					0
297	0					0	push @mappings, @{ $self->{utterances}{$key}->getMappings() };
	0					0
298							}
299	0					0	return \@mappings;
300							}
301
302							# method to get an array of ordered tokens as they appear in the citation
303							# input : -
304							# output: \@tokens <- a list of token objects ordered by their appearance in
305							# $self
306							sub getOrderedTokens
307							{
308							#initialize
309	3			3	0	165	my $self = shift;
310	3					5	my @tokens = ();
311
312							#add words in sorted order
313	3					3	foreach my $key(sort _by_utterance keys %{$self->{utterances}}) {
	3					9
314	2					1	push @tokens, @{ $self->{utterances}{$key}->getTokens() };
	2					8
315							}
316	3					5	return \@tokens;
317							}
318
319							# method to get an array of tokens. Tokens are not necassarily in order
320							# input : -
321							# output: \@tokens <- a list of token objects
322							sub getTokens
323							{
324							#initialize
325	0			0	0	0	my $self = shift;
326	0					0	my @tokens = ();
327
328							#add words in sorted order
329	0					0	foreach my $key(keys %{$self->{utterances}}) {
	0					0
330	0					0	push @tokens, @{ $self->{utterances}{$key}->getTokens() };
	0					0
331							}
332	0					0	return \@tokens;
333							}
334
335							#---------------------- Has Parts (title or abstract) -------------------------
336							# method to determine if the citation contains any title utterances
337							# input : -
338							# output: boolean <- 1 if $self contains a title utterance, else 0
339							sub hasTitle
340							{
341	1			1	0	518	my $self = shift;
342	1					3	return $self->_hasPart('ti');
343
344							}
345
346							# method to determine if the citation contains any abstract utterances
347							# input : -
348							# output: boolean <- 1 if $self contains an abstract utterance, else 0
349							sub hasAbstract
350							{
351	1			1	0	4	my $self = shift;
352	1					2	return $self->_hasPart('ab');
353
354							}
355
356							# method to determine if the citation contains any utterances of the
357							# tag ('ti' or 'ab')
358							# input : $tag <- the utterance tag to check for, should be 'ti' or 'ab'
359							# output: boolean <- 1 if $self contains an utterance with the $tag, else 0
360							sub _hasPart
361							{
362	2			2		3	my $self = shift;
363	2					2	my $tag = shift;
364
365							#get the utterances that match the tag
366	2					3	foreach my $key(keys %{$self->{utterances}}) {
	2					4
367	2	50				10	if ($key =~ /(ti\|ab)/) {
368	2	100				7	if ($1 eq $tag) {
369							#tag found, returning true
370	1					3	return 1;
371							}
372							}
373							}
374							#no matching tags found, returning false
375	1					3	return 0;
376							}
377							#----------------------------------------------------------------------------
378
379
380							#------------------ Get Parts (Title or Abstract) ---------------------
381							# method to create a new citation containing just the title of this citation
382							# input : -
383							# output: $part <- a citation object containing all utterances of $self's title
384							sub getTitle
385							{
386	2			2	0	25	my $self = shift;
387	2					6	return $self->_getPart('ti');
388							}
389
390							# method to create a new citation containing just the abstract of this citation
391							# input : -
392							# output: $part <- a citation object containing all utterances of $self's
393							# abstract
394							sub getAbstract
395							{
396	2			2	0	100	my $self = shift;
397	2					5	return $self->_getPart('ab');
398							}
399
400							# method to get a part of this citation (title or abstract)
401							# input is a match string, either 'ti' or 'ab'
402							# input : $tag <- the utterance tag to extract, should be 'ti' or 'ab'
403							# output: $part <- a citation object containing all utterance of $self
404							# containing the $tag in their ID
405							sub _getPart
406							{
407	4			4		5	my $self = shift;
408	4					5	my $tag = shift;
409
410							#get the utterances that match the tag
411	4					16	my $part = MetaMap::DataStructures::Citation->new($self->{id});
412	4					6	foreach my $key(keys %{$self->{utterances}}) {
	4					12
413	4	50				35	if($key =~ /(ti\|ab)/) {
414	4	100				19	if ($1 eq $tag) {
415	2					5	$part->addUtterance($self->{utterances}{$key});
416							}
417							}
418							}
419							#return the title citation
420	4					12	return $part;
421							}
422							#-----------------------------------------------------------------------
423
424							1;
425
426							__END__