File Coverage

blib/lib/MetaMap/DataStructures/Token.pm
Criterion Covered Total %
statement 55 56 98.2
branch 12 14 85.7
condition 7 15 46.6
subroutine 6 6 100.0
pod 0 4 0.0
total 80 95 84.2


line stmt bran cond sub pod time code
1             # MetaMap::DataStructures::Token
2             # (Last Updated $Id: Token.pm,v 1.80 2016/01/07 22:49:33 btmcinnes Exp $)
3             #
4             # Perl module that provides a perl interface to the
5             # Unified Medical Language System (UMLS)
6             #
7             # Copyright (c) 2016
8             #
9             # Sam Henry, Virginia Commonwealth University
10             # henryst at vcu.edu
11             #
12             # Bridget T. McInnes, Virginia Commonwealth University
13             # btmcinnes at vcu.edu
14             #
15             # This program is free software; you can redistribute it and/or
16             # modify it under the terms of the GNU General Public License
17             # as published by the Free Software Foundation; either version 2
18             # of the License, or (at your option) any later version.
19             #
20             # This program is distributed in the hope that it will be useful,
21             # but WITHOUT ANY WARRANTY; without even the implied warranty of
22             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23             # GNU General Public License for more details.
24             #
25             # You should have received a copy of the GNU General Public License
26             # along with this program; if not, write to
27             #
28             # The Free Software Foundation, Inc.,
29             # 59 Temple Place - Suite 330,
30             # Boston, MA 02111-1307, USA.
31              
32             package MetaMap::DataStructures::Token;
33              
34 1     1   3 use strict;
  1         1  
  1         20  
35 1     1   2 use warnings;
  1         1  
  1         546  
36              
37             #----------------------------------------
38             # constructor
39             #----------------------------------------
40             # constructor method to create a new Token object
41             # input : -
42             # output: $self <- a Token object
43             sub new {
44             #grab class and create self
45 463     463 0 329 my $class = shift;
46 463         396 my $self = {};
47 463         435 bless $self, $class;
48              
49             #get the rest of the input
50 463         584 $self->{text} = shift;
51 463         425 $self->{matchedText} = shift;
52 463         401 $self->{type} = shift;
53 463         419 $self->{posTag} = shift;
54 463         353 $self->{features} = shift;
55 463         346 $self->{numWords} = shift;
56              
57 463         1468 return $self;
58             }
59              
60             # method creates and returns a token from text
61             # (MetaMap Prolog Machine Output, single
62             # syntax description after phrase)
63             # input : $text <- a Metamap Prolog Machine Output single syntax description
64             # given after the phrase (or equivalent)
65             # output: $self <- a Token object
66             sub createFromText {
67             #grab the inputs
68 463     463 0 322 my $text= shift;
69            
70             #grab the type
71 463         874 $text =~ m/(adv|aux|compl|conj|det|head|mod|modal|pastpart|prep|pron|punc|shapes|verb|not_in_lex)\(/;
72 463         424 my $type = $1;
73              
74             #grab and format input match
75 463         785 $text =~ m/inputmatch\(\[([^\[\]]+)\]/;
76 463         352 my $inputMatchText = $1;
77              
78             #remove trailing and leading quotes and commas
79 463         318 my $inputMatch = "";
80 463         531 my @inputs = split /,/, $inputMatchText;
81 463         453 foreach my $input(@inputs) {
82 490 100       908 if ($input =~ m/'(.*)'/) {
83 284         322 $input = $1;
84             }
85 490         556 $inputMatch .= ' '.$input;
86             }
87             #remove trailing and leading spaces from input match
88 463         806 $inputMatch =~ s/\s*//;
89 463         587 $inputMatch =~ s/^\s*//;
90 463         743 $inputMatch =~ s/\s*$//;
91            
92             #grab the pos tag (if any)
93 463         328 my $tag = '';
94 463 100       938 if ($text =~ m/tag\(([a-z]+)\)/) {
95 386         348 $tag = $1;
96             }
97            
98             #grab the lexmatch (if any)
99 463         285 my $lexMatch = '';
100 463 100       879 if ($text =~ m/lexmatch\(\[([^\[\]]+)\]/) {
101 379         339 $lexMatch = $1;
102 379 100       506 if ($lexMatch =~ m/'(.*)'/) {
103 2         3 $lexMatch = $1;
104             }
105             }
106              
107             #grab features (if any)
108 463         302 my $features = '';
109 463 50       563 if ($text =~ m/features\(\[([^\[\]]+)\]\)/) {
110 0         0 $features = $1;
111             }
112              
113             #Count the number of words in this token
114             # ...important for match mapping
115 463         319 my $numWords = 1;
116 463 100       900 if ($text =~ m/tokens\(\[([^\[\]]+)\]\)/) {
117 386         559 my @splitToken = (split /,/, $1);
118 386         360 $numWords = scalar @splitToken;
119             }
120              
121             #TODO delete this (for debugging)
122             #print out info about the token being created
123             #print "Creating Token:\n";
124             #print "text = $text\n";
125             #print "inputMatch = $inputMatch\n";
126             #print "lexMatch = $lexMatch\n";
127             #print "type = $type\n";
128             #print "tag = $tag\n";
129             #print "features = $features\n";
130             #print "numWords = $numWords\n";
131              
132             #create and retrun the new token
133 463         619 return MetaMap::DataStructures::Token->new(
134             $inputMatch, $lexMatch, $type, $tag, $features, $numWords);
135             }
136              
137              
138             #----------------------------------------
139             # Methods
140             #---------------------------------------
141              
142             # method compares this token to another and returns 1 if
143             # the two tokens contain identical information
144             # input : $other <- the Token to comnpare against
145             # output: boolean <- 1 if $self and other are equivalent (have the same field
146             # values)
147             sub equals {
148             #grab input
149 9264     9264 0 5656 my $self = shift;
150 9264         5220 my $other = shift;
151              
152             #compare each field
153 9264 50 66     49444 if ($self->{text} ne $other->{text}
      66        
      33        
      33        
      33        
154             || $self->{matchedText} ne $other->{matchedText}
155             || $self->{type} ne $other->{type}
156             || $self->{posTag} ne $other->{posTag}
157             || $self->{features} ne $other->{features}
158             || $self->{numWords} ne $other->{numWords}) {
159 3459         6842 return 0;
160             }
161              
162             #everything matches, return true
163 5805         10054 return 1;
164             }
165              
166             # method summarizes this token as a string
167             # input : -
168             # output: $string <- a string describing $self
169             sub toString {
170 293     293 0 164 my $self = shift;
171 293         173 my $string = "token: \n";
172 293         345 $string .= " $self->{text}, $self->{matchedText}\n";
173 293         318 $string .= " $self->{type}, $self->{posTag}, $self->{numWords}\n";
174              
175 293         519 return $string;
176             }
177              
178             1;
179              
180             __END__