File Coverage

lib/Lingua/EN/Contraction.pm
Criterion Covered Total %
statement 73 74 98.6
branch 30 40 75.0
condition n/a
subroutine 10 10 100.0
pod 0 6 0.0
total 113 130 86.9


line stmt bran cond sub pod time code
1             package Lingua::EN::Contraction;
2            
3 1     1   10261 use Data::Dumper;
  1         11151  
  1         143  
4             require Exporter;
5            
6             @ISA = qw( Exporter );
7            
8             @EXPORT_OK = qw(
9            
10             contraction
11             contract_n_t
12             contract_other
13            
14             );
15            
16 1     1   14 use warnings;
  1         3  
  1         38  
17 1     1   6 use strict;
  1         9  
  1         49  
18             #use diagnostics;
19            
20            
21 1         1239 use vars qw(
22             $VERSION
23 1     1   16 );
  1         2  
24            
25            
26             $VERSION = '0.104';
27            
28             my @modal = qw(might must do does did should could can);
29             my @pronoun = qw(I you we he she it they);
30             my @that = qw(there this that);
31             my @other = qw(who what when where why how);
32             my @verbs = qw(are is am was were will would have has had);
33            
34             my $modal_re = re_ify_list(@modal);
35             my $pronoun_re = re_ify_list(@pronoun);
36             my $that_re = re_ify_list(@that);
37             my $other_re = re_ify_list(@other);
38             my $verbs_re = re_ify_list(@verbs);
39            
40            
41             my %list = ( am => ['I'],
42             had => [@pronoun, @that, @other],
43             would=> [@pronoun, @that, @other],
44             will => [@pronoun, @that, @other],
45             are => [@pronoun, @other],
46             is => [@pronoun, @that, @other],
47             has => [@pronoun, @that, @other],
48             that => [@pronoun, @that, @other],
49             have => [@pronoun, @that, @other]
50             );
51            
52            
53            
54            
55             sub contraction {
56            
57 13     13 0 36 my $phrase = shift;
58            
59             # contract "not" before contracting other stuff...
60            
61 13         29 $phrase = contract_n_t($phrase);
62 13         38 $phrase = contract_other($phrase);
63            
64 13         67 return $phrase;
65             }
66            
67            
68             sub contract_n_t {
69            
70             # MODAL-NOT -> MODAL-N_T (that is, "were not" becomes "weren't")
71             # MODAL-PRONOUN-NOT -> MODAL-N_T-PRONOUN (that is, "were we not" becomes "weren't we")
72            
73            
74 14     14 0 32 my $phrase = shift;
75            
76 14         49 $phrase =~ s/(can)(not)/$1 $2/ig;
77            
78 14         23 my $new_phrase = $phrase;
79            
80            
81            
82 14         613 while ($phrase =~ /(\b($modal_re|$verbs_re) ?($pronoun_re )?(not)\b)/ig) {
83 22         237 my $orig_phrase = $1;
84 22         34 my $_phrase = $1;
85            
86            
87 22 100       277 if ( $_phrase =~ /\b($modal_re|$verbs_re) ?(not)\b/i ) {
88 16         26 my $m = $1;
89 16         25 my $n = $2;
90 16 50       32 if (my $m2 = N_T($m, $n)) {
91 16         247 $_phrase =~ s/\b$m not\b/$m2/i;
92             }
93             }
94 22 100       278 if ($_phrase =~ /($modal_re|$verbs_re) ($pronoun_re) (not)\b/i ) {
95 6         12 my $p = $2; my $m = $1;
  6         10  
96 6         11 my $n = $3;
97 6 50       11 if (my $m2 = N_T($m, $n)) {
98 6         114 $_phrase =~ s/\b$m $p not\b/$m2 $p/i;
99             }
100             }
101 22 100       81 next if $orig_phrase eq $_phrase;
102 19         776 $phrase =~ s/$orig_phrase/$_phrase/;
103             }
104 14         35 return $phrase;
105            
106             }
107            
108             sub contract_other {
109 14     14 0 20 my $phrase = shift;
110            
111 14         68 while ($phrase =~ /\b(let us)/ig) {
112 1         19 $phrase =~ s/\b(let) u(s)/$1'$2/i;
113             }
114            
115 14         553 while ($phrase =~ /(\b([\w']*(?: not)?) ?($pronoun_re|$other_re|$modal_re|$that_re) ($verbs_re)\b)/ig) {
116             #print "1 -> $1\n\t, 2-> $2, 3->$3, 4->$4\n";
117 17         92 my $orig_phrase = $1;
118 17         24 my $_phrase = $1;
119 17         27 my $w1 = $2;
120 17         25 my $w2 = $3;
121 17         28 my $w3 = $4;
122            
123             # don't form contractions following modal verbs:
124             # nobody ever says "could I've been walking?", they say "could I have been walking?".
125 17 100       94 next if $w1 =~ /$modal_re/;
126            
127 16 50       65 my $ctrct_after = $list{lc($w3)} or next;
128 16 100       52 next unless match_any($w2, @$ctrct_after);
129 15         22 my $w3b = $w3;
130 15         75 $w3b =~ s/.*(m|d|ll|re|s|t|ve)$/$1/i;
131 15 50       36 next if $w3b eq $w3;
132            
133 15         266 $_phrase =~ s/($w2) ($w3)/$w2'$w3b/;
134            
135 15 50       41 next if $_phrase eq $orig_phrase;
136 15         965 $phrase =~ s/$orig_phrase/$_phrase/;
137             }
138 14         37 return $phrase;
139             }
140            
141            
142            
143             sub match_any {
144 16     16 0 22 my $a = shift;
145 16         59 my @b = @_;
146 16 100       30 for (@b) { return 1 if $a =~ /\b$_\b/i ; }
  73         788  
147 1         25 return undef;
148             }
149            
150             sub N_T {
151            
152             #add contracted negation to modal verbs:
153 22     22 0 49 my $modal = shift;
154 22         52 my $not = shift;
155 22 50       65 die "unexpected value for 'not'\n" unless $not =~ /not/i;
156            
157             # preserve orginal case for "NOT->N'T" and "not->n't"
158             # but change case for "Not" -> "n't"
159            
160 22 50       59 my $n_t = $not =~ /N[oO]T/ ? "N'T":
161             "n't";
162            
163 22 100       84 if (lc($modal) eq 'am') {return "$modal $not"; }
  3 100       13  
    100          
    50          
164            
165             # cases where simply adding "n't" doesn't work:
166             # will->won't, can->can't, shall->shan't
167             # trying to preserve original case...
168            
169             elsif (lc($modal) eq 'will') {
170 1         4 $modal =~ s/ll//i;
171 1         3 $modal =~ tr/Ii/Oo/;
172             }
173            
174             elsif (lc($modal) eq 'can') {
175 2         7 $modal =~ s/n//i;
176             }
177            
178             elsif (lc($modal) eq 'shall') {
179 0         0 $modal =~ s/ll//i;
180             }
181            
182 19         45 my $answer = $modal . $n_t;
183            
184 19 50       99 return $modal eq lc($modal) ? lc($answer):
    50          
    100          
185             $modal eq uc($modal) ? uc($answer):
186             $modal eq ucfirst($modal) ? ucfirst($answer):
187             $answer;
188            
189             }
190            
191             sub re_ify_list {
192 5     5 0 13 my $re = '\b(?:' . join("|", @_) . ')';
193 5         142 $re = qr/$re/i;
194             }
195            
196             1;
197            
198             =head1 NAME
199            
200             Lingua::EN::Contraction - Add apostrophes all over the place...
201            
202             =head1 SYNOPSIS
203            
204             use Lingua::EN::Contraction qw(contraction);
205            
206             $sentance = "No, I am not going to explain it. If you cannot figure it out, you did not want to know anyway... :-)";
207            
208             print contraction($sentance) ;
209            
210            
211             =head1 DESCRIPTION
212            
213             A very simple, humble little module that adds apostrophes to your sentances for you. There aren't any options, so if you
214             don't like the way it contracts things then you'll have to change the code a bit. It'll preserve capitalization, so if
215             you feed it things like "DO NOT PANIC", you'll get "DON'T PANIC" out the other end.
216            
217             =head1 BUGS
218            
219             =head1 TODO
220            
221             =head1 AUTHOR
222            
223             Russ Graham, russgraham@gmail.com
224            
225             =cut
226