File Coverage

blib/lib/Lingua/Awkwords/Parser.pm
Criterion Covered Total %
statement 97 99 97.9
branch 29 30 96.6
condition n/a
subroutine 21 21 100.0
pod 1 1 100.0
total 148 151 98.0


line stmt bran cond sub pod time code
1             # -*- Perl -*-
2             #
3             # Parser::MGC subclass that parses awkword patterns into a data structure
4              
5             package Lingua::Awkwords::Parser;
6              
7 3     3   110154 use strict;
  3         13  
  3         72  
8 3     3   14 use warnings;
  3         5  
  3         73  
9              
10 3     3   1136 use parent qw( Parser::MGC );
  3         835  
  3         12  
11              
12 3     3   46717 use Lingua::Awkwords::ListOf;
  3         11  
  3         91  
13 3     3   1046 use Lingua::Awkwords::OneOf;
  3         13  
  3         65  
14 3     3   723 use Lingua::Awkwords::String;
  3         7  
  3         68  
15 3     3   786 use Lingua::Awkwords::Subpattern;
  3         5  
  3         2824  
16              
17             our $VERSION = '0.10';
18              
19             sub parse {
20 31     31 1 9154 my $self = shift;
21 31         67 my $unit = $self->_parse_unit;
22              
23             my $filters =
24 31     16   373 $self->sequence_of(sub { $self->expect('^'); $self->_parse_filter });
  16         629  
  6         326  
25 31 100       1682 $unit->add_filters(@$filters) if @$filters;
26              
27 31         61 return $unit;
28             }
29              
30             # filters exclude strings from previous units
31             sub _parse_filter {
32 10     10   15 my $self = shift;
33              
34 10         14 my $filter = '';
35              
36             1 while $self->any_of(
37             # NOTE code for these duplicted from the unit parse
38             sub {
39 20     20   329 $filter .= ($self->expect(qr/"([^"]*)"/))[-1];
40 0         0 1;
41             },
42             sub {
43 20     20   1343 $filter .= $self->generic_token('other', qr{[^ "A-Z\(\)\[\]/\*\^]+});
44 10         387 1;
45             },
46             sub {
47 10     10   794 0;
48             }
49 10         53 );
50              
51 10         71 return $filter;
52             }
53              
54             # units, which might be a ::ListOf choices [VV] or ::OneOf [a/b] or both
55             # [VV/CV] or neither [asdf]. units can also contain other units
56             sub _parse_unit {
57 46     46   317 my $self = shift;
58              
59 46         58 my ($oneof, $weight);
60 46         92 my @terms = '';
61              
62             1 while $self->any_of(
63             sub {
64 107     107   2913 $self->expect('*');
65              
66             # NOTE original version instead treats [a*10*20/b] as a
67             # weight of 1020 then reduces that to 128 (with a warning)
68             # as of 0.06 upper limit removed from this implementation
69 3 100       120 $self->fail("weight already set") if $weight;
70              
71 2         10 my $num = $self->token_int;
72 2 100       266 $self->fail("weight must be positive integer") if $num < 1;
73 1         2 $weight = $num;
74 1         3 1;
75             },
76             sub {
77 106     106   9466 $self->expect('/');
78              
79 8 100       522 $oneof = Lingua::Awkwords::OneOf->new if !defined $oneof;
80              
81 8         17 for my $term (@terms) {
82             # TODO cache these strings so only one obj instance per str?
83 8 100       126 $term = Lingua::Awkwords::String->new(string => $term) if !ref $term;
84             }
85 8         372 $oneof->add_choice(Lingua::Awkwords::ListOf->new(terms => [@terms]), $weight);
86              
87             # empty string here is so [a/] parses correctly as a choice
88             # between a and nothing instead of dropping out of the unit
89             # upon ]
90 8         12 undef $weight;
91 8         16 @terms = '';
92 8         14 1;
93             },
94             sub {
95             # recurse into sub-units [...] or (...)
96 98     98   8106 my $delim = $self->expect(qr/[ \[\( ]/x);
97 15         658 $delim =~ tr/[(/])/;
98              
99 15         49 my $ret = $self->scope_of(undef, \&_parse_unit, $delim);
100 10 100       587 if ($terms[-1] eq '') {
101 8         14 $terms[-1] = $ret;
102             } else {
103 2         4 push @terms, $ret;
104             }
105              
106             # () needs additional code as (a) is equivalent to [a/] so
107             # we must add an empty string to what must become a oneof
108 10 100       21 if ($delim eq ')') {
109 1         2 my $newof;
110 1 50       9 unless ($terms[-1]->can('add_choice')) {
111 1         23 $newof = Lingua::Awkwords::OneOf->new;
112 1         12 $newof->add_choice($terms[-1]);
113 1         2 $terms[-1] = $newof;
114             } else {
115 0         0 $newof = $terms[-1];
116             }
117              
118             # TODO cache this string in a hash so only one obj?
119 1         14 $newof->add_choice(Lingua::Awkwords::String->new(string => ''));
120             }
121              
122             # filters in [VV]^aa form (as opposed to the top-level
123             # parse() VV^aa form which lack the trailing ] or ) of this
124             # code path
125             $self->maybe(
126             sub {
127             my $filters =
128 10         150 $self->sequence_of(sub { $self->expect('^'); $self->_parse_filter });
  6         189  
  4         144  
129 10 100       420 $terms[-1]->add_filters(@$filters) if @$filters;
130             }
131 10         56 );
132 10         56 1;
133             },
134             sub {
135 88     88   6160 my $pat = $self->generic_token('subpattern', qr{[A-Z]});
136 12 100       536 $self->fail("not a defined pattern")
137             if !Lingua::Awkwords::Subpattern->is_pattern($pat);
138              
139 11         209 my $ret = Lingua::Awkwords::Subpattern->new(pattern => $pat);
140 11 100       52 if ($terms[-1] eq '') {
141 5         8 $terms[-1] = $ret;
142             } else {
143 6         11 push @terms, $ret;
144             }
145 11         21 1;
146             },
147             # NOTE code from these two also used in _parse_filter
148             sub {
149 77     77   5687 my $ret = ($self->expect(qr/"([^"]*)"/))[-1];
150 5 100       404 if (ref $terms[-1]) {
151 1         2 push @terms, $ret;
152             } else {
153 4         11 $terms[-1] .= $ret;
154             }
155 5         12 1;
156             },
157             sub {
158 72     72   6192 my $ret = $self->generic_token('other', qr{[^ "A-Z\(\)\[\]/\*\^]+});
159 26 100       1971 if (ref $terms[-1]) {
160 1         3 push @terms, $ret;
161             } else {
162 25         49 $terms[-1] .= $ret;
163             }
164 26         46 1;
165             },
166             sub {
167 46     46   3250 0;
168             }
169 46         532 );
170              
171 46         666 for my $term (@terms) {
172             # TODO cache these strings so only one obj instance per str?
173 56 100       845 $term = Lingua::Awkwords::String->new(string => $term) if !ref $term;
174             }
175              
176 46 100       2735 if (defined $oneof) {
177 5         75 $oneof->add_choice(Lingua::Awkwords::ListOf->new(terms => [@terms]), $weight);
178 5         13 return $oneof;
179             } else {
180 41         822 return Lingua::Awkwords::ListOf->new(terms => \@terms);
181             }
182             }
183              
184             1;
185             __END__