File Coverage

blib/lib/Lingua/Awkwords/Parser.pm
Criterion Covered Total %
statement 97 99 97.9
branch 29 30 96.6
condition n/a
subroutine 21 21 100.0
pod 1 1 100.0
total 148 151 98.0


line stmt bran cond sub pod time code
1             # -*- Perl -*-
2             #
3             # Parser::MGC subclass that parses awkword patterns into a data structure
4              
5             package Lingua::Awkwords::Parser;
6              
7 3     3   97323 use strict;
  3         14  
  3         71  
8 3     3   13 use warnings;
  3         4  
  3         68  
9              
10 3     3   1135 use parent qw( Parser::MGC );
  3         780  
  3         12  
11              
12 3     3   47068 use Lingua::Awkwords::ListOf;
  3         8  
  3         101  
13 3     3   928 use Lingua::Awkwords::OneOf;
  3         12  
  3         69  
14 3     3   767 use Lingua::Awkwords::String;
  3         6  
  3         64  
15 3     3   749 use Lingua::Awkwords::Subpattern;
  3         6  
  3         2905  
16              
17             our $VERSION = '0.08';
18              
19             sub parse {
20 32     32 1 8980 my $self = shift;
21 32         74 my $unit = $self->_parse_unit;
22              
23             my $filters =
24 32     16   218 $self->sequence_of( sub { $self->expect('^'); $self->_parse_filter } );
  16         617  
  6         234  
25 32 100       1443 $unit->add_filters(@$filters) if @$filters;
26              
27 32         64 return $unit;
28             }
29              
30             # filters exclude strings from previous units
31             sub _parse_filter {
32 10     10   16 my $self = shift;
33              
34 10         14 my $filter = '';
35              
36             1 while $self->any_of(
37             # NOTE code for these duplicted from the unit parse
38             sub {
39 20     20   346 $filter .= ( $self->expect(qr/"([^"]*)"/) )[-1];
40 0         0 1;
41             },
42             sub {
43 20     20   1284 $filter .= $self->generic_token( 'other', qr{[^ "A-Z\(\)\[\]/\*\^]+} );
44 10         441 1;
45             },
46             sub {
47 10     10   560 0;
48             }
49 10         56 );
50              
51 10         72 return $filter;
52             }
53              
54             # units, which might be a ::ListOf choices [VV] or ::OneOf [a/b] or both
55             # [VV/CV] or neither [asdf]. units can also contain other units
56             sub _parse_unit {
57 49     49   368 my $self = shift;
58              
59 49         67 my ( $oneof, $weight );
60 49         89 my @terms = '';
61              
62             1 while $self->any_of(
63             sub {
64 121     121   2953 $self->expect('*');
65              
66             # NOTE original version instead treats [a*10*20/b] as a
67             # weight of 1020 then reduces that to 128 (with a warning)
68             # as of 0.06 upper limit removed from this implementation
69 3 100       114 $self->fail("weight already set") if $weight;
70              
71 2         12 my $num = $self->token_int;
72 2 100       184 $self->fail("weight must be positive integer") if $num < 1;
73 1         1 $weight = $num;
74 1         4 1;
75             },
76             sub {
77 120     120   9100 $self->expect('/');
78              
79 11 100       552 $oneof = Lingua::Awkwords::OneOf->new if !defined $oneof;
80              
81 11         23 for my $term (@terms) {
82             # TODO cache these strings so only one obj instance per str?
83 12 100       175 $term = Lingua::Awkwords::String->new( string => $term ) if !ref $term;
84             }
85 11         263 $oneof->add_choice( Lingua::Awkwords::ListOf->new( terms => [@terms] ),
86             $weight );
87              
88             # empty string here is so [a/] parses correctly as a choice
89             # between a and nothing instead of dropping out of the unit
90             # upon ]
91 11         19 undef $weight;
92 11         19 @terms = '';
93 11         23 1;
94             },
95             sub {
96             # recurse into sub-units [...] or (...)
97 109     109   7365 my $delim = $self->expect(qr/[ \[\( ]/x);
98 17         650 $delim =~ tr/[(/])/;
99              
100 17         50 my $ret = $self->scope_of( undef, \&_parse_unit, $delim );
101 12 100       499 if ( $terms[-1] eq '' ) {
102 8         13 $terms[-1] = $ret;
103             } else {
104 4         6 push @terms, $ret;
105             }
106              
107             # () needs additional code as (a) is equivalent to [a/] so
108             # we must add an empty string to what must become a oneof
109 12 100       24 if ( $delim eq ')' ) {
110 1         2 my $newof;
111 1 50       7 unless ( $terms[-1]->can('add_choice') ) {
112 1         18 $newof = Lingua::Awkwords::OneOf->new;
113 1         13 $newof->add_choice( $terms[-1] );
114 1         2 $terms[-1] = $newof;
115             } else {
116 0         0 $newof = $terms[-1];
117             }
118              
119             # TODO cache this string in a hash so only one obj?
120 1         14 $newof->add_choice( Lingua::Awkwords::String->new( string => '' ) );
121             }
122              
123             # filters in [VV]^aa form (as opposed to the top-level
124             # parse() VV^aa form which lack the trailing ] or ) of this
125             # code path
126             $self->maybe(
127             sub {
128             my $filters =
129 12         142 $self->sequence_of( sub { $self->expect('^'); $self->_parse_filter } );
  7         221  
  4         144  
130 12 100       507 $terms[-1]->add_filters(@$filters) if @$filters;
131             }
132 12         63 );
133 12         65 1;
134             },
135             sub {
136 97     97   6398 my $pat = $self->generic_token( 'subpattern', qr{[A-Z]} );
137 12 100       537 $self->fail("not a defined pattern")
138             if !Lingua::Awkwords::Subpattern->is_pattern($pat);
139              
140 11         169 my $ret = Lingua::Awkwords::Subpattern->new( pattern => $pat );
141 11 100       99 if ( $terms[-1] eq '' ) {
142 5         7 $terms[-1] = $ret;
143             } else {
144 6         12 push @terms, $ret;
145             }
146 11         24 1;
147             },
148             # NOTE code from these two also used in _parse_filter
149             sub {
150 86     86   5831 my $ret = ( $self->expect(qr/"([^"]*)"/) )[-1];
151 5 100       259 if ( ref $terms[-1] ) {
152 1         2 push @terms, $ret;
153             } else {
154 4         9 $terms[-1] .= $ret;
155             }
156 5         10 1;
157             },
158             sub {
159 81     81   5555 my $ret = $self->generic_token( 'other', qr{[^ "A-Z\(\)\[\]/\*\^]+} );
160 32 100       1769 if ( ref $terms[-1] ) {
161 1         2 push @terms, $ret;
162             } else {
163 31         57 $terms[-1] .= $ret;
164             }
165 32         58 1;
166             },
167             sub {
168 49     49   3412 0;
169             }
170 49         508 );
171              
172 49         709 for my $term (@terms) {
173             # TODO cache these strings so only one obj instance per str?
174 60 100       877 $term = Lingua::Awkwords::String->new( string => $term ) if !ref $term;
175             }
176              
177 49 100       2455 if ( defined $oneof ) {
178 8         117 $oneof->add_choice( Lingua::Awkwords::ListOf->new( terms => [@terms] ),
179             $weight );
180 8         18 return $oneof;
181             } else {
182 41         614 return Lingua::Awkwords::ListOf->new( terms => \@terms );
183             }
184             }
185              
186             1;
187             __END__