File Coverage

blib/lib/PPIx/Regexp/Token/Interpolation.pm
Criterion Covered Total %
statement 108 114 94.7
branch 62 90 68.8
condition 13 24 54.1
subroutine 18 19 94.7
pod 2 2 100.0
total 203 249 81.5


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PPIx::Regexp::Token::Interpolation - Represent an interpolation in the PPIx::Regexp package.
4              
5             =head1 SYNOPSIS
6              
7             use PPIx::Regexp::Dumper;
8             PPIx::Regexp::Dumper->new('qr{$foo}smx')->print();
9              
10             =head1 INHERITANCE
11              
12             C is a
13             L.
14              
15             C has no descendants.
16              
17             =head1 DESCRIPTION
18              
19             This class represents a variable interpolation into a regular
20             expression. In the L the C<$foo> would be represented by an
21             object of this class.
22              
23             =head2 Incompatible Change: Parse of '@{[ ... ]}'
24              
25             In versions 0.083 and earlier, C<'@{[ ... ]}'> parsed as a member of
26             this class. In 0.084 and later, it parses as a member of
27             C. This more accurately reflects the actual
28             contents of the token, and enables the recognition of the full range of
29             postfix dereference operators for versioning purposes, not just those
30             valid in interpolations.
31              
32             =head1 METHODS
33              
34             This class provides the following public methods beyond those provided
35             by its superclass.
36              
37             =cut
38              
39             package PPIx::Regexp::Token::Interpolation;
40              
41 9     9   67 use strict;
  9         18  
  9         262  
42 9     9   44 use warnings;
  9         19  
  9         240  
43              
44 9     9   47 use base qw{ PPIx::Regexp::Token::Code };
  9         19  
  9         763  
45              
46 9     9   56 use Carp qw{ confess };
  9         19  
  9         425  
47 9     9   62 use PPI::Document;
  9         19  
  9         245  
48 9         977 use PPIx::Regexp::Constant qw{
49             COOKIE_CLASS
50             COOKIE_REGEX_SET
51             MINIMUM_PERL
52             TOKEN_LITERAL
53             @CARP_NOT
54 9     9   43 };
  9         23  
55              
56             our $VERSION = '0.088';
57              
58 9     9   66 use constant VERSION_WHEN_IN_REGEX_SET => '5.017009';
  9         20  
  9         7653  
59              
60             sub __new {
61 105     105   7179 my ( $class, $content, %arg ) = @_;
62              
63             defined $arg{perl_version_introduced}
64 105 50       466 or $arg{perl_version_introduced} = MINIMUM_PERL;
65              
66 105         602 my $self = $class->SUPER::__new( $content, %arg );
67              
68 105         501 return $self;
69             }
70              
71             =head2 is_matcher
72              
73             This method returns C because a static analysis can not in
74             general tell whether a piece of code matches anything.
75              
76             =cut
77              
78 0     0 1 0 sub is_matcher { return undef; } ## no critic (ProhibitExplicitReturnUndef)
79              
80             # Return true if the token can be quantified, and false otherwise
81             # This can be quantified because it might interpolate a quantifiable
82             # token. Of course, it might not, but we need to be permissive here.
83             # sub can_be_quantified { return };
84              
85             # We overrode this in PPIx::Regexp::Token::Code, since (?{...}) did not
86             # appear until Perl 5.5. But interpolation has been there since the
87             # beginning, so we have to override again. This turns out to be OK,
88             # though, because while Regex Sets were introduced in 5.17.8,
89             # interpolation inside them was not introduced until 5.17.9.
90             sub perl_version_introduced {
91 17     17 1 1550 my ( $self ) = @_;
92 17         56 return $self->{perl_version_introduced};
93             }
94              
95             # Normalize the content of an interpolation object before making it into
96             # a PPI document. The issue here is that things like ${x} are at least
97             # warnings outside strings, but are normal inside them.
98             sub __ppi_normalize_content {
99 115     115   252 my ( $self ) = @_;
100 115         189 my $content;
101 115 50       301 defined( $content = $self->content() )
102             or return $content;
103             # NOTE: perldata gives a regexp for this, but it requires Perl 5.10.
104             # I believe the following caputures the intent, except possibly for
105             # various weird combinations of '::' and "'".
106 115         527 $content =~
107             s/ \A
108             ( \$ \# \$* | [\@\$] \$* ) # Sigil and possible casts
109             [{] \s* (?: :: )* '? # per perldata
110             ( ^? (?: \w+ (?: (?: :: | ' ) \w+ )* (?: :: )? | [[:punct:]] ) )
111             \s* [}] \z
112             /$1$2/smx;
113 115         354 return $content;
114             }
115              
116             # Match the beginning of an interpolation.
117              
118             my $interp_re =
119             qr{ \A (?= [\@\$]? \$ [-\w&`'+^./\\";%=~:?!\@\$<>\[\]\{\},#] |
120             \@ [\w\{] )
121             }smx;
122              
123             # Match bracketed interpolation
124              
125             my $brkt_interp_re =
126             qr{ \A (?: [\@\$] \$* [#]? \$* [\{] (?: [][\-&`'+,^./\\";%=:?\@\$<>,#] |
127             \^? \w+ (?: :: \w+ )* ) [\}] |
128             \@ [\{] \w+ (?: :: \w+ )* [\}] )
129             }smx;
130              
131             # We pull out the logic of finding and dealing with the interpolation
132             # into a separate subroutine because if we fail to find an interpolation
133             # we want to do something with the sigils.
134              
135             my %allow_subscript_based_on_cast_symbol = (
136             q<$#> => 0,
137             q<$> => 1,
138             q<@> => 1,
139             );
140              
141             sub _interpolation {
142 110     110   346 my ( $class, $tokenizer, undef, $in_regexp ) = @_; # $character unused
143              
144             # If the regexp does not interpolate, bail now.
145 110 100       373 $tokenizer->interpolates() or return;
146              
147             # If we're a bracketed interpolation, just accept it
148 108 100       439 if ( my $len = $tokenizer->find_regexp( $brkt_interp_re ) ) {
149 8         48 return $len;
150             }
151              
152             # Make sure we start off plausibly
153 100 100       324 defined $tokenizer->find_regexp( $interp_re )
154             or return;
155              
156             # See if PPI can figure out what we have
157 83 50       412 my $doc = $tokenizer->ppi_document()
158             or return;
159              
160             # Get the first statement to work on.
161 83 50       140275 my $stmt = $doc->find_first( 'PPI::Statement' )
162             or return;
163              
164 83         18257 my @accum; # The elements of the interpolation
165             my $allow_subscript; # Assume no subscripts allowed
166 83         201 my $want_class = __PACKAGE__; # Assume we want an interpolation.
167              
168             # Find the beginning of the interpolation
169 83 50       272 my $next = $stmt->schild( 0 ) or return;
170              
171             # The interpolation should start with
172 83 100       1627 if ( $next->isa( 'PPI::Token::Symbol' ) ) {
    100          
    50          
173              
174             # A symbol
175 73         175 push @accum, $next;
176 73         164 $allow_subscript = 1; # Subscripts are allowed
177              
178             } elsif ( $next->isa( 'PPI::Token::Cast' ) ) {
179              
180             # Or a cast followed by a block
181 8         22 push @accum, $next;
182 8 50       41 $next = $next->next_sibling() or return;
183 8 100       272 if ( $next->isa( 'PPI::Token::Symbol' ) ) {
    50          
184             defined (
185             $allow_subscript =
186             $allow_subscript_based_on_cast_symbol{
187 6 50       22 $accum[-1]->content()
188             }
189             ) or return;
190 6         46 push @accum, $next;
191             } elsif ( $next->isa( 'PPI::Structure::Block' ) ) {
192             # We want @{[ ... ]} to parse as a PPIx::Regexp::Token::Code.
193             # PPI parses this as a cast followed by a block. The block
194             # contains a single statement, which contains a single
195             # constructor. So:
196 2         12 my @kids = $next->schildren();
197 2 50 33     42 if ( @kids == 1 && $kids[0]->isa( 'PPI::Statement' ) ) {
198 2         9 @kids = $kids[0]->schildren();
199 2 50 33     37 if ( @kids == 1 &&
      33        
200             $kids[0]->isa( 'PPI::Structure::Constructor' ) &&
201             $kids[0]->start() eq '[' ) {
202 2         56 $want_class = 'PPIx::Regexp::Token::Code';
203             }
204             }
205 2         7 push @accum, $next;
206             } else {
207 0         0 return;
208             }
209              
210             } elsif ( $next->isa( 'PPI::Token::ArrayIndex' ) ) {
211              
212             # Or an array index
213 2         6 push @accum, $next;
214              
215             } else {
216              
217             # None others need apply.
218 0         0 return;
219              
220             }
221              
222             # The interpolation _may_ be subscripted. If so ...
223             {
224              
225             # Only accept a subscript if wanted and available
226 83 100 100     158 $allow_subscript and $next = $next->snext_sibling() or last;
  106         564  
227              
228             # Accept an optional dereference operator.
229 63         1978 my @subscr;
230 63 100       298 if ( $next->isa( 'PPI::Token::Operator' ) ) {
231 18 100       77 $next->content() eq '->' or last;
232 4         28 push @subscr, $next;
233 4 50       20 $next = $next->next_sibling() or last;
234              
235             # postderef was introduced in 5.19.5, per perl5195delta.
236 4 50       98 if ( my $deref = $tokenizer->__recognize_postderef(
237             __PACKAGE__, $next ) ) {
238 4         57 push @accum, @subscr, $deref;
239 4         8 last;
240             }
241             }
242              
243             # Accept only a subscript
244 45 100       178 $next->isa( 'PPI::Structure::Subscript' ) or last;
245              
246             # The subscript must have a closing delimiter.
247 25 50       113 $next->finish() or last;
248              
249             # If we are in a regular expression rather than a replacement
250             # string, screen the subscript for content, since [] could be a
251             # character class, and {} could be a quantifier. The perlop docs
252             # say that Perl applies undocumented heuristics subject to
253             # change without notice to figure this out. So we do our poor
254             # best to be heuristical and undocumented.
255 25 100 100     228 not $in_regexp or $class->_subscript( $next ) or last;
256              
257             # If we got this far, accept the subscript and try for another
258             # one.
259 23         92 push @accum, @subscr, $next;
260 23         40 redo;
261             }
262              
263             # Compute the length of all the PPI elements accumulated, and return
264             # it.
265 83         1230 my $length = 0;
266 83         228 foreach ( @accum ) {
267 122 100       683 $length += ref $_ ? length $_->content() : $_;
268             }
269 83         1514 return ( $length, $want_class );
270             }
271              
272             {
273 9     9   77 no warnings qw{ qw }; ## no critic (ProhibitNoWarnings)
  9         20  
  9         4726  
274              
275             my %accept = map { $_ => 1 } qw{ $ $# @ };
276              
277             sub __postderef_accept_cast {
278 108     108   346 return \%accept;
279             }
280             }
281              
282             {
283              
284             my %allowed = (
285             '[' => '_square',
286             '{' => '_curly',
287             );
288              
289             sub _subscript {
290 21     21   56 my ( $class, $struct ) = @_;
291              
292             # We expect to have a left delimiter, which is either a '[' or a
293             # '{'.
294 21 50       60 my $left = $struct->start() or return;
295 21         135 my $lc = $left->content();
296 21 50       113 my $handler = $allowed{$lc} or return;
297              
298             # We expect a single child, which is a PPI::Statement
299 21 50       78 ( my @kids = $struct->schildren() ) == 1 or return;
300 21 50       301 $kids[0]->isa( 'PPI::Statement' ) or return;
301              
302             # We expect the statement to have at least one child.
303 21 50       62 ( @kids = $kids[0]->schildren() ) or return;
304              
305 21         247 return $class->$handler( @kids );
306              
307             }
308              
309             }
310              
311             # Return true if we think a curly-bracketed subscript is really a
312             # subscript, rather than a quantifier.
313             # Called as $class->$handler( ... ) above
314             sub _curly { ## no critic (ProhibitUnusedPrivateSubroutines)
315 17     17   56 my ( undef, @kids ) = @_; # Invocant unused
316              
317             # If the first child is a word, and either it is an only child or
318             # the next child is the fat comma operator, we accept it as a
319             # subscript.
320 17 100       78 if ( $kids[0]->isa( 'PPI::Token::Word' ) ) {
321 14 100       64 @kids == 1 and return 1;
322 4 50 33     27 $kids[1]->isa( 'PPI::Token::Operator' )
323             and $kids[1]->content() eq '=>'
324             and return 1;
325             }
326              
327             # If the first child is a symbol,
328 3 100 66     27 if ( @kids && $kids[0]->isa( 'PPI::Token::Symbol' ) ) {
329             # Accept it if it is the only child
330 1 50       3 @kids == 1
331             and return 1;
332             # Accept it if there are exactly two children and the second is
333             # a subscript.
334 1 50 33     19 @kids == 2
335             and $kids[1]->isa( 'PPI::Structure::Subscript' )
336             and return 1;
337             }
338              
339             # We reject anything else.
340 2         10 return;
341             }
342              
343             # Return true if we think a square-bracketed subscript is really a
344             # subscript, rather than a character class.
345             # Called as $class->$handler( ... ) above
346             sub _square { ## no critic (ProhibitUnusedPrivateSubroutines)
347 4     4   11 my ( undef, @kids ) = @_; # Invocant unused
348              
349             # We expect to have either a number or a symbol as the first
350             # element.
351 4 50       32 $kids[0]->isa( 'PPI::Token::Number' ) and return 1;
352 0 0       0 $kids[0]->isa( 'PPI::Token::Symbol' ) and return 1;
353              
354             # Anything else is rejected.
355 0         0 return;
356             }
357              
358             # Alternate classes for the sigils, depending on whether we are in a
359             # character class (index 1) or not (index 0).
360             my %sigil_alternate = (
361             '$' => [ 'PPIx::Regexp::Token::Assertion', TOKEN_LITERAL ],
362             '@' => [ TOKEN_LITERAL, TOKEN_LITERAL ],
363             );
364              
365             sub __PPIX_TOKENIZER__regexp {
366 1434     1434   3472 my ( $class, $tokenizer, $character ) = @_;
367              
368 1434 100       4574 exists $sigil_alternate{$character} or return;
369              
370 105 100       529 if ( my ( $accept, $want_class ) =
371             $class->_interpolation( $tokenizer, $character, 1 )
372             ) {
373 86         7302 return $tokenizer->make_token( $accept, $want_class );
374             }
375              
376 19 50       75 my $alternate = $sigil_alternate{$character} or return;
377 19 50       71 return $tokenizer->make_token(
378             1, $alternate->[$tokenizer->cookie( COOKIE_CLASS ) ? 1 : 0 ] );
379              
380             }
381              
382             sub __PPIX_TOKENIZER__repl {
383 18     18   62 my ( $class, $tokenizer, $character ) = @_;
384              
385 18 100       71 exists $sigil_alternate{$character} or return;
386              
387 5 50       25 if ( my ( $accept, $want_class ) =
388             $class->_interpolation( $tokenizer, $character, 0 ) ) {
389 5         397 return $tokenizer->make_token( $accept, $want_class );
390             }
391              
392 0           return $tokenizer->make_token( 1, TOKEN_LITERAL );
393              
394             }
395              
396             1;
397              
398             __END__