File Coverage

blib/lib/PPIx/Regexp/Token/Interpolation.pm
Criterion Covered Total %
statement 108 114 94.7
branch 62 90 68.8
condition 13 24 54.1
subroutine 18 19 94.7
pod 2 2 100.0
total 203 249 81.5


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PPIx::Regexp::Token::Interpolation - Represent an interpolation in the PPIx::Regexp package.
4              
5             =head1 SYNOPSIS
6              
7             use PPIx::Regexp::Dumper;
8             PPIx::Regexp::Dumper->new('qr{$foo}smx')->print();
9              
10             =head1 INHERITANCE
11              
12             C is a
13             L.
14              
15             C has no descendants.
16              
17             =head1 DESCRIPTION
18              
19             This class represents a variable interpolation into a regular
20             expression. In the L the C<$foo> would be represented by an
21             object of this class.
22              
23             =head2 Incompatible Change: Parse of '@{[ ... ]}'
24              
25             In versions 0.083 and earlier, C<'@{[ ... ]}'> parsed as a member of
26             this class. In 0.084 and later, it parses as a member of
27             C. This more accurately reflects the actual
28             contents of the token, and enables the recognition of the full range of
29             postfix dereference operators for versioning purposes, not just those
30             valid in interpolations.
31              
32             =head1 METHODS
33              
34             This class provides the following public methods beyond those provided
35             by its superclass.
36              
37             =cut
38              
39             package PPIx::Regexp::Token::Interpolation;
40              
41 9     9   69 use strict;
  9         19  
  9         259  
42 9     9   52 use warnings;
  9         16  
  9         243  
43              
44 9     9   61 use base qw{ PPIx::Regexp::Token::Code };
  9         22  
  9         781  
45              
46 9     9   60 use Carp qw{ confess };
  9         18  
  9         394  
47 9     9   60 use PPI::Document;
  9         29  
  9         237  
48 9         995 use PPIx::Regexp::Constant qw{
49             COOKIE_CLASS
50             COOKIE_REGEX_SET
51             MINIMUM_PERL
52             TOKEN_LITERAL
53             @CARP_NOT
54 9     9   47 };
  9         17  
55              
56             our $VERSION = '0.087_01';
57              
58 9     9   59 use constant VERSION_WHEN_IN_REGEX_SET => '5.017009';
  9         18  
  9         8169  
59              
60             sub __new {
61 105     105   7365 my ( $class, $content, %arg ) = @_;
62              
63             defined $arg{perl_version_introduced}
64 105 50       445 or $arg{perl_version_introduced} = MINIMUM_PERL;
65              
66 105         543 my $self = $class->SUPER::__new( $content, %arg );
67              
68 105         415 return $self;
69             }
70              
71             =head2 is_matcher
72              
73             This method returns C because a static analysis can not in
74             general tell whether a piece of code matches anything.
75              
76             =cut
77              
78 0     0 1 0 sub is_matcher { return undef; } ## no critic (ProhibitExplicitReturnUndef)
79              
80             # Return true if the token can be quantified, and false otherwise
81             # This can be quantified because it might interpolate a quantifiable
82             # token. Of course, it might not, but we need to be permissive here.
83             # sub can_be_quantified { return };
84              
85             # We overrode this in PPIx::Regexp::Token::Code, since (?{...}) did not
86             # appear until Perl 5.5. But interpolation has been there since the
87             # beginning, so we have to override again. This turns out to be OK,
88             # though, because while Regex Sets were introduced in 5.17.8,
89             # interpolation inside them was not introduced until 5.17.9.
90             sub perl_version_introduced {
91 17     17 1 1804 my ( $self ) = @_;
92 17         54 return $self->{perl_version_introduced};
93             }
94              
95             # Normalize the content of an interpolation object before making it into
96             # a PPI document. The issue here is that things like ${x} are at least
97             # warnings outside strings, but are normal inside them.
98             sub __ppi_normalize_content {
99 115     115   673 my ( $self ) = @_;
100 115         212 my $content;
101 115 50       348 defined( $content = $self->content() )
102             or return $content;
103             # NOTE: perldata gives a regexp for this, but it requires Perl 5.10.
104             # I believe the following caputures the intent, except possibly for
105             # various weird combinations of '::' and "'".
106 115         565 $content =~
107             s/ \A
108             ( \$ \# \$* | [\@\$] \$* ) # Sigil and possible casts
109             [{] \s* (?: :: )* '? # per perldata
110             ( ^? (?: \w+ (?: (?: :: | ' ) \w+ )* (?: :: )? | [[:punct:]] ) )
111             \s* [}] \z
112             /$1$2/smx;
113 115         370 return $content;
114             }
115              
116             # Match the beginning of an interpolation.
117              
118             my $interp_re =
119             qr{ \A (?= [\@\$]? \$ [-\w&`'+^./\\";%=~:?!\@\$<>\[\]\{\},#] |
120             \@ [\w\{] )
121             }smx;
122              
123             # Match bracketed interpolation
124              
125             my $brkt_interp_re =
126             qr{ \A (?: [\@\$] \$* [#]? \$* [\{] (?: [][\-&`'+,^./\\";%=:?\@\$<>,#] |
127             \^? \w+ (?: :: \w+ )* ) [\}] |
128             \@ [\{] \w+ (?: :: \w+ )* [\}] )
129             }smx;
130              
131             # We pull out the logic of finding and dealing with the interpolation
132             # into a separate subroutine because if we fail to find an interpolation
133             # we want to do something with the sigils.
134              
135             my %allow_subscript_based_on_cast_symbol = (
136             q<$#> => 0,
137             q<$> => 1,
138             q<@> => 1,
139             );
140              
141             sub _interpolation {
142 110     110   359 my ( $class, $tokenizer, undef, $in_regexp ) = @_; # $character unused
143              
144             # If the regexp does not interpolate, bail now.
145 110 100       354 $tokenizer->interpolates() or return;
146              
147             # If we're a bracketed interpolation, just accept it
148 108 100       431 if ( my $len = $tokenizer->find_regexp( $brkt_interp_re ) ) {
149 8         61 return $len;
150             }
151              
152             # Make sure we start off plausibly
153 100 100       319 defined $tokenizer->find_regexp( $interp_re )
154             or return;
155              
156             # See if PPI can figure out what we have
157 83 50       433 my $doc = $tokenizer->ppi_document()
158             or return;
159              
160             # Get the first statement to work on.
161 83 50       136099 my $stmt = $doc->find_first( 'PPI::Statement' )
162             or return;
163              
164 83         18287 my @accum; # The elements of the interpolation
165             my $allow_subscript; # Assume no subscripts allowed
166 83         219 my $want_class = __PACKAGE__; # Assume we want an interpolation.
167              
168             # Find the beginning of the interpolation
169 83 50       286 my $next = $stmt->schild( 0 ) or return;
170              
171             # The interpolation should start with
172 83 100       1558 if ( $next->isa( 'PPI::Token::Symbol' ) ) {
    100          
    50          
173              
174             # A symbol
175 73         194 push @accum, $next;
176 73         146 $allow_subscript = 1; # Subscripts are allowed
177              
178             } elsif ( $next->isa( 'PPI::Token::Cast' ) ) {
179              
180             # Or a cast followed by a block
181 8         31 push @accum, $next;
182 8 50       39 $next = $next->next_sibling() or return;
183 8 100       275 if ( $next->isa( 'PPI::Token::Symbol' ) ) {
    50          
184             defined (
185             $allow_subscript =
186             $allow_subscript_based_on_cast_symbol{
187 6 50       28 $accum[-1]->content()
188             }
189             ) or return;
190 6         46 push @accum, $next;
191             } elsif ( $next->isa( 'PPI::Structure::Block' ) ) {
192             # We want @{[ ... ]} to parse as a PPIx::Regexp::Token::Code.
193             # PPI parses this as a cast followed by a block. The block
194             # contains a single statement, which contains a single
195             # constructor. So:
196 2         9 my @kids = $next->schildren();
197 2 50 33     34 if ( @kids == 1 && $kids[0]->isa( 'PPI::Statement' ) ) {
198 2         47 @kids = $kids[0]->schildren();
199 2 50 33     36 if ( @kids == 1 &&
      33        
200             $kids[0]->isa( 'PPI::Structure::Constructor' ) &&
201             $kids[0]->start() eq '[' ) {
202 2         49 $want_class = 'PPIx::Regexp::Token::Code';
203             }
204             }
205 2         6 push @accum, $next;
206             } else {
207 0         0 return;
208             }
209              
210             } elsif ( $next->isa( 'PPI::Token::ArrayIndex' ) ) {
211              
212             # Or an array index
213 2         6 push @accum, $next;
214              
215             } else {
216              
217             # None others need apply.
218 0         0 return;
219              
220             }
221              
222             # The interpolation _may_ be subscripted. If so ...
223             {
224              
225             # Only accept a subscript if wanted and available
226 83 100 100     132 $allow_subscript and $next = $next->snext_sibling() or last;
  106         547  
227              
228             # Accept an optional dereference operator.
229 63         1829 my @subscr;
230 63 100       298 if ( $next->isa( 'PPI::Token::Operator' ) ) {
231 18 100       69 $next->content() eq '->' or last;
232 4         23 push @subscr, $next;
233 4 50       16 $next = $next->next_sibling() or last;
234              
235             # postderef was introduced in 5.19.5, per perl5195delta.
236 4 50       95 if ( my $deref = $tokenizer->__recognize_postderef(
237             __PACKAGE__, $next ) ) {
238 4         56 push @accum, @subscr, $deref;
239 4         9 last;
240             }
241             }
242              
243             # Accept only a subscript
244 45 100       150 $next->isa( 'PPI::Structure::Subscript' ) or last;
245              
246             # The subscript must have a closing delimiter.
247 25 50       100 $next->finish() or last;
248              
249             # If we are in a regular expression rather than a replacement
250             # string, screen the subscript for content, since [] could be a
251             # character class, and {} could be a quantifier. The perlop docs
252             # say that Perl applies undocumented heuristics subject to
253             # change without notice to figure this out. So we do our poor
254             # best to be heuristical and undocumented.
255 25 100 100     217 not $in_regexp or $class->_subscript( $next ) or last;
256              
257             # If we got this far, accept the subscript and try for another
258             # one.
259 23         78 push @accum, @subscr, $next;
260 23         43 redo;
261             }
262              
263             # Compute the length of all the PPI elements accumulated, and return
264             # it.
265 83         1527 my $length = 0;
266 83         210 foreach ( @accum ) {
267 122 100       728 $length += ref $_ ? length $_->content() : $_;
268             }
269 83         1426 return ( $length, $want_class );
270             }
271              
272             {
273 9     9   79 no warnings qw{ qw }; ## no critic (ProhibitNoWarnings)
  9         29  
  9         4903  
274              
275             my %accept = map { $_ => 1 } qw{ $ $# @ };
276              
277             sub __postderef_accept_cast {
278 108     108   312 return \%accept;
279             }
280             }
281              
282             {
283              
284             my %allowed = (
285             '[' => '_square',
286             '{' => '_curly',
287             );
288              
289             sub _subscript {
290 21     21   54 my ( $class, $struct ) = @_;
291              
292             # We expect to have a left delimiter, which is either a '[' or a
293             # '{'.
294 21 50       55 my $left = $struct->start() or return;
295 21         134 my $lc = $left->content();
296 21 50       119 my $handler = $allowed{$lc} or return;
297              
298             # We expect a single child, which is a PPI::Statement
299 21 50       69 ( my @kids = $struct->schildren() ) == 1 or return;
300 21 50       299 $kids[0]->isa( 'PPI::Statement' ) or return;
301              
302             # We expect the statement to have at least one child.
303 21 50       64 ( @kids = $kids[0]->schildren() ) or return;
304              
305 21         236 return $class->$handler( @kids );
306              
307             }
308              
309             }
310              
311             # Return true if we think a curly-bracketed subscript is really a
312             # subscript, rather than a quantifier.
313             # Called as $class->$handler( ... ) above
314             sub _curly { ## no critic (ProhibitUnusedPrivateSubroutines)
315 17     17   51 my ( undef, @kids ) = @_; # Invocant unused
316              
317             # If the first child is a word, and either it is an only child or
318             # the next child is the fat comma operator, we accept it as a
319             # subscript.
320 17 100       69 if ( $kids[0]->isa( 'PPI::Token::Word' ) ) {
321 14 100       62 @kids == 1 and return 1;
322 4 50 33     24 $kids[1]->isa( 'PPI::Token::Operator' )
323             and $kids[1]->content() eq '=>'
324             and return 1;
325             }
326              
327             # If the first child is a symbol,
328 3 100 66     34 if ( @kids && $kids[0]->isa( 'PPI::Token::Symbol' ) ) {
329             # Accept it if it is the only child
330 1 50       5 @kids == 1
331             and return 1;
332             # Accept it if there are exactly two children and the second is
333             # a subscript.
334 1 50 33     26 @kids == 2
335             and $kids[1]->isa( 'PPI::Structure::Subscript' )
336             and return 1;
337             }
338              
339             # We reject anything else.
340 2         10 return;
341             }
342              
343             # Return true if we think a square-bracketed subscript is really a
344             # subscript, rather than a character class.
345             # Called as $class->$handler( ... ) above
346             sub _square { ## no critic (ProhibitUnusedPrivateSubroutines)
347 4     4   10 my ( undef, @kids ) = @_; # Invocant unused
348              
349             # We expect to have either a number or a symbol as the first
350             # element.
351 4 50       25 $kids[0]->isa( 'PPI::Token::Number' ) and return 1;
352 0 0       0 $kids[0]->isa( 'PPI::Token::Symbol' ) and return 1;
353              
354             # Anything else is rejected.
355 0         0 return;
356             }
357              
358             # Alternate classes for the sigils, depending on whether we are in a
359             # character class (index 1) or not (index 0).
360             my %sigil_alternate = (
361             '$' => [ 'PPIx::Regexp::Token::Assertion', TOKEN_LITERAL ],
362             '@' => [ TOKEN_LITERAL, TOKEN_LITERAL ],
363             );
364              
365             sub __PPIX_TOKENIZER__regexp {
366 1434     1434   3587 my ( $class, $tokenizer, $character ) = @_;
367              
368 1434 100       4660 exists $sigil_alternate{$character} or return;
369              
370 105 100       458 if ( my ( $accept, $want_class ) =
371             $class->_interpolation( $tokenizer, $character, 1 )
372             ) {
373 86         7188 return $tokenizer->make_token( $accept, $want_class );
374             }
375              
376 19 50       76 my $alternate = $sigil_alternate{$character} or return;
377 19 50       71 return $tokenizer->make_token(
378             1, $alternate->[$tokenizer->cookie( COOKIE_CLASS ) ? 1 : 0 ] );
379              
380             }
381              
382             sub __PPIX_TOKENIZER__repl {
383 18     18   101 my ( $class, $tokenizer, $character ) = @_;
384              
385 18 100       66 exists $sigil_alternate{$character} or return;
386              
387 5 50       24 if ( my ( $accept, $want_class ) =
388             $class->_interpolation( $tokenizer, $character, 0 ) ) {
389 5         374 return $tokenizer->make_token( $accept, $want_class );
390             }
391              
392 0           return $tokenizer->make_token( 1, TOKEN_LITERAL );
393              
394             }
395              
396             1;
397              
398             __END__