File Coverage

blib/lib/PPIx/Regexp/Token/Interpolation.pm
Criterion Covered Total %
statement 108 114 94.7
branch 62 90 68.8
condition 13 24 54.1
subroutine 18 19 94.7
pod 2 2 100.0
total 203 249 81.5


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PPIx::Regexp::Token::Interpolation - Represent an interpolation in the PPIx::Regexp package.
4              
5             =head1 SYNOPSIS
6              
7             use PPIx::Regexp::Dumper;
8             PPIx::Regexp::Dumper->new('qr{$foo}smx')->print();
9              
10             =head1 INHERITANCE
11              
12             C is a
13             L.
14              
15             C has no descendants.
16              
17             =head1 DESCRIPTION
18              
19             This class represents a variable interpolation into a regular
20             expression. In the L the C<$foo> would be represented by an
21             object of this class.
22              
23             =head2 Incompatible Change: Parse of '@{[ ... ]}'
24              
25             In versions 0.083 and earlier, C<'@{[ ... ]}'> parsed as a member of
26             this class. In 0.084 and later, it parses as a member of
27             C. This more accurately reflects the actual
28             contents of the token, and enables the recognition of the full range of
29             postfix dereference operators for versioning purposes, not just those
30             valid in interpolations.
31              
32             =head1 METHODS
33              
34             This class provides the following public methods beyond those provided
35             by its superclass.
36              
37             =cut
38              
39             package PPIx::Regexp::Token::Interpolation;
40              
41 9     9   66 use strict;
  9         19  
  9         284  
42 9     9   47 use warnings;
  9         23  
  9         248  
43              
44 9     9   48 use base qw{ PPIx::Regexp::Token::Code };
  9         28  
  9         812  
45              
46 9     9   68 use Carp qw{ confess };
  9         17  
  9         367  
47 9     9   51 use PPI::Document;
  9         22  
  9         241  
48 9         1004 use PPIx::Regexp::Constant qw{
49             COOKIE_CLASS
50             COOKIE_REGEX_SET
51             MINIMUM_PERL
52             TOKEN_LITERAL
53             @CARP_NOT
54 9     9   56 };
  9         17  
55              
56             our $VERSION = '0.087';
57              
58 9     9   61 use constant VERSION_WHEN_IN_REGEX_SET => '5.017009';
  9         19  
  9         7607  
59              
60             sub __new {
61 105     105   7526 my ( $class, $content, %arg ) = @_;
62              
63             defined $arg{perl_version_introduced}
64 105 50       482 or $arg{perl_version_introduced} = MINIMUM_PERL;
65              
66 105         651 my $self = $class->SUPER::__new( $content, %arg );
67              
68 105         428 return $self;
69             }
70              
71             =head2 is_matcher
72              
73             This method returns C because a static analysis can not in
74             general tell whether a piece of code matches anything.
75              
76             =cut
77              
78 0     0 1 0 sub is_matcher { return undef; } ## no critic (ProhibitExplicitReturnUndef)
79              
80             # Return true if the token can be quantified, and false otherwise
81             # This can be quantified because it might interpolate a quantifiable
82             # token. Of course, it might not, but we need to be permissive here.
83             # sub can_be_quantified { return };
84              
85             # We overrode this in PPIx::Regexp::Token::Code, since (?{...}) did not
86             # appear until Perl 5.5. But interpolation has been there since the
87             # beginning, so we have to override again. This turns out to be OK,
88             # though, because while Regex Sets were introduced in 5.17.8,
89             # interpolation inside them was not introduced until 5.17.9.
90             sub perl_version_introduced {
91 17     17 1 1632 my ( $self ) = @_;
92 17         53 return $self->{perl_version_introduced};
93             }
94              
95             # Normalize the content of an interpolation object before making it into
96             # a PPI document. The issue here is that things like ${x} are at least
97             # warnings outside strings, but are normal inside them.
98             sub __ppi_normalize_content {
99 115     115   245 my ( $self ) = @_;
100 115         195 my $content;
101 115 50       333 defined( $content = $self->content() )
102             or return $content;
103             # NOTE: perldata gives a regexp for this, but it requires Perl 5.10.
104             # I believe the following caputures the intent, except possibly for
105             # various weird combinations of '::' and "'".
106 115         560 $content =~
107             s/ \A
108             ( \$ \# \$* | [\@\$] \$* ) # Sigil and possible casts
109             [{] \s* (?: :: )* '? # per perldata
110             ( ^? (?: \w+ (?: (?: :: | ' ) \w+ )* (?: :: )? | [[:punct:]] ) )
111             \s* [}] \z
112             /$1$2/smx;
113 115         351 return $content;
114             }
115              
116             # Match the beginning of an interpolation.
117              
118             my $interp_re =
119             qr{ \A (?= [\@\$]? \$ [-\w&`'+^./\\";%=~:?!\@\$<>\[\]\{\},#] |
120             \@ [\w\{] )
121             }smx;
122              
123             # Match bracketed interpolation
124              
125             my $brkt_interp_re =
126             qr{ \A (?: [\@\$] \$* [#]? \$* [\{] (?: [][\-&`'+,^./\\";%=:?\@\$<>,#] |
127             \^? \w+ (?: :: \w+ )* ) [\}] |
128             \@ [\{] \w+ (?: :: \w+ )* [\}] )
129             }smx;
130              
131             # We pull out the logic of finding and dealing with the interpolation
132             # into a separate subroutine because if we fail to find an interpolation
133             # we want to do something with the sigils.
134              
135             my %allow_subscript_based_on_cast_symbol = (
136             q<$#> => 0,
137             q<$> => 1,
138             q<@> => 1,
139             );
140              
141             sub _interpolation {
142 110     110   436 my ( $class, $tokenizer, undef, $in_regexp ) = @_; # $character unused
143              
144             # If the regexp does not interpolate, bail now.
145 110 100       362 $tokenizer->interpolates() or return;
146              
147             # If we're a bracketed interpolation, just accept it
148 108 100       462 if ( my $len = $tokenizer->find_regexp( $brkt_interp_re ) ) {
149 8         64 return $len;
150             }
151              
152             # Make sure we start off plausibly
153 100 100       374 defined $tokenizer->find_regexp( $interp_re )
154             or return;
155              
156             # See if PPI can figure out what we have
157 83 50       443 my $doc = $tokenizer->ppi_document()
158             or return;
159              
160             # Get the first statement to work on.
161 83 50       144327 my $stmt = $doc->find_first( 'PPI::Statement' )
162             or return;
163              
164 83         19916 my @accum; # The elements of the interpolation
165             my $allow_subscript; # Assume no subscripts allowed
166 83         237 my $want_class = __PACKAGE__; # Assume we want an interpolation.
167              
168             # Find the beginning of the interpolation
169 83 50       357 my $next = $stmt->schild( 0 ) or return;
170              
171             # The interpolation should start with
172 83 100       1727 if ( $next->isa( 'PPI::Token::Symbol' ) ) {
    100          
    50          
173              
174             # A symbol
175 73         218 push @accum, $next;
176 73         193 $allow_subscript = 1; # Subscripts are allowed
177              
178             } elsif ( $next->isa( 'PPI::Token::Cast' ) ) {
179              
180             # Or a cast followed by a block
181 8         25 push @accum, $next;
182 8 50       38 $next = $next->next_sibling() or return;
183 8 100       288 if ( $next->isa( 'PPI::Token::Symbol' ) ) {
    50          
184             defined (
185             $allow_subscript =
186             $allow_subscript_based_on_cast_symbol{
187 6 50       32 $accum[-1]->content()
188             }
189             ) or return;
190 6         49 push @accum, $next;
191             } elsif ( $next->isa( 'PPI::Structure::Block' ) ) {
192             # We want @{[ ... ]} to parse as a PPIx::Regexp::Token::Code.
193             # PPI parses this as a cast followed by a block. The block
194             # contains a single statement, which contains a single
195             # constructor. So:
196 2         13 my @kids = $next->schildren();
197 2 50 33     38 if ( @kids == 1 && $kids[0]->isa( 'PPI::Statement' ) ) {
198 2         12 @kids = $kids[0]->schildren();
199 2 50 33     42 if ( @kids == 1 &&
      33        
200             $kids[0]->isa( 'PPI::Structure::Constructor' ) &&
201             $kids[0]->start() eq '[' ) {
202 2         66 $want_class = 'PPIx::Regexp::Token::Code';
203             }
204             }
205 2         8 push @accum, $next;
206             } else {
207 0         0 return;
208             }
209              
210             } elsif ( $next->isa( 'PPI::Token::ArrayIndex' ) ) {
211              
212             # Or an array index
213 2         9 push @accum, $next;
214              
215             } else {
216              
217             # None others need apply.
218 0         0 return;
219              
220             }
221              
222             # The interpolation _may_ be subscripted. If so ...
223             {
224              
225             # Only accept a subscript if wanted and available
226 83 100 100     175 $allow_subscript and $next = $next->snext_sibling() or last;
  106         613  
227              
228             # Accept an optional dereference operator.
229 63         1877 my @subscr;
230 63 100       316 if ( $next->isa( 'PPI::Token::Operator' ) ) {
231 18 100       92 $next->content() eq '->' or last;
232 4         23 push @subscr, $next;
233 4 50       15 $next = $next->next_sibling() or last;
234              
235             # postderef was introduced in 5.19.5, per perl5195delta.
236 4 50       112 if ( my $deref = $tokenizer->__recognize_postderef(
237             __PACKAGE__, $next ) ) {
238 4         64 push @accum, @subscr, $deref;
239 4         9 last;
240             }
241             }
242              
243             # Accept only a subscript
244 45 100       160 $next->isa( 'PPI::Structure::Subscript' ) or last;
245              
246             # The subscript must have a closing delimiter.
247 25 50       114 $next->finish() or last;
248              
249             # If we are in a regular expression rather than a replacement
250             # string, screen the subscript for content, since [] could be a
251             # character class, and {} could be a quantifier. The perlop docs
252             # say that Perl applies undocumented heuristics subject to
253             # change without notice to figure this out. So we do our poor
254             # best to be heuristical and undocumented.
255 25 100 100     223 not $in_regexp or $class->_subscript( $next ) or last;
256              
257             # If we got this far, accept the subscript and try for another
258             # one.
259 23         82 push @accum, @subscr, $next;
260 23         42 redo;
261             }
262              
263             # Compute the length of all the PPI elements accumulated, and return
264             # it.
265 83         1129 my $length = 0;
266 83         247 foreach ( @accum ) {
267 122 100       660 $length += ref $_ ? length $_->content() : $_;
268             }
269 83         1624 return ( $length, $want_class );
270             }
271              
272             {
273 9     9   77 no warnings qw{ qw }; ## no critic (ProhibitNoWarnings)
  9         21  
  9         4518  
274              
275             my %accept = map { $_ => 1 } qw{ $ $# @ };
276              
277             sub __postderef_accept_cast {
278 108     108   311 return \%accept;
279             }
280             }
281              
282             {
283              
284             my %allowed = (
285             '[' => '_square',
286             '{' => '_curly',
287             );
288              
289             sub _subscript {
290 21     21   50 my ( $class, $struct ) = @_;
291              
292             # We expect to have a left delimiter, which is either a '[' or a
293             # '{'.
294 21 50       56 my $left = $struct->start() or return;
295 21         138 my $lc = $left->content();
296 21 50       120 my $handler = $allowed{$lc} or return;
297              
298             # We expect a single child, which is a PPI::Statement
299 21 50       70 ( my @kids = $struct->schildren() ) == 1 or return;
300 21 50       277 $kids[0]->isa( 'PPI::Statement' ) or return;
301              
302             # We expect the statement to have at least one child.
303 21 50       59 ( @kids = $kids[0]->schildren() ) or return;
304              
305 21         247 return $class->$handler( @kids );
306              
307             }
308              
309             }
310              
311             # Return true if we think a curly-bracketed subscript is really a
312             # subscript, rather than a quantifier.
313             # Called as $class->$handler( ... ) above
314             sub _curly { ## no critic (ProhibitUnusedPrivateSubroutines)
315 17     17   65 my ( undef, @kids ) = @_; # Invocant unused
316              
317             # If the first child is a word, and either it is an only child or
318             # the next child is the fat comma operator, we accept it as a
319             # subscript.
320 17 100       61 if ( $kids[0]->isa( 'PPI::Token::Word' ) ) {
321 14 100       69 @kids == 1 and return 1;
322 4 50 33     21 $kids[1]->isa( 'PPI::Token::Operator' )
323             and $kids[1]->content() eq '=>'
324             and return 1;
325             }
326              
327             # If the first child is a symbol,
328 3 100 66     47 if ( @kids && $kids[0]->isa( 'PPI::Token::Symbol' ) ) {
329             # Accept it if it is the only child
330 1 50       6 @kids == 1
331             and return 1;
332             # Accept it if there are exactly two children and the second is
333             # a subscript.
334 1 50 33     13 @kids == 2
335             and $kids[1]->isa( 'PPI::Structure::Subscript' )
336             and return 1;
337             }
338              
339             # We reject anything else.
340 2         11 return;
341             }
342              
343             # Return true if we think a square-bracketed subscript is really a
344             # subscript, rather than a character class.
345             # Called as $class->$handler( ... ) above
346             sub _square { ## no critic (ProhibitUnusedPrivateSubroutines)
347 4     4   16 my ( undef, @kids ) = @_; # Invocant unused
348              
349             # We expect to have either a number or a symbol as the first
350             # element.
351 4 50       42 $kids[0]->isa( 'PPI::Token::Number' ) and return 1;
352 0 0       0 $kids[0]->isa( 'PPI::Token::Symbol' ) and return 1;
353              
354             # Anything else is rejected.
355 0         0 return;
356             }
357              
358             # Alternate classes for the sigils, depending on whether we are in a
359             # character class (index 1) or not (index 0).
360             my %sigil_alternate = (
361             '$' => [ 'PPIx::Regexp::Token::Assertion', TOKEN_LITERAL ],
362             '@' => [ TOKEN_LITERAL, TOKEN_LITERAL ],
363             );
364              
365             sub __PPIX_TOKENIZER__regexp {
366 1434     1434   3672 my ( $class, $tokenizer, $character ) = @_;
367              
368 1434 100       4630 exists $sigil_alternate{$character} or return;
369              
370 105 100       515 if ( my ( $accept, $want_class ) =
371             $class->_interpolation( $tokenizer, $character, 1 )
372             ) {
373 86         7209 return $tokenizer->make_token( $accept, $want_class );
374             }
375              
376 19 50       92 my $alternate = $sigil_alternate{$character} or return;
377 19 50       81 return $tokenizer->make_token(
378             1, $alternate->[$tokenizer->cookie( COOKIE_CLASS ) ? 1 : 0 ] );
379              
380             }
381              
382             sub __PPIX_TOKENIZER__repl {
383 18     18   104 my ( $class, $tokenizer, $character ) = @_;
384              
385 18 100       69 exists $sigil_alternate{$character} or return;
386              
387 5 50       32 if ( my ( $accept, $want_class ) =
388             $class->_interpolation( $tokenizer, $character, 0 ) ) {
389 5         373 return $tokenizer->make_token( $accept, $want_class );
390             }
391              
392 0           return $tokenizer->make_token( 1, TOKEN_LITERAL );
393              
394             }
395              
396             1;
397              
398             __END__