File Coverage

blib/lib/PPIx/QuoteLike/Utils.pm
Criterion Covered Total %
statement 134 165 81.2
branch 57 104 54.8
condition 6 40 15.0
subroutine 27 27 100.0
pod 7 7 100.0
total 231 343 67.3


line stmt bran cond sub pod time code
1             package PPIx::QuoteLike::Utils;
2              
3 8     8   1909 use 5.006;
  8         26  
4              
5 8     8   32 use strict;
  8         14  
  8         149  
6 8     8   30 use warnings;
  8         15  
  8         234  
7              
8 8     8   36 use base qw{ Exporter };
  8         13  
  8         766  
9              
10 8     8   44 use Carp;
  8         19  
  8         427  
11 8         851 use PPIx::QuoteLike::Constant qw{
12             HAVE_PPIX_REGEXP
13             LOCATION_LINE
14             LOCATION_CHARACTER
15             LOCATION_COLUMN
16             LOCATION_LOGICAL_LINE
17             LOCATION_LOGICAL_FILE
18             VARIABLE_RE
19             @CARP_NOT
20 8     8   372 };
  8         25  
21 8     8   3640 use Readonly;
  8         26285  
  8         363  
22 8     8   48 use Scalar::Util ();
  8         19  
  8         140  
23              
24 8     8   31 use constant LEFT_CURLY => q<{>;
  8         14  
  8         395  
25 8     8   36 use constant RIGHT_CURLY => q<}>;
  8         14  
  8         10776  
26              
27             our @EXPORT_OK = qw{
28             column_number
29             is_ppi_quotelike_element
30             line_number
31             logical_filename
32             logical_line_number
33             statement
34             visual_column_number
35             __instance
36             __match_enclosed
37             __matching_delimiter
38             __normalize_interpolation_for_ppi
39             __variables
40             };
41              
42             our $VERSION = '0.022_01';
43              
44             # Readonly::Scalar my $BRACED_RE => __match_enclosed( LEFT_CURLY );
45             Readonly::Scalar my $BRACKETED_RE => __match_enclosed( '[' ); # ]
46             Readonly::Scalar my $PARENTHESIZED_RE => __match_enclosed( '(' ); # )
47              
48             Readonly::Scalar my $SIGIL_AND_CAST_RE => qr/ \$ \# \$* | [\@\$] \$* /smx;
49             # The following is an interpretation of perldata Identifier Parsing for
50             # Perls before 5.10.
51             Readonly::Scalar my $NORMAL_SYMBOL_NAME_RE => qr/
52             (?:
53             (?: :: )* '?
54             \w+ (?: (?: (?: :: )+ '? | (?: :: )* ' ) \w+ )*
55             (?: :: )* |
56             [[:punct:]]
57             )
58             /smx;
59              
60             Readonly::Scalar my $SYMBOL_NAME_RE => qr/
61             \^ \w+ | $NORMAL_SYMBOL_NAME_RE
62             /smxo;
63              
64             sub column_number {
65 2     2 1 3 my ( $self ) = @_;
66 2   50     6 return ( $self->location() || [] )->[LOCATION_CHARACTER];
67             }
68              
69             {
70              
71             my @relevant_ppi_classes = qw{
72             PPI::Token::Quote
73             PPI::Token::QuoteLike::Backtick
74             PPI::Token::QuoteLike::Command
75             PPI::Token::QuoteLike::Readline
76             PPI::Token::HereDoc
77             };
78              
79             sub is_ppi_quotelike_element {
80 9     9 1 2105 my ( $elem ) = @_;
81              
82 9 100       22 ref $elem
83             or return;
84              
85 8 100       23 Scalar::Util::blessed( $elem )
86             or return;
87              
88 7         15 foreach my $class ( @relevant_ppi_classes ) {
89 25 100       119 $elem->isa( $class )
90             and return 1;
91             }
92              
93 2         8 return;
94             }
95              
96             my $cast_allowed_for_bare_bracketed_variable = {
97             map { $_ => 1 } qw{ @ $ % } };
98              
99             sub __variables {
100 66     66   15508 my ( $ppi ) = @_;
101              
102             # In case we need to manufacture any.
103 66         947 require PPIx::QuoteLike;
104              
105 66 50       282 Scalar::Util::blessed( $ppi )
106             or croak 'Argument must be an object';
107              
108             # TODO the following two lines are a crock, but there does not
109             # seem to be a good alternative. Bad alternatives:
110             # * Introduce PPIx::QuoteLike::Element. But it seems stupid to
111             # introduce a class simply to mark these as members of the
112             # PPIx::QuoteLike family.
113             # If I go this way at all, PPIx::QuoteLike::Element should be
114             # analogous to PPIx::Regexp::Element in that it carries at
115             # least the navigational and Perl version methods.
116             # * Use DOES(). But that was not introduced until 5.10. So I
117             # could:
118             # - Depend on UNIVERSAL::DOES. This kindly steps aside if
119             # UNIVERSAL::DOES() exists, but it seems stupid to introduce
120             # a dependency that is only needed under really old Perls.
121             # - Same as above, only make the dependence conditional on the
122             # version of Perl. This may actually be the best
123             # alternative, but it's still pretty crufty.
124 66 100       385 $ppi->isa( 'PPIx::QuoteLike' )
125             and return $ppi->variables();
126 65 50       284 $ppi->isa( 'PPIx::QuoteLike::Token' )
127             and return $ppi->variables();
128              
129 65         100 my %var;
130              
131             $ppi->isa( 'PPIx::Regexp::Element' )
132 65 50       368 and do {
133 0 0       0 foreach my $code ( @{ $ppi->find(
  0         0  
134             'PPIx::Regexp::Token::Code' ) || [] } ) {
135 0         0 foreach my $name ( __variables( $code->ppi() ) ) {
136 0         0 $var{ $name } = 1;
137             }
138             }
139 0         0 return keys %var;
140             };
141              
142 65 50       201 $ppi->isa( 'PPI::Element' )
143             or croak 'Argument must be a PPI::Element, ',
144             'PPIx::Regexp::Element, PPIx::QuoteLike, or ',
145             'PPIx::QuoteLike::Token';
146              
147 65         177 foreach my $sym ( _find( $ppi, 'PPI::Token::Symbol' ) ) {
148             # Eliminate rogue subscripts
149 57 50       13925 _is_bareword_subscript( $sym )
150             and next;
151 57 50       478 if ( defined( my $name = _name_from_misparsed_magic( $sym ) )
152             ) {
153             # We're $${name}, which is a dereference of $name
154 0         0 $var{$name} = 1;
155             } else {
156             # PPI got it right.
157 57         349 $var{ $sym->symbol() } = 1;
158             }
159             }
160              
161             # For some reason, PPI parses '$#foo' as a
162             # PPI::Token::ArrayIndex. $#$foo is parsed as a Cast followed
163             # by a Symbol, so as long as nobody decides the '$#' cast causes
164             # $elem->symbol() to return something other than '$foo', we're
165             # cool.
166 65         5135 foreach my $elem ( _find( $ppi, 'PPI::Token::ArrayIndex' ) ) {
167 0         0 my $name = $elem->content();
168 0 0       0 $name =~ s/ \A \$ [#] /@/smx or next;
169 0         0 $var{$name} = 1;
170             }
171              
172             # Occasionally you see something like ${foo} outside quotes.
173             # This is legitimate, though PPI parses it as a cast followed by
174             # a block. On the assumption that there are fewer blocks than
175             # words in most Perl, we start at the top and work down. Perl
176             # also handles punctuation variables specified this way, but
177             # since PPI goes berserk when it sees this, we won't bother.
178 65         12807 foreach my $elem ( _find( $ppi, 'PPI::Structure::Block' ) ) {
179              
180 7 50       2508 my $previous = $elem->sprevious_sibling()
181             or next;
182 7 50       220 $previous->isa( 'PPI::Token::Cast' )
183             or next;
184 7         26 my $sigil = $previous->content();
185 7 50       45 $cast_allowed_for_bare_bracketed_variable->{ $sigil }
186             or next;
187              
188 7 50       34 if ( my @kids = $elem->schildren() ) {
189             # The simple case: we parsed a block whose contents,
190             # however they were parsed, are the contents of the
191             # token.
192 7 50       94 1 == @kids
193             or next;
194 7 50       30 $kids[0]->isa( 'PPI::Statement' )
195             or next;
196              
197 7 100       31 ( my $name = join '', map { $_->content() }
  17         84  
198 2         365 $kids[0]->children() ) =~ m/ \A @{[ VARIABLE_RE ]} \z /smxo
199             or next;
200              
201 2         63 $var{ "$sigil$name" } = 1;
202             } else {
203             # The downright ugly case. We have something like ${]}
204             # where PPI can't find the terminator. To solve this we
205             # need to go blundering through the parse until we find
206             # the closing terminator.
207 0 0       0 my $stmt = $elem->statement()
208             or next;
209 0 0       0 if ( my $finish = $elem->finish() ) {
210             # If we appear to have a terminated block, we may # {{
211             # have ${}}, which is the same as $}
212 0 0       0 my $next = $stmt->next_sibling()
213             or next;
214 0 0 0     0 $next->isa( 'PPI::Statement::UnmatchedBrace' )
215             and RIGHT_CURLY eq $next->content()
216             or next;
217 0         0 $var{ $sigil . $finish->content() } = 1;
218             } else {
219             # Otherwise we have something like # [
220             # ${]}
221 0 0       0 my $next = $stmt->next_sibling()
222             or next;
223 0 0       0 my $finish = $next->next_sibling()
224             or next;
225 0 0 0     0 $finish->isa( 'PPI::Statement::UnmatchedBrace' )
226             and RIGHT_CURLY eq $finish->content()
227             or next;
228 0         0 $var{ $sigil . $next->content() } = 1;
229             }
230             }
231             }
232              
233             # Yes, we might have nested string literals, like
234             # "... @{[ qq<$foo> ]} ..."
235 65         10019 foreach my $class ( @relevant_ppi_classes ) {
236 325         47604 foreach my $elem ( _find( $ppi, $class ) ) {
237              
238 8 50       912 my $ql = PPIx::QuoteLike->new( $elem )
239             or next;
240 8 100       21 $ql->interpolates()
241             or next;
242 6         17 foreach my $sym ( $ql->variables() ) {
243 7         59 $var{ $sym } = 1;
244             }
245             }
246             }
247              
248             # By the same token we might have a regexp
249             # TODO for consistency's sake, give PPIx::Regexp a variables()
250             # method.
251 65 50       11962 if ( HAVE_PPIX_REGEXP ) {
252 0         0 foreach my $class ( qw{
253             PPI::Token::QuoteLike::Regexp
254             PPI::Token::Regexp::Match
255             PPI::Token::Regexp::Substitute
256             } ) {
257 0         0 foreach my $elem ( _find( $ppi, $class ) ) {
258 0 0       0 my $re = PPIx::Regexp->new( $elem )
259             or next;
260 0 0       0 foreach my $code ( @{ $re->find(
  0         0  
261             'PPIx::Regexp::Token::Code' ) || [] } ) {
262 0         0 foreach my $name ( __variables( $code->ppi() ) ) {
263 0         0 $var{ $name } = 1;
264             }
265             }
266             }
267             }
268             }
269              
270 65         423 return ( keys %var );
271             }
272             }
273              
274             # We want __variables to work when passed a single token. So we go
275             # through this to do what we wish PPI did -- return an array for a
276             # PPI::Node, or return either the element itself or nothing otherwise.
277             sub _find {
278 520     520   947 my ( $elem, $class ) = @_;
279             $elem->isa( 'PPI::Node' )
280 520 100       1324 and return @{ $elem->find( $class ) || [] };
  496 100       1077  
281 24 100       56 $elem->isa( $class )
282             and return $elem;
283 22         34 return;
284             }
285              
286             sub __instance {
287 191     191   355 my ( $object, $class ) = @_;
288 191 100       578 Scalar::Util::blessed( $object ) or return;
289 144         470 return $object->isa( $class );
290             }
291              
292             # The problem this solves is that PPI can parse '{_}' as containing a
293             # PPI::Token::Magic (which is a PPI::Token::Symbol), not a
294             # PPI::Token::Word. This code also returns true for '${_}', which is not
295             # a subscript but has the same basic problem. The latter gets caught
296             # later.
297             sub _is_bareword_subscript {
298 57     57   123 my ( $elem ) = @_;
299 57 50       189 $elem->content() =~ m/ \A \w+ \z /smx
300             or return;
301 0         0 my $parent;
302 0 0 0     0 $parent = $elem->parent()
      0        
303             and $parent->isa( 'PPI::Statement' )
304             and 1 == $parent->children()
305             or return;
306 0 0 0     0 $parent = $parent->parent()
      0        
      0        
307             and ( $parent->isa( 'PPI::Structure::Subscript' )
308             or $parent->isa( 'PPI::Structure::Block' ) )
309             and 1 == $parent->children()
310             or return;
311 0         0 my $start;
312 0 0 0     0 $start = $parent->start()
      0        
313             and $start->isa( 'PPI::Token::Structure' )
314             and q<{> eq $start->content()
315             or return;
316 0         0 return 1;
317             }
318              
319             sub line_number {
320 2     2 1 4 my ( $self ) = @_;
321 2   50     6 return ( $self->location() || [] )->[LOCATION_LINE];
322             }
323              
324             sub logical_filename {
325 2     2 1 4 my ( $self ) = @_;
326 2   50     5 return ( $self->location() || [] )->[LOCATION_LOGICAL_FILE];
327             }
328              
329             sub logical_line_number {
330 2     2 1 5 my ( $self ) = @_;
331 2   50     5 return ( $self->location() || [] )->[LOCATION_LOGICAL_LINE];
332             }
333              
334             {
335             our %REGEXP_CACHE;
336              
337             my %matching_bracket;
338              
339             BEGIN {
340 8     8   995 %matching_bracket = qw/ ( ) [ ] { } < > /;
341             }
342              
343             sub __match_enclosed {
344 54     54   119 my ( $left ) = @_;
345 54         108 my $ql = quotemeta $left;
346              
347             $REGEXP_CACHE{$ql}
348 54 100       167 and return $REGEXP_CACHE{$ql};
349              
350 41 100       1504 if ( my $right = $matching_bracket{$left} ) {
351              
352             # Based on Regexp::Common $RE{balanced} 2.113 (because I
353             # can't use (?-1)
354              
355 20         38 my $ql = quotemeta $left;
356 20         27 my $qr = quotemeta $right;
357 20         30 my $pkg = __PACKAGE__;
358 20         53 my $r = "(??{ \$${pkg}::REGEXP_CACHE{'$ql'} })";
359              
360 20         62 my @parts = (
361             "(?>[^\\\\$ql$qr]+)",
362             "(?>\\\$[$ql$qr])",
363             '(?>\\\\.)',
364             $r,
365             );
366              
367             {
368 8     8   70 use re qw{ eval };
  8         13  
  8         7114  
  20         63  
369 20         34 local $" = '|';
370 20         1932 $REGEXP_CACHE{$ql} = qr/($ql(?:@parts)*$qr)/sm;
371             }
372              
373 20         116 return $REGEXP_CACHE{$ql};
374              
375             } else {
376              
377             # Based on Regexp::Common $RE{delimited}{-delim=>'`'}
378 21   33     496 return ( $REGEXP_CACHE{$ql} ||=
379             qr< (
380             (?: \Q$left\E )
381             (?: [^\\\Q$left\E]* (?: \\ . [^\\\Q$left\E]* )* )
382             (?: \Q$left\E )
383             ) >smx
384             );
385             }
386             }
387              
388             sub __matching_delimiter {
389 55     55   108 my ( $left ) = @_;
390 55 100       214 my $right = $matching_bracket{$left}
391             or return $left;
392 8         28 return $right;
393             }
394             }
395              
396             sub __normalize_interpolation_for_ppi {
397 44     44   4373 ( local $_ ) = @_;
398              
399             # "@{[ foo() ]}" => 'foo()'
400 44 100       412 if ( m/ \A \@ [{] \s* ( $BRACKETED_RE ) \s* [}] \z /smxo ) {
401 3         12 $_ = $1;
402 3         14 s/ \A [[] \s* //smx;
403 3         14 s/ \s* []] \z //smx;
404 3         13 return "$_";
405             }
406              
407             # "${\( foo() )}" => 'foo()'
408 41 100       285 if ( m/ \A \$ [{] \s* \\ \s* ( $PARENTHESIZED_RE ) \s* [}] \z /smox ) {
409 1         3 $_ = $1;
410 1         4 s/ \A [(] \s* //smx;
411 1         4 s/ \s* [)] \z //smx;
412 1         4 return "$_";
413             }
414              
415             # "${foo}" => '$foo'
416             m/ \A ( $SIGIL_AND_CAST_RE ) \s*
417             [{] \s* ( $NORMAL_SYMBOL_NAME_RE ) \s* [}] \z /smxo
418 40 100       403 and return "$1$2";
419              
420             # "${foo{bar}}" => '$foo{bar}'
421             # NOTE that this is a warning, and so not done.
422             # if ( m/ \A ( $SIGIL_AND_CAST_RE ) (?= [{] ) ( $BRACED_RE ) /smx ) {
423             # ( my $sigil, local $_ ) = ( $1, $2 );
424             # s/ \A [{] \s* //smx;
425             # s/ \s* [}] \z //smx;
426             # return "$sigil$_";
427             # }
428              
429             # "$ foo->{bar}" => '$foo->{bar}'
430 32 100       255 if ( m/ \A ( $SIGIL_AND_CAST_RE ) \s+ ( $SYMBOL_NAME_RE ) ( .* ) /smxo ) {
431 1         6 return "$1$2$3";
432             }
433              
434             # Everything else
435 31         136 return "$_";
436             }
437              
438             sub statement {
439 3     3 1 54 my ( $self ) = @_;
440 3 50       9 my $top = $self->top()
441             or return;
442 3 50       11 $top->can( 'source' )
443             or return;
444 3 50       7 my $source = $top->source()
445             or return;
446 3 50       9 $source->can( 'statement' )
447             or return;
448 3         7 return $source->statement();
449             }
450              
451             sub visual_column_number {
452 2     2 1 5 my ( $self ) = @_;
453 2   50     5 return ( $self->location() || [] )->[LOCATION_COLUMN];
454             }
455              
456             # This handles two known cases where PPI misparses bracketed variable
457             # names.
458             # * $${foo} is parsed as '$$' when it is really a dereference of $foo.
459             # The argument is the '$$'
460             # * ${$} is parsed as an unterminated block followed by '$}'. The
461             # argument is the '$}'.
462              
463             {
464             my $special = {
465             '$$' => sub { # $${foo},$${$_[0]}
466             my ( $elem ) = @_;
467             my $next;
468             $next = $elem->snext_sibling()
469             and $next->isa( 'PPI::Structure::Subscript' )
470             or return;
471             my $start;
472             $start = $next->start()
473             and LEFT_CURLY eq $start->content()
474             or return;
475             my @kids = $next->schildren();
476             1 == @kids
477             and $kids[0]->isa( 'PPI::Statement' )
478             and @kids = $kids[0]->schildren();
479             if ( 1 == @kids ) {
480             # The $${foo} case
481             return join '', '$', map { $_->content() } @kids;
482             } else {
483             # The $${$_[0]} case. In this case the curly brackets
484             # are really a block, as
485             # $ perl -MO=Deparse -e '$${$_[0]}' makes clear. So we
486             # just return the '$$', since the '$_' will turn up in
487             # the course of things.
488             return $elem->content();
489             }
490             },
491             # {
492             '$}' => sub { # ${$}
493             my ( $elem ) = @_;
494             my $stmt;
495             $stmt = $elem->parent()
496             and $stmt->isa( 'PPI::Statement' )
497             or return;
498             my $block;
499             $block = $stmt->parent()
500             and $block->isa( 'PPI::Structure::Block' )
501             and not $block->finish()
502             or return;
503             my $sigil;
504             $sigil = $block->sprevious_sibling()
505             and $sigil->isa( 'PPI::Token::Cast' )
506             or return;
507             my $name = join '', map { $_->content() } $sigil,
508             $stmt->children();
509             chop $name;
510             return $name;
511             },
512             };
513              
514             sub _name_from_misparsed_magic {
515 57     57   111 my ( $elem ) = @_;
516 57 100       289 $elem->isa( 'PPI::Token::Magic' )
517             or return;
518 8 100       58 my $code = $special->{ $elem->content() }
519             or return;
520 2         16 return $code->( $elem );
521             }
522             }
523              
524             1;
525              
526             __END__