File Coverage

blib/lib/PPIx/QuoteLike/Utils.pm
Criterion Covered Total %
statement 134 165 81.2
branch 57 104 54.8
condition 6 40 15.0
subroutine 27 27 100.0
pod 7 7 100.0
total 231 343 67.3


line stmt bran cond sub pod time code
1             package PPIx::QuoteLike::Utils;
2              
3 8     8   1813 use 5.006;
  8         30  
4              
5 8     8   34 use strict;
  8         13  
  8         180  
6 8     8   33 use warnings;
  8         15  
  8         222  
7              
8 8     8   44 use base qw{ Exporter };
  8         14  
  8         778  
9              
10 8     8   42 use Carp;
  8         16  
  8         534  
11 8         833 use PPIx::QuoteLike::Constant qw{
12             HAVE_PPIX_REGEXP
13             LOCATION_LINE
14             LOCATION_CHARACTER
15             LOCATION_COLUMN
16             LOCATION_LOGICAL_LINE
17             LOCATION_LOGICAL_FILE
18             VARIABLE_RE
19             @CARP_NOT
20 8     8   386 };
  8         16  
21 8     8   3576 use Readonly;
  8         26352  
  8         415  
22 8     8   51 use Scalar::Util ();
  8         15  
  8         135  
23              
24 8     8   30 use constant LEFT_CURLY => q<{>;
  8         15  
  8         391  
25 8     8   40 use constant RIGHT_CURLY => q<}>;
  8         13  
  8         10702  
26              
27             our @EXPORT_OK = qw{
28             column_number
29             is_ppi_quotelike_element
30             line_number
31             logical_filename
32             logical_line_number
33             statement
34             visual_column_number
35             __instance
36             __match_enclosed
37             __matching_delimiter
38             __normalize_interpolation_for_ppi
39             __variables
40             };
41              
42             our $VERSION = '0.022';
43              
44             # Readonly::Scalar my $BRACED_RE => __match_enclosed( LEFT_CURLY );
45             Readonly::Scalar my $BRACKETED_RE => __match_enclosed( '[' ); # ]
46             Readonly::Scalar my $PARENTHESIZED_RE => __match_enclosed( '(' ); # )
47              
48             Readonly::Scalar my $SIGIL_AND_CAST_RE => qr/ \$ \# \$* | [\@\$] \$* /smx;
49             # The following is an interpretation of perldata Identifier Parsing for
50             # Perls before 5.10.
51             Readonly::Scalar my $SYMBOL_NAME_RE => qr/
52             \^? (?:
53             (?: :: )* '?
54             \w+ (?: (?: (?: :: )+ '? | (?: :: )* ' ) \w+ )*
55             (?: :: )* |
56             [[:punct:]]
57             )
58             /smx;
59              
60             sub column_number {
61 2     2 1 5 my ( $self ) = @_;
62 2   50     5 return ( $self->location() || [] )->[LOCATION_CHARACTER];
63             }
64              
65             {
66              
67             my @relevant_ppi_classes = qw{
68             PPI::Token::Quote
69             PPI::Token::QuoteLike::Backtick
70             PPI::Token::QuoteLike::Command
71             PPI::Token::QuoteLike::Readline
72             PPI::Token::HereDoc
73             };
74              
75             sub is_ppi_quotelike_element {
76 9     9 1 2139 my ( $elem ) = @_;
77              
78 9 100       25 ref $elem
79             or return;
80              
81 8 100       22 Scalar::Util::blessed( $elem )
82             or return;
83              
84 7         13 foreach my $class ( @relevant_ppi_classes ) {
85 25 100       121 $elem->isa( $class )
86             and return 1;
87             }
88              
89 2         17 return;
90             }
91              
92             my $cast_allowed_for_bare_bracketed_variable = {
93             map { $_ => 1 } qw{ @ $ % } };
94              
95             sub __variables {
96 62     62   11671 my ( $ppi ) = @_;
97              
98             # In case we need to manufacture any.
99 62         754 require PPIx::QuoteLike;
100              
101 62 50       202 Scalar::Util::blessed( $ppi )
102             or croak 'Argument must be an object';
103              
104             # TODO the following two lines are a crock, but there does not
105             # seem to be a good alternative. Bad alternatives:
106             # * Introduce PPIx::QuoteLike::Element. But it seems stupid to
107             # introduce a class simply to mark these as members of the
108             # PPIx::QuoteLike family.
109             # If I go this way at all, PPIx::QuoteLike::Element should be
110             # analogous to PPIx::Regexp::Element in that it carries at
111             # least the navigational and Perl version methods.
112             # * Use DOES(). But that was not introduced until 5.10. So I
113             # could:
114             # - Depend on UNIVERSAL::DOES. This kindly steps aside if
115             # UNIVERSAL::DOES() exists, but it seems stupid to introduce
116             # a dependency that is only needed under really old Perls.
117             # - Same as above, only make the dependence conditional on the
118             # version of Perl. This may actually be the best
119             # alternative, but it's still pretty crufty.
120 62 100       249 $ppi->isa( 'PPIx::QuoteLike' )
121             and return $ppi->variables();
122 61 50       173 $ppi->isa( 'PPIx::QuoteLike::Token' )
123             and return $ppi->variables();
124              
125 61         75 my %var;
126              
127             $ppi->isa( 'PPIx::Regexp::Element' )
128 61 50       195 and do {
129 0 0       0 foreach my $code ( @{ $ppi->find(
  0         0  
130             'PPIx::Regexp::Token::Code' ) || [] } ) {
131 0         0 foreach my $name ( __variables( $code->ppi() ) ) {
132 0         0 $var{ $name } = 1;
133             }
134             }
135 0         0 return keys %var;
136             };
137              
138 61 50       149 $ppi->isa( 'PPI::Element' )
139             or croak 'Argument must be a PPI::Element, ',
140             'PPIx::Regexp::Element, PPIx::QuoteLike, or ',
141             'PPIx::QuoteLike::Token';
142              
143 61         115 foreach my $sym ( _find( $ppi, 'PPI::Token::Symbol' ) ) {
144             # Eliminate rogue subscripts
145 56 50       11646 _is_bareword_subscript( $sym )
146             and next;
147 56 50       377 if ( defined( my $name = _name_from_misparsed_magic( $sym ) )
148             ) {
149             # We're $${name}, which is a dereference of $name
150 0         0 $var{$name} = 1;
151             } else {
152             # PPI got it right.
153 56         203 $var{ $sym->symbol() } = 1;
154             }
155             }
156              
157             # For some reason, PPI parses '$#foo' as a
158             # PPI::Token::ArrayIndex. $#$foo is parsed as a Cast followed
159             # by a Symbol, so as long as nobody decides the '$#' cast causes
160             # $elem->symbol() to return something other than '$foo', we're
161             # cool.
162 61         3551 foreach my $elem ( _find( $ppi, 'PPI::Token::ArrayIndex' ) ) {
163 0         0 my $name = $elem->content();
164 0 0       0 $name =~ s/ \A \$ [#] /@/smx or next;
165 0         0 $var{$name} = 1;
166             }
167              
168             # Occasionally you see something like ${foo} outside quotes.
169             # This is legitimate, though PPI parses it as a cast followed by
170             # a block. On the assumption that there are fewer blocks than
171             # words in most Perl, we start at the top and work down. Perl
172             # also handles punctuation variables specified this way, but
173             # since PPI goes berserk when it sees this, we won't bother.
174 61         11612 foreach my $elem ( _find( $ppi, 'PPI::Structure::Block' ) ) {
175              
176 6 50       2014 my $previous = $elem->sprevious_sibling()
177             or next;
178 6 50       142 $previous->isa( 'PPI::Token::Cast' )
179             or next;
180 6         13 my $sigil = $previous->content();
181 6 50       31 $cast_allowed_for_bare_bracketed_variable->{ $sigil }
182             or next;
183              
184 6 50       18 if ( my @kids = $elem->schildren() ) {
185             # The simple case: we parsed a block whose contents,
186             # however they were parsed, are the contents of the
187             # token.
188 6 50       73 1 == @kids
189             or next;
190 6 50       16 $kids[0]->isa( 'PPI::Statement' )
191             or next;
192              
193 6 100       17 ( my $name = join '', map { $_->content() }
  14         55  
194 1         493 $kids[0]->children() ) =~ m/ \A @{[ VARIABLE_RE ]} \z /smxo
195             or next;
196              
197 2         24 $var{ "$sigil$name" } = 1;
198             } else {
199             # The downright ugly case. We have something like ${]}
200             # where PPI can't find the terminator. To solve this we
201             # need to go blundering through the parse until we find
202             # the closing terminator.
203 0 0       0 my $stmt = $elem->statement()
204             or next;
205 0 0       0 if ( my $finish = $elem->finish() ) {
206             # If we appear to have a terminated block, we may # {{
207             # have ${}}, which is the same as $}
208 0 0       0 my $next = $stmt->next_sibling()
209             or next;
210 0 0 0     0 $next->isa( 'PPI::Statement::UnmatchedBrace' )
211             and RIGHT_CURLY eq $next->content()
212             or next;
213 0         0 $var{ $sigil . $finish->content() } = 1;
214             } else {
215             # Otherwise we have something like # [
216             # ${]}
217 0 0       0 my $next = $stmt->next_sibling()
218             or next;
219 0 0       0 my $finish = $next->next_sibling()
220             or next;
221 0 0 0     0 $finish->isa( 'PPI::Statement::UnmatchedBrace' )
222             and RIGHT_CURLY eq $finish->content()
223             or next;
224 0         0 $var{ $sigil . $next->content() } = 1;
225             }
226             }
227             }
228              
229             # Yes, we might have nested string literals, like
230             # "... @{[ qq<$foo> ]} ..."
231 61         9097 foreach my $class ( @relevant_ppi_classes ) {
232 305         43178 foreach my $elem ( _find( $ppi, $class ) ) {
233              
234 6 50       588 my $ql = PPIx::QuoteLike->new( $elem )
235             or next;
236 6 100       16 $ql->interpolates()
237             or next;
238 4         16 foreach my $sym ( $ql->variables() ) {
239 6         45 $var{ $sym } = 1;
240             }
241             }
242             }
243              
244             # By the same token we might have a regexp
245             # TODO for consistency's sake, give PPIx::Regexp a variables()
246             # method.
247 61 50       10740 if ( HAVE_PPIX_REGEXP ) {
248 0         0 foreach my $class ( qw{
249             PPI::Token::QuoteLike::Regexp
250             PPI::Token::Regexp::Match
251             PPI::Token::Regexp::Substitute
252             } ) {
253 0         0 foreach my $elem ( _find( $ppi, $class ) ) {
254 0 0       0 my $re = PPIx::Regexp->new( $elem )
255             or next;
256 0 0       0 foreach my $code ( @{ $re->find(
  0         0  
257             'PPIx::Regexp::Token::Code' ) || [] } ) {
258 0         0 foreach my $name ( __variables( $code->ppi() ) ) {
259 0         0 $var{ $name } = 1;
260             }
261             }
262             }
263             }
264             }
265              
266 61         325 return ( keys %var );
267             }
268             }
269              
270             # We want __variables to work when passed a single token. So we go
271             # through this to do what we wish PPI did -- return an array for a
272             # PPI::Node, or return either the element itself or nothing otherwise.
273             sub _find {
274 488     488   813 my ( $elem, $class ) = @_;
275             $elem->isa( 'PPI::Node' )
276 488 100       1144 and return @{ $elem->find( $class ) || [] };
  464 100       868  
277 24 100       55 $elem->isa( $class )
278             and return $elem;
279 22         35 return;
280             }
281              
282             sub __instance {
283 187     187   303 my ( $object, $class ) = @_;
284 187 100       534 Scalar::Util::blessed( $object ) or return;
285 140         449 return $object->isa( $class );
286             }
287              
288             # The problem this solves is that PPI can parse '{_}' as containing a
289             # PPI::Token::Magic (which is a PPI::Token::Symbol), not a
290             # PPI::Token::Word. This code also returns true for '${_}', which is not
291             # a subscript but has the same basic problem. The latter gets caught
292             # later.
293             sub _is_bareword_subscript {
294 56     56   92 my ( $elem ) = @_;
295 56 50       126 $elem->content() =~ m/ \A \w+ \z /smx
296             or return;
297 0         0 my $parent;
298 0 0 0     0 $parent = $elem->parent()
      0        
299             and $parent->isa( 'PPI::Statement' )
300             and 1 == $parent->children()
301             or return;
302 0 0 0     0 $parent = $parent->parent()
      0        
      0        
303             and ( $parent->isa( 'PPI::Structure::Subscript' )
304             or $parent->isa( 'PPI::Structure::Block' ) )
305             and 1 == $parent->children()
306             or return;
307 0         0 my $start;
308 0 0 0     0 $start = $parent->start()
      0        
309             and $start->isa( 'PPI::Token::Structure' )
310             and q<{> eq $start->content()
311             or return;
312 0         0 return 1;
313             }
314              
315             sub line_number {
316 2     2 1 5 my ( $self ) = @_;
317 2   50     13 return ( $self->location() || [] )->[LOCATION_LINE];
318             }
319              
320             sub logical_filename {
321 2     2 1 7 my ( $self ) = @_;
322 2   50     5 return ( $self->location() || [] )->[LOCATION_LOGICAL_FILE];
323             }
324              
325             sub logical_line_number {
326 2     2 1 5 my ( $self ) = @_;
327 2   50     5 return ( $self->location() || [] )->[LOCATION_LOGICAL_LINE];
328             }
329              
330             {
331             our %REGEXP_CACHE;
332              
333             my %matching_bracket;
334              
335             BEGIN {
336 8     8   1001 %matching_bracket = qw/ ( ) [ ] { } < > /;
337             }
338              
339             sub __match_enclosed {
340 52     52   114 my ( $left ) = @_;
341 52         105 my $ql = quotemeta $left;
342              
343             $REGEXP_CACHE{$ql}
344 52 100       156 and return $REGEXP_CACHE{$ql};
345              
346 40 100       1319 if ( my $right = $matching_bracket{$left} ) {
347              
348             # Based on Regexp::Common $RE{balanced} 2.113 (because I
349             # can't use (?-1)
350              
351 19         34 my $ql = quotemeta $left;
352 19         28 my $qr = quotemeta $right;
353 19         30 my $pkg = __PACKAGE__;
354 19         49 my $r = "(??{ \$${pkg}::REGEXP_CACHE{'$ql'} })";
355              
356 19         59 my @parts = (
357             "(?>[^\\\\$ql$qr]+)",
358             "(?>\\\$[$ql$qr])",
359             '(?>\\\\.)',
360             $r,
361             );
362              
363             {
364 8     8   55 use re qw{ eval };
  8         14  
  8         6871  
  19         25  
365 19         30 local $" = '|';
366 19         1870 $REGEXP_CACHE{$ql} = qr/($ql(?:@parts)*$qr)/sm;
367             }
368              
369 19         112 return $REGEXP_CACHE{$ql};
370              
371             } else {
372              
373             # Based on Regexp::Common $RE{delimited}{-delim=>'`'}
374 21   33     540 return ( $REGEXP_CACHE{$ql} ||=
375             qr< (
376             (?: \Q$left\E )
377             (?: [^\\\Q$left\E]* (?: \\ . [^\\\Q$left\E]* )* )
378             (?: \Q$left\E )
379             ) >smx
380             );
381             }
382             }
383              
384             sub __matching_delimiter {
385 53     53   92 my ( $left ) = @_;
386 53 100       186 my $right = $matching_bracket{$left}
387             or return $left;
388 8         18 return $right;
389             }
390             }
391              
392             sub __normalize_interpolation_for_ppi {
393 40     40   3820 ( local $_ ) = @_;
394              
395             # "@{[ foo() ]}" => 'foo()'
396 40 100       1451 if ( m/ \A \@ [{] \s* ( $BRACKETED_RE ) \s* [}] \z /smx ) {
397 3         7 $_ = $1;
398 3         13 s/ \A [[] \s* //smx;
399 3         14 s/ \s* []] \z //smx;
400 3         12 return "$_";
401             }
402              
403             # "${\( foo() )}" => 'foo()'
404 37 100       984 if ( m/ \A \$ [{] \s* \\ \s* ( $PARENTHESIZED_RE ) \s* [}] \z /smx ) {
405 1         3 $_ = $1;
406 1         5 s/ \A [(] \s* //smx;
407 1         5 s/ \s* [)] \z //smx;
408 1         4 return "$_";
409             }
410              
411             # "${foo}" => '$foo'
412             m/ \A ( $SIGIL_AND_CAST_RE ) \s* [{] \s* ( $SYMBOL_NAME_RE ) \s* [}] \z /smx
413 36 100       433 and return "$1$2";
414              
415             # "${foo{bar}}" => '$foo{bar}'
416             # NOTE that this is a warning, and so not done.
417             # if ( m/ \A ( $SIGIL_AND_CAST_RE ) (?= [{] ) ( $BRACED_RE ) /smx ) {
418             # ( my $sigil, local $_ ) = ( $1, $2 );
419             # s/ \A [{] \s* //smx;
420             # s/ \s* [}] \z //smx;
421             # return "$sigil$_";
422             # }
423              
424             # "$ foo->{bar}" => '$foo->{bar}'
425 28 100       264 if ( m/ \A ( $SIGIL_AND_CAST_RE ) \s+ ( $SYMBOL_NAME_RE ) ( .* ) /smx ) {
426 1         7 return "$1$2$3";
427             }
428              
429             # Everything else
430 27         111 return "$_";
431             }
432              
433             sub statement {
434 3     3 1 57 my ( $self ) = @_;
435 3 50       9 my $top = $self->top()
436             or return;
437 3 50       18 $top->can( 'source' )
438             or return;
439 3 50       7 my $source = $top->source()
440             or return;
441 3 50       12 $source->can( 'statement' )
442             or return;
443 3         9 return $source->statement();
444             }
445              
446             sub visual_column_number {
447 2     2 1 4 my ( $self ) = @_;
448 2   50     6 return ( $self->location() || [] )->[LOCATION_COLUMN];
449             }
450              
451             # This handles two known cases where PPI misparses bracketed variable
452             # names.
453             # * $${foo} is parsed as '$$' when it is really a dereference of $foo.
454             # The argument is the '$$'
455             # * ${$} is parsed as an unterminated block followed by '$}'. The
456             # argument is the '$}'.
457              
458             {
459             my $special = {
460             '$$' => sub { # $${foo},$${$_[0]}
461             my ( $elem ) = @_;
462             my $next;
463             $next = $elem->snext_sibling()
464             and $next->isa( 'PPI::Structure::Subscript' )
465             or return;
466             my $start;
467             $start = $next->start()
468             and LEFT_CURLY eq $start->content()
469             or return;
470             my @kids = $next->schildren();
471             1 == @kids
472             and $kids[0]->isa( 'PPI::Statement' )
473             and @kids = $kids[0]->schildren();
474             if ( 1 == @kids ) {
475             # The $${foo} case
476             return join '', '$', map { $_->content() } @kids;
477             } else {
478             # The $${$_[0]} case. In this case the curly brackets
479             # are really a block, as
480             # $ perl -MO=Deparse -e '$${$_[0]}' makes clear. So we
481             # just return the '$$', since the '$_' will turn up in
482             # the course of things.
483             return $elem->content();
484             }
485             },
486             # {
487             '$}' => sub { # ${$}
488             my ( $elem ) = @_;
489             my $stmt;
490             $stmt = $elem->parent()
491             and $stmt->isa( 'PPI::Statement' )
492             or return;
493             my $block;
494             $block = $stmt->parent()
495             and $block->isa( 'PPI::Structure::Block' )
496             and not $block->finish()
497             or return;
498             my $sigil;
499             $sigil = $block->sprevious_sibling()
500             and $sigil->isa( 'PPI::Token::Cast' )
501             or return;
502             my $name = join '', map { $_->content() } $sigil,
503             $stmt->children();
504             chop $name;
505             return $name;
506             },
507             };
508              
509             sub _name_from_misparsed_magic {
510 56     56   71 my ( $elem ) = @_;
511 56 100       218 $elem->isa( 'PPI::Token::Magic' )
512             or return;
513 7 100       16 my $code = $special->{ $elem->content() }
514             or return;
515 2         11 return $code->( $elem );
516             }
517             }
518              
519             1;
520              
521             __END__