File Coverage

blib/lib/PPIx/QuoteLike/Utils.pm
Criterion Covered Total %
statement 134 165 81.2
branch 57 104 54.8
condition 6 40 15.0
subroutine 27 27 100.0
pod 7 7 100.0
total 231 343 67.3


line stmt bran cond sub pod time code
1             package PPIx::QuoteLike::Utils;
2              
3 8     8   2035 use 5.006;
  8         30  
4              
5 8     8   32 use strict;
  8         13  
  8         158  
6 8     8   33 use warnings;
  8         13  
  8         247  
7              
8 8     8   40 use base qw{ Exporter };
  8         11  
  8         785  
9              
10 8     8   47 use Carp;
  8         18  
  8         469  
11 8         816 use PPIx::QuoteLike::Constant qw{
12             HAVE_PPIX_REGEXP
13             LOCATION_LINE
14             LOCATION_CHARACTER
15             LOCATION_COLUMN
16             LOCATION_LOGICAL_LINE
17             LOCATION_LOGICAL_FILE
18             VARIABLE_RE
19             @CARP_NOT
20 8     8   378 };
  8         19  
21 8     8   3750 use Readonly;
  8         27200  
  8         361  
22 8     8   48 use Scalar::Util ();
  8         15  
  8         136  
23              
24 8     8   32 use constant LEFT_CURLY => q<{>;
  8         14  
  8         387  
25 8     8   38 use constant RIGHT_CURLY => q<}>;
  8         14  
  8         11115  
26              
27             our @EXPORT_OK = qw{
28             column_number
29             is_ppi_quotelike_element
30             line_number
31             logical_filename
32             logical_line_number
33             statement
34             visual_column_number
35             __instance
36             __match_enclosed
37             __matching_delimiter
38             __normalize_interpolation_for_ppi
39             __variables
40             };
41              
42             our $VERSION = '0.023';
43              
44             # Readonly::Scalar my $BRACED_RE => __match_enclosed( LEFT_CURLY );
45             Readonly::Scalar my $BRACKETED_RE => __match_enclosed( '[' ); # ]
46             Readonly::Scalar my $PARENTHESIZED_RE => __match_enclosed( '(' ); # )
47              
48             Readonly::Scalar my $SIGIL_AND_CAST_RE => qr/ \$ \# \$* | [\@\$] \$* /smx;
49             # The following is an interpretation of perldata Identifier Parsing for
50             # Perls before 5.10.
51             Readonly::Scalar my $NORMAL_SYMBOL_NAME_RE => qr/
52             (?:
53             (?: :: )* '?
54             \w+ (?: (?: (?: :: )+ '? | (?: :: )* ' ) \w+ )*
55             (?: :: )* |
56             [[:punct:]]
57             )
58             /smx;
59              
60             Readonly::Scalar my $SYMBOL_NAME_RE => qr/
61             \^ \w+ | $NORMAL_SYMBOL_NAME_RE
62             /smxo;
63              
64             sub column_number {
65 2     2 1 5 my ( $self ) = @_;
66 2   50     6 return ( $self->location() || [] )->[LOCATION_CHARACTER];
67             }
68              
69             {
70              
71             my @relevant_ppi_classes = qw{
72             PPI::Token::Quote
73             PPI::Token::QuoteLike::Backtick
74             PPI::Token::QuoteLike::Command
75             PPI::Token::QuoteLike::Readline
76             PPI::Token::HereDoc
77             };
78              
79             sub is_ppi_quotelike_element {
80 9     9 1 2191 my ( $elem ) = @_;
81              
82 9 100       24 ref $elem
83             or return;
84              
85 8 100       24 Scalar::Util::blessed( $elem )
86             or return;
87              
88 7         14 foreach my $class ( @relevant_ppi_classes ) {
89 25 100       127 $elem->isa( $class )
90             and return 1;
91             }
92              
93 2         9 return;
94             }
95              
96             my $cast_allowed_for_bare_bracketed_variable = {
97             map { $_ => 1 } qw{ @ $ % } };
98              
99             sub __variables {
100 66     66   13573 my ( $ppi ) = @_;
101              
102             # In case we need to manufacture any.
103 66         813 require PPIx::QuoteLike;
104              
105 66 50       214 Scalar::Util::blessed( $ppi )
106             or croak 'Argument must be an object';
107              
108             # TODO the following two lines are a crock, but there does not
109             # seem to be a good alternative. Bad alternatives:
110             # * Introduce PPIx::QuoteLike::Element. But it seems stupid to
111             # introduce a class simply to mark these as members of the
112             # PPIx::QuoteLike family.
113             # If I go this way at all, PPIx::QuoteLike::Element should be
114             # analogous to PPIx::Regexp::Element in that it carries at
115             # least the navigational and Perl version methods.
116             # * Use DOES(). But that was not introduced until 5.10. So I
117             # could:
118             # - Depend on UNIVERSAL::DOES. This kindly steps aside if
119             # UNIVERSAL::DOES() exists, but it seems stupid to introduce
120             # a dependency that is only needed under really old Perls.
121             # - Same as above, only make the dependence conditional on the
122             # version of Perl. This may actually be the best
123             # alternative, but it's still pretty crufty.
124 66 100       258 $ppi->isa( 'PPIx::QuoteLike' )
125             and return $ppi->variables();
126 65 50       194 $ppi->isa( 'PPIx::QuoteLike::Token' )
127             and return $ppi->variables();
128              
129 65         84 my %var;
130              
131             $ppi->isa( 'PPIx::Regexp::Element' )
132 65 50       232 and do {
133 0 0       0 foreach my $code ( @{ $ppi->find(
  0         0  
134             'PPIx::Regexp::Token::Code' ) || [] } ) {
135 0         0 foreach my $name ( __variables( $code->ppi() ) ) {
136 0         0 $var{ $name } = 1;
137             }
138             }
139 0         0 return keys %var;
140             };
141              
142 65 50       147 $ppi->isa( 'PPI::Element' )
143             or croak 'Argument must be a PPI::Element, ',
144             'PPIx::Regexp::Element, PPIx::QuoteLike, or ',
145             'PPIx::QuoteLike::Token';
146              
147 65         112 foreach my $sym ( _find( $ppi, 'PPI::Token::Symbol' ) ) {
148             # Eliminate rogue subscripts
149 57 50       12388 _is_bareword_subscript( $sym )
150             and next;
151 57 50       405 if ( defined( my $name = _name_from_misparsed_magic( $sym ) )
152             ) {
153             # We're $${name}, which is a dereference of $name
154 0         0 $var{$name} = 1;
155             } else {
156             # PPI got it right.
157 57         223 $var{ $sym->symbol() } = 1;
158             }
159             }
160              
161             # For some reason, PPI parses '$#foo' as a
162             # PPI::Token::ArrayIndex. $#$foo is parsed as a Cast followed
163             # by a Symbol, so as long as nobody decides the '$#' cast causes
164             # $elem->symbol() to return something other than '$foo', we're
165             # cool.
166 65         4353 foreach my $elem ( _find( $ppi, 'PPI::Token::ArrayIndex' ) ) {
167 0         0 my $name = $elem->content();
168 0 0       0 $name =~ s/ \A \$ [#] /@/smx or next;
169 0         0 $var{$name} = 1;
170             }
171              
172             # Occasionally you see something like ${foo} outside quotes.
173             # This is legitimate, though PPI parses it as a cast followed by
174             # a block. On the assumption that there are fewer blocks than
175             # words in most Perl, we start at the top and work down. Perl
176             # also handles punctuation variables specified this way, but
177             # since PPI goes berserk when it sees this, we won't bother.
178 65         12760 foreach my $elem ( _find( $ppi, 'PPI::Structure::Block' ) ) {
179              
180 7 50       2461 my $previous = $elem->sprevious_sibling()
181             or next;
182 7 50       190 $previous->isa( 'PPI::Token::Cast' )
183             or next;
184 7         19 my $sigil = $previous->content();
185 7 50       38 $cast_allowed_for_bare_bracketed_variable->{ $sigil }
186             or next;
187              
188 7 50       27 if ( my @kids = $elem->schildren() ) {
189             # The simple case: we parsed a block whose contents,
190             # however they were parsed, are the contents of the
191             # token.
192 7 50       80 1 == @kids
193             or next;
194 7 50       26 $kids[0]->isa( 'PPI::Statement' )
195             or next;
196              
197 7 100       22 ( my $name = join '', map { $_->content() }
  17         73  
198 2         380 $kids[0]->children() ) =~ m/ \A @{[ VARIABLE_RE ]} \z /smxo
199             or next;
200              
201 2         22 $var{ "$sigil$name" } = 1;
202             } else {
203             # The downright ugly case. We have something like ${]}
204             # where PPI can't find the terminator. To solve this we
205             # need to go blundering through the parse until we find
206             # the closing terminator.
207 0 0       0 my $stmt = $elem->statement()
208             or next;
209 0 0       0 if ( my $finish = $elem->finish() ) {
210             # If we appear to have a terminated block, we may # {{
211             # have ${}}, which is the same as $}
212 0 0       0 my $next = $stmt->next_sibling()
213             or next;
214 0 0 0     0 $next->isa( 'PPI::Statement::UnmatchedBrace' )
215             and RIGHT_CURLY eq $next->content()
216             or next;
217 0         0 $var{ $sigil . $finish->content() } = 1;
218             } else {
219             # Otherwise we have something like # [
220             # ${]}
221 0 0       0 my $next = $stmt->next_sibling()
222             or next;
223 0 0       0 my $finish = $next->next_sibling()
224             or next;
225 0 0 0     0 $finish->isa( 'PPI::Statement::UnmatchedBrace' )
226             and RIGHT_CURLY eq $finish->content()
227             or next;
228 0         0 $var{ $sigil . $next->content() } = 1;
229             }
230             }
231             }
232              
233             # Yes, we might have nested string literals, like
234             # "... @{[ qq<$foo> ]} ..."
235 65         9789 foreach my $class ( @relevant_ppi_classes ) {
236 325         47319 foreach my $elem ( _find( $ppi, $class ) ) {
237              
238 8 50       920 my $ql = PPIx::QuoteLike->new( $elem )
239             or next;
240 8 100       22 $ql->interpolates()
241             or next;
242 6         15 foreach my $sym ( $ql->variables() ) {
243 7         53 $var{ $sym } = 1;
244             }
245             }
246             }
247              
248             # By the same token we might have a regexp
249             # TODO for consistency's sake, give PPIx::Regexp a variables()
250             # method.
251 65 50       11797 if ( HAVE_PPIX_REGEXP ) {
252 0         0 foreach my $class ( qw{
253             PPI::Token::QuoteLike::Regexp
254             PPI::Token::Regexp::Match
255             PPI::Token::Regexp::Substitute
256             } ) {
257 0         0 foreach my $elem ( _find( $ppi, $class ) ) {
258 0 0       0 my $re = PPIx::Regexp->new( $elem )
259             or next;
260 0 0       0 foreach my $code ( @{ $re->find(
  0         0  
261             'PPIx::Regexp::Token::Code' ) || [] } ) {
262 0         0 foreach my $name ( __variables( $code->ppi() ) ) {
263 0         0 $var{ $name } = 1;
264             }
265             }
266             }
267             }
268             }
269              
270 65         372 return ( keys %var );
271             }
272             }
273              
274             # We want __variables to work when passed a single token. So we go
275             # through this to do what we wish PPI did -- return an array for a
276             # PPI::Node, or return either the element itself or nothing otherwise.
277             sub _find {
278 520     520   891 my ( $elem, $class ) = @_;
279             $elem->isa( 'PPI::Node' )
280 520 100       1163 and return @{ $elem->find( $class ) || [] };
  496 100       968  
281 24 100       73 $elem->isa( $class )
282             and return $elem;
283 22         30 return;
284             }
285              
286             sub __instance {
287 191     191   298 my ( $object, $class ) = @_;
288 191 100       573 Scalar::Util::blessed( $object ) or return;
289 144         503 return $object->isa( $class );
290             }
291              
292             # The problem this solves is that PPI can parse '{_}' as containing a
293             # PPI::Token::Magic (which is a PPI::Token::Symbol), not a
294             # PPI::Token::Word. This code also returns true for '${_}', which is not
295             # a subscript but has the same basic problem. The latter gets caught
296             # later.
297             sub _is_bareword_subscript {
298 57     57   122 my ( $elem ) = @_;
299 57 50       128 $elem->content() =~ m/ \A \w+ \z /smx
300             or return;
301 0         0 my $parent;
302 0 0 0     0 $parent = $elem->parent()
      0        
303             and $parent->isa( 'PPI::Statement' )
304             and 1 == $parent->children()
305             or return;
306 0 0 0     0 $parent = $parent->parent()
      0        
      0        
307             and ( $parent->isa( 'PPI::Structure::Subscript' )
308             or $parent->isa( 'PPI::Structure::Block' ) )
309             and 1 == $parent->children()
310             or return;
311 0         0 my $start;
312 0 0 0     0 $start = $parent->start()
      0        
313             and $start->isa( 'PPI::Token::Structure' )
314             and q<{> eq $start->content()
315             or return;
316 0         0 return 1;
317             }
318              
319             sub line_number {
320 2     2 1 5 my ( $self ) = @_;
321 2   50     6 return ( $self->location() || [] )->[LOCATION_LINE];
322             }
323              
324             sub logical_filename {
325 2     2 1 5 my ( $self ) = @_;
326 2   50     6 return ( $self->location() || [] )->[LOCATION_LOGICAL_FILE];
327             }
328              
329             sub logical_line_number {
330 2     2 1 5 my ( $self ) = @_;
331 2   50     5 return ( $self->location() || [] )->[LOCATION_LOGICAL_LINE];
332             }
333              
334             {
335             our %REGEXP_CACHE;
336              
337             my %matching_bracket;
338              
339             BEGIN {
340 8     8   1380 %matching_bracket = qw/ ( ) [ ] { } < > /;
341             }
342              
343             sub __match_enclosed {
344 54     54   120 my ( $left ) = @_;
345 54         114 my $ql = quotemeta $left;
346              
347             $REGEXP_CACHE{$ql}
348 54 100       161 and return $REGEXP_CACHE{$ql};
349              
350 41 100       1375 if ( my $right = $matching_bracket{$left} ) {
351              
352             # Based on Regexp::Common $RE{balanced} 2.113 (because I
353             # can't use (?-1)
354              
355 20         32 my $ql = quotemeta $left;
356 20         29 my $qr = quotemeta $right;
357 20         29 my $pkg = __PACKAGE__;
358 20         52 my $r = "(??{ \$${pkg}::REGEXP_CACHE{'$ql'} })";
359              
360 20         63 my @parts = (
361             "(?>[^\\\\$ql$qr]+)",
362             "(?>\\\$[$ql$qr])",
363             '(?>\\\\.)',
364             $r,
365             );
366              
367             {
368 8     8   65 use re qw{ eval };
  8         18  
  8         7099  
  20         26  
369 20         33 local $" = '|';
370 20         1911 $REGEXP_CACHE{$ql} = qr/($ql(?:@parts)*$qr)/sm;
371             }
372              
373 20         145 return $REGEXP_CACHE{$ql};
374              
375             } else {
376              
377             # Based on Regexp::Common $RE{delimited}{-delim=>'`'}
378 21   33     514 return ( $REGEXP_CACHE{$ql} ||=
379             qr< (
380             (?: \Q$left\E )
381             (?: [^\\\Q$left\E]* (?: \\ . [^\\\Q$left\E]* )* )
382             (?: \Q$left\E )
383             ) >smx
384             );
385             }
386             }
387              
388             sub __matching_delimiter {
389 55     55   92 my ( $left ) = @_;
390 55 100       193 my $right = $matching_bracket{$left}
391             or return $left;
392 8         25 return $right;
393             }
394             }
395              
396             sub __normalize_interpolation_for_ppi {
397 44     44   4274 ( local $_ ) = @_;
398              
399             # "@{[ foo() ]}" => 'foo()'
400 44 100       390 if ( m/ \A \@ [{] \s* ( $BRACKETED_RE ) \s* [}] \z /smxo ) {
401 3         10 $_ = $1;
402 3         13 s/ \A [[] \s* //smx;
403 3         17 s/ \s* []] \z //smx;
404 3         11 return "$_";
405             }
406              
407             # "${\( foo() )}" => 'foo()'
408 41 100       260 if ( m/ \A \$ [{] \s* \\ \s* ( $PARENTHESIZED_RE ) \s* [}] \z /smox ) {
409 1         3 $_ = $1;
410 1         3 s/ \A [(] \s* //smx;
411 1         5 s/ \s* [)] \z //smx;
412 1         3 return "$_";
413             }
414              
415             # "${foo}" => '$foo'
416             m/ \A ( $SIGIL_AND_CAST_RE ) \s*
417             [{] \s* ( $NORMAL_SYMBOL_NAME_RE ) \s* [}] \z /smxo
418 40 100       373 and return "$1$2";
419              
420             # "${foo{bar}}" => '$foo{bar}'
421             # NOTE that this is a warning, and so not done.
422             # if ( m/ \A ( $SIGIL_AND_CAST_RE ) (?= [{] ) ( $BRACED_RE ) /smx ) {
423             # ( my $sigil, local $_ ) = ( $1, $2 );
424             # s/ \A [{] \s* //smx;
425             # s/ \s* [}] \z //smx;
426             # return "$sigil$_";
427             # }
428              
429             # "$ foo->{bar}" => '$foo->{bar}'
430 32 100       227 if ( m/ \A ( $SIGIL_AND_CAST_RE ) \s+ ( $SYMBOL_NAME_RE ) ( .* ) /smxo ) {
431 1         6 return "$1$2$3";
432             }
433              
434             # Everything else
435 31         88 return "$_";
436             }
437              
438             sub statement {
439 3     3 1 57 my ( $self ) = @_;
440 3 50       16 my $top = $self->top()
441             or return;
442 3 50       12 $top->can( 'source' )
443             or return;
444 3 50       6 my $source = $top->source()
445             or return;
446 3 50       9 $source->can( 'statement' )
447             or return;
448 3         7 return $source->statement();
449             }
450              
451             sub visual_column_number {
452 2     2 1 3 my ( $self ) = @_;
453 2   50     6 return ( $self->location() || [] )->[LOCATION_COLUMN];
454             }
455              
456             # This handles two known cases where PPI misparses bracketed variable
457             # names.
458             # * $${foo} is parsed as '$$' when it is really a dereference of $foo.
459             # The argument is the '$$'
460             # * ${$} is parsed as an unterminated block followed by '$}'. The
461             # argument is the '$}'.
462              
463             {
464             my $special = {
465             '$$' => sub { # $${foo},$${$_[0]}
466             my ( $elem ) = @_;
467             my $next;
468             $next = $elem->snext_sibling()
469             and $next->isa( 'PPI::Structure::Subscript' )
470             or return;
471             my $start;
472             $start = $next->start()
473             and LEFT_CURLY eq $start->content()
474             or return;
475             my @kids = $next->schildren();
476             1 == @kids
477             and $kids[0]->isa( 'PPI::Statement' )
478             and @kids = $kids[0]->schildren();
479             if ( 1 == @kids ) {
480             # The $${foo} case
481             return join '', '$', map { $_->content() } @kids;
482             } else {
483             # The $${$_[0]} case. In this case the curly brackets
484             # are really a block, as
485             # $ perl -MO=Deparse -e '$${$_[0]}' makes clear. So we
486             # just return the '$$', since the '$_' will turn up in
487             # the course of things.
488             return $elem->content();
489             }
490             },
491             # {
492             '$}' => sub { # ${$}
493             my ( $elem ) = @_;
494             my $stmt;
495             $stmt = $elem->parent()
496             and $stmt->isa( 'PPI::Statement' )
497             or return;
498             my $block;
499             $block = $stmt->parent()
500             and $block->isa( 'PPI::Structure::Block' )
501             and not $block->finish()
502             or return;
503             my $sigil;
504             $sigil = $block->sprevious_sibling()
505             and $sigil->isa( 'PPI::Token::Cast' )
506             or return;
507             my $name = join '', map { $_->content() } $sigil,
508             $stmt->children();
509             chop $name;
510             return $name;
511             },
512             };
513              
514             sub _name_from_misparsed_magic {
515 57     57   79 my ( $elem ) = @_;
516 57 100       215 $elem->isa( 'PPI::Token::Magic' )
517             or return;
518 8 100       18 my $code = $special->{ $elem->content() }
519             or return;
520 2         13 return $code->( $elem );
521             }
522             }
523              
524             1;
525              
526             __END__