File Coverage

blib/lib/PPIx/Regexp/Token/Backreference.pm
Criterion Covered Total %
statement 74 80 92.5
branch 37 54 68.5
condition 2 2 100.0
subroutine 13 14 92.8
pod 3 3 100.0
total 129 153 84.3


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PPIx::Regexp::Token::Backreference - Represent a back reference
4              
5             =head1 SYNOPSIS
6              
7             use PPIx::Regexp::Dumper;
8             PPIx::Regexp::Dumper->new( 'qr{(foo|bar)baz\1}smx' )
9             ->print();
10              
11             =head1 INHERITANCE
12              
13             C is a
14             L.
15              
16             C has no descendants.
17              
18             =head1 DESCRIPTION
19              
20             This class represents back references of all sorts, both the traditional
21             numbered variety and the Perl 5.010 named kind.
22              
23             =head1 METHODS
24              
25             This class provides no public methods beyond those provided by its
26             superclass.
27              
28             =cut
29              
30             package PPIx::Regexp::Token::Backreference;
31              
32 9     9   71 use strict;
  9         21  
  9         286  
33 9     9   65 use warnings;
  9         19  
  9         293  
34              
35 9     9   45 use base qw{ PPIx::Regexp::Token::Reference };
  9         23  
  9         4636  
36              
37 9     9   65 use Carp qw{ confess };
  9         18  
  9         437  
38 9         834 use PPIx::Regexp::Constant qw{
39             MINIMUM_PERL
40             RE_CAPTURE_NAME
41             TOKEN_LITERAL
42             TOKEN_UNKNOWN
43             @CARP_NOT
44 9     9   53 };
  9         25  
45 9     9   60 use PPIx::Regexp::Util qw{ __to_ordinal_en width };
  9         21  
  9         10902  
46              
47             our $VERSION = '0.087';
48              
49             # Return true if the token can be quantified, and false otherwise
50             # sub can_be_quantified { return };
51              
52             sub explain {
53 3     3 1 22 my ( $self ) = @_;
54 3 100       12 $self->is_named()
55             and return sprintf q,
56             $self->name();
57 2 100       10 $self->is_relative()
58             and return sprintf
59             q,
60             __to_ordinal_en( - $self->number() ),
61             $self->absolute();
62 1         4 return sprintf q,
63             $self->absolute();
64             }
65              
66             {
67              
68             my %perl_version_introduced = (
69             g => '5.009005', # \g1 \g-1 \g{1} \g{-1}
70             k => '5.009005', # \k \k'name'
71             '?' => '5.009005', # (?P=name) (PCRE/Python)
72             );
73              
74             sub perl_version_introduced {
75 21     21 1 3150 my ( $self ) = @_;
76 21   100     86 return $perl_version_introduced{substr( $self->content(), 1, 1 )} ||
77             MINIMUM_PERL;
78             }
79              
80             }
81              
82             sub raw_width {
83 30     30 1 78 my ( $self ) = @_;
84 30 50       171 my $re = $self->top()
85             or return ( undef, undef ); # Shouldn't happen.
86 30         64 my @capture;
87 30 100       145 if ( $self->is_named() ) {
88 6         24 my $name = $self->name();
89 6 50       18 foreach my $elem ( @{ $re->find(
  6         26  
90             'PPIx::Regexp::Structure::NamedCapture' ) || [] } ) {
91 6 50       31 $elem->name() eq $name
92             or next;
93 6 50       28 $re->__token_post_order( $elem, $self ) < 0
94             or last;
95 6         17 push @capture, $elem;
96             }
97             } else {
98 24         94 my $number = $self->absolute();
99 24 50       46 foreach my $elem ( @{ $re->find(
  24         90  
100             'PPIx::Regexp::Structure::Capture' ) || [] } ) {
101 36 50       142 $elem->number() == $number
102             or next;
103 36 50       115 $re->__token_post_order( $elem, $self ) < 0
104             or last;
105 36         87 push @capture, $elem;
106             }
107             }
108 30 100       152 @capture == 1
109             and return $capture[0]->raw_width();
110 12         57 my ( $base_min, $base_max ) = $capture[0]->raw_width();
111 12         46 foreach my $elem ( @capture[ 1 .. $#capture ] ) {
112 12         35 my ( $ele_min, $ele_max ) = $elem->raw_width();
113 12 50       45 defined $ele_min
114             or $base_min = undef;
115 12 100       53 defined $base_min
    50          
116             and $base_min = $base_min == $ele_min ? $base_min : undef;
117 12 50       32 defined $ele_max
118             or $base_max = undef;
119 12 100       56 defined $base_max
    50          
120             and $base_max = $base_max == $ele_max ? $base_max : undef;
121             }
122 12         41 return ( $base_min, $base_max );
123             }
124              
125             my @external = ( # Recognition used externally
126             [ qr{ \A \( \? P = ( @{[ RE_CAPTURE_NAME ]} ) \) }smxo,
127             { is_named => 1 },
128             ],
129             );
130              
131             my @recognize_regexp = ( # recognition used internally
132             [
133             qr{ \A \\ (?: # numbered (including relative)
134             ( [0-9]+ ) |
135             g (?: ( -? [0-9]+ ) | \{ ( -? [0-9]+ ) \} )
136             )
137             }smx, { is_named => 0 }, ],
138             [
139             qr{ \A \\ (?: # named
140             g [{] ( @{[ RE_CAPTURE_NAME ]} ) [}] |
141             k (?: \< ( @{[ RE_CAPTURE_NAME ]} ) \> | # named with angles
142             ' ( @{[ RE_CAPTURE_NAME ]} ) ' ) # or quotes
143             )
144             }smxo, { is_named => 1 }, ],
145             );
146              
147             my %recognize = (
148             regexp => \@recognize_regexp,
149             repl => [
150             [ qr{ \A \\ ( [0-9]+ ) }smx, { is_named => 0 } ],
151             ],
152             );
153              
154             # This must be implemented by tokens which do not recognize themselves.
155             # The return is a list of list references. Each list reference must
156             # contain a regular expression that recognizes the token, and optionally
157             # a reference to a hash to pass to make_token as the class-specific
158             # arguments. The regular expression MUST be anchored to the beginning of
159             # the string.
160             sub __PPIX_TOKEN__recognize {
161 18 100   18   252 return __PACKAGE__->isa( scalar caller ) ?
162             ( @external, @recognize_regexp ) :
163             ( @external );
164             }
165              
166             sub __PPIX_TOKENIZER__regexp {
167 110     110   357 my ( undef, $tokenizer, $character ) = @_;
168              
169             # PCRE/Python back references are handled in
170             # PPIx::Regexp::Token::Structure, because they are parenthesized.
171              
172             # All the other styles are escaped.
173 110 100       373 $character eq '\\'
174             or return;
175              
176 46         95 foreach ( @{ $recognize{$tokenizer->get_mode()} } ) {
  46         166  
177 58         119 my ( $re, $arg ) = @{ $_ };
  58         175  
178 58 100       164 my $accept = $tokenizer->find_regexp( $re ) or next;
179 39         111 my %arg = ( %{ $arg }, tokenizer => $tokenizer );
  39         212  
180 39         183 return $tokenizer->make_token( $accept, __PACKAGE__, \%arg );
181             }
182              
183 7         21 return;
184             }
185              
186             sub __PPIX_TOKENIZER__repl {
187 13     13   48 my ( undef, $tokenizer ) = @_; # Invocant, $character unused
188              
189 13 50       39 $tokenizer->interpolates()
190             or return;
191              
192 13         79 goto &__PPIX_TOKENIZER__regexp;
193             }
194              
195             # Called by the lexer to disambiguate between captures, literals, and
196             # whatever. We have to return the number of tokens reblessed to
197             # TOKEN_UNKNOWN (i.e. either 0 or 1) because we get called after the
198             # parse is finalized.
199             sub __PPIX_LEXER__rebless {
200 25     25   110 my ( $self, %arg ) = @_;
201              
202             # Handle named back references
203 25 100       117 if ( $self->is_named() ) {
204 8 50       47 $arg{capture_name}{$self->name()}
205             and return 0;
206 0         0 return $self->__error();
207             }
208              
209             # Get the absolute capture group number.
210 17         88 my $absolute = $self->absolute();
211              
212             # If it is zero or negative, we have a relateive reference to a
213             # non-existent capture group.
214 17 50       66 $absolute <= 0
215             and return $self->__error();
216              
217             # If the absolute number is less than or equal to the maximum
218             # capture group number, we are good.
219             $absolute <= $arg{max_capture}
220 17 100       93 and return 0;
221              
222             # It's not a valid capture. If it's an octal literal, rebless it so.
223             # Note that we can't rebless single-digit numbers, since they can't
224             # be octal literals.
225 1         8 my $content = $self->content();
226 1 50       11 if ( $content =~ m/ \A \\ [0-7]{2,} \z /smx ) {
227 1         14 TOKEN_LITERAL->__PPIX_ELEM__rebless( $self );
228 1         6 return 0;
229             }
230              
231             # Anything else is an error.
232 0           return $self->__error();
233             }
234              
235             sub __error {
236 0     0     my ( $self, $msg ) = @_;
237 0 0         defined $msg
238             or $msg = 'No corresponding capture group';
239 0           TOKEN_UNKNOWN->__PPIX_ELEM__rebless( $self, error => $msg );
240 0           return 1;
241             }
242              
243             1;
244              
245             __END__