File Coverage

blib/lib/PPIx/Regexp/Token/Assertion.pm
Criterion Covered Total %
statement 45 47 95.7
branch 20 24 83.3
condition 10 14 71.4
subroutine 12 13 92.3
pod 3 3 100.0
total 90 101 89.1


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PPIx::Regexp::Token::Assertion - Represent a simple assertion.
4              
5             =head1 SYNOPSIS
6              
7             use PPIx::Regexp::Dumper;
8             PPIx::Regexp::Dumper->new( 'qr{\bfoo\b}smx' )
9             ->print();
10              
11             =head1 INHERITANCE
12              
13             C is a
14             L.
15              
16             C has no descendants.
17              
18             =head1 DESCRIPTION
19              
20             This class represents one of the simple assertions; that is, those that
21             are not defined via parentheses. This includes the zero-width assertions
22             C<^>, C<$>, C<\b>, C<\B>, C<\A>, C<\Z>, C<\z> and C<\G>, as well as:
23              
24             =over
25              
26             =item * The C<\z> assertion added in Perl 5.005,
27              
28             =item * The C<\K> assertion added in Perl 5.009005,
29              
30             =item * The C<\b{gcb}> assertion (and friends) added in Perl 5.021009.
31             Similar braced constructions (like C<\b{foo}>) are unknown tokens.
32              
33             =back
34              
35             =head1 METHODS
36              
37             This class provides no public methods beyond those provided by its
38             superclass.
39              
40             =cut
41              
42             package PPIx::Regexp::Token::Assertion;
43              
44 9     9   64 use strict;
  9         20  
  9         274  
45 9     9   50 use warnings;
  9         36  
  9         262  
46              
47 9     9   48 use base qw{ PPIx::Regexp::Token };
  9         21  
  9         805  
48              
49 9         1031 use PPIx::Regexp::Constant qw{
50             COOKIE_CLASS
51             COOKIE_LOOKAROUND_ASSERTION
52             LITERAL_LEFT_CURLY_ALLOWED
53             MINIMUM_PERL
54             TOKEN_LITERAL
55             TOKEN_UNKNOWN
56             @CARP_NOT
57 9     9   65 };
  9         19  
58              
59 9     9   61 use constant KEEP_EXPLANATION => 'In s///, keep everything before the \\K';
  9         17  
  9         8587  
60              
61             our $VERSION = '0.087_01';
62              
63             # Return true if the token can be quantified, and false otherwise
64             # sub can_be_quantified { return };
65              
66             my @braced_assertions = (
67             [ qr< \\ [bB] [{] (?: g | gcb | wb | sb ) [}] >smx, '5.021009' ],
68             [ qr< \\ [bB] [{] (?: lb ) [}] >smx, '5.023007' ],
69             [ qr< \\ [bB] [{] .*? [}] >smx, undef, TOKEN_UNKNOWN,
70             { error => 'Unknown bound type' },
71             ],
72             );
73              
74             =head2 is_matcher
75              
76             This method returns a true value because an assertion actually matches
77             something.
78              
79             =cut
80              
81 0     0 1 0 sub is_matcher { return 1; }
82              
83             sub perl_version_introduced {
84 24     24 1 6320 my ( $self ) = @_;
85             return ( $self->{perl_version_introduced} ||=
86 24   33     101 $self->_perl_version_introduced() );
87             }
88              
89             {
90              
91             my %perl_version_introduced = (
92             '\\K' => '5.009005',
93             '\\z' => '5.005',
94             );
95              
96             sub _perl_version_introduced {
97 24     24   40 my ( $self ) = @_;
98 24         74 my $content = $self->content();
99 24         54 foreach my $item ( @braced_assertions ) {
100 54 100       1185 $content =~ m/ \A $item->[0] \z /smx
101             and return $item->[1];
102             }
103 14   100     105 return $perl_version_introduced{ $content } || MINIMUM_PERL;
104             }
105              
106             }
107              
108             sub perl_version_removed {
109 24     24 1 11492 my ( $self ) = @_;
110             return ( $self->{perl_version_removed} ||=
111 24   66     100 $self->_perl_version_removed() );
112             }
113              
114             sub _perl_version_removed {
115 21     21   41 my ( $self ) = @_;
116 21 100       59 if ( '\\K' eq $self->content() ) {
117 3         10 my $parent = $self;
118 3         21 while ( $parent = $parent->parent() ) {
119 5 50       36 $parent->isa( 'PPIx::Regexp::Structure::Assertion' )
120             and return '5.031003';
121             }
122             }
123 21         75 return $self->SUPER::perl_version_removed();
124             }
125              
126             {
127             my %explanation = (
128             '$' => 'Assert position is at end of string or newline',
129             '\\A' => 'Assert position is at beginning of string',
130             '\\B' => 'Assert position is not at word/nonword boundary',
131             '\\B{gcb}' => 'Assert position is not at grapheme cluster boundary',
132             '\\B{g}' => 'Assert position is not at grapheme cluster boundary',
133             '\\B{lb}' => 'Assert position is not at line boundary',
134             '\\B{sb}' => 'Assert position is not at sentence boundary',
135             '\\B{wb}' => 'Assert position is not at word boundary',
136             '\\G' => 'Assert position is at pos()',
137             '\\K' => KEEP_EXPLANATION,
138             '\\Z' => 'Assert position is at end of string, or newline before end',
139             '\\b' => 'Assert position is at word/nonword boundary',
140             '\\b{gcb}' => 'Assert position is at grapheme cluster boundary',
141             '\\b{g}' => 'Assert position is at grapheme cluster boundary',
142             '\\b{lb}' => 'Assert position is at line boundary',
143             '\\b{sb}' => 'Assert position is at sentence boundary',
144             '\\b{wb}' => 'Assert position is at word boundary',
145             '\\z' => 'Assert position is at end of string',
146             '^' => 'Assert position is at beginning of string or after newline',
147             );
148              
149             sub __explanation {
150 20     20   75 return \%explanation;
151             }
152             }
153              
154             # An un-escaped literal left curly bracket can always follow this
155             # element.
156             sub __following_literal_left_curly_disallowed_in {
157 1     1   6 return LITERAL_LEFT_CURLY_ALLOWED;
158             }
159              
160             # By logic we should handle '$' here. But
161             # PPIx::Regexp::Token::Interpolation needs to process it to see if it is
162             # a sigil. If it is not, that module is expected to make it into an
163             # assertion. This is to try to keep the order in which the tokenizers
164             # are called non-critical, and try to keep all processing for a
165             # character in one place. Except for the back slash, which gets in
166             # everywhere.
167             #
168             ## my %assertion = map { $_ => 1 } qw{ ^ $ };
169             my %assertion = map { $_ => 1 } qw{ ^ };
170             my %escaped = map { $_ => 1 } qw{ b B A Z z G K };
171              
172             sub __PPIX_TOKENIZER__regexp {
173 153     153   430 my ( undef, $tokenizer, $character ) = @_;
174              
175             # Inside a character class, these are all literals.
176 153 100       2246 my $make = $tokenizer->cookie( COOKIE_CLASS ) ?
177             TOKEN_LITERAL :
178             __PACKAGE__;
179              
180             # '^' and '$'. Or at least '^'. See note above for '$'.
181 153 100       636 $assertion{$character}
182             and return $tokenizer->make_token( 1, $make );
183              
184 142 100       455 $character eq '\\' or return;
185              
186 82 50       232 defined ( my $next = $tokenizer->peek( 1 ) ) or return;
187              
188             # Handle assertions of the form \b{gcb} and friends, introduced in
189             # Perl 5.21.9. These are not recognized inside square bracketed
190             # character classes, where \b is not an assertion but a backspace
191             # character.
192 82 50       278 if ( __PACKAGE__ eq $make ) { # Only outside [...]
193 82         213 foreach my $item ( @braced_assertions ) {
194 228 100       6120 my $end = $tokenizer->find_regexp( qr/ \A $item->[0] /smx )
195             or next;
196 10 50       57 $item->[2]
197             or return $end;
198 0         0 return $tokenizer->make_token( $end, $item->[2], $item->[3] );
199             }
200             }
201              
202             # We special-case '\K' because it was retracted inside look-around
203             # assertions in 5.31.3.
204 72 100 66     2307 if ( 'K' eq $next && __PACKAGE__ eq $make &&
      100        
205             $tokenizer->__cookie_exists( COOKIE_LOOKAROUND_ASSERTION ) ) {
206 3         25 return $tokenizer->make_token( 2, $make, {
207             perl_version_removed => '5.031003',
208             explanation => KEEP_EXPLANATION .
209             '; retracted inside look-around assertion',
210             },
211             );
212             }
213              
214 69 100       332 $escaped{$next}
215             and return $tokenizer->make_token( 2, $make );
216              
217 37         109 return;
218             }
219              
220             1;
221              
222             __END__