| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package PPIx::Regexp::Tokenizer; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 9 |  |  | 9 |  | 60 | use strict; | 
|  | 9 |  |  |  |  | 17 |  | 
|  | 9 |  |  |  |  | 261 |  | 
| 4 | 9 |  |  | 9 |  | 54 | use warnings; | 
|  | 9 |  |  |  |  | 17 |  | 
|  | 9 |  |  |  |  | 235 |  | 
| 5 |  |  |  |  |  |  |  | 
| 6 | 9 |  |  | 9 |  | 47 | use base qw{ PPIx::Regexp::Support }; | 
|  | 9 |  |  |  |  | 19 |  | 
|  | 9 |  |  |  |  | 722 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 | 9 |  |  | 9 |  | 54 | use Carp qw{ carp croak confess }; | 
|  | 9 |  |  |  |  | 25 |  | 
|  | 9 |  |  |  |  | 600 |  | 
| 9 | 9 |  |  |  |  | 1263 | use PPIx::Regexp::Constant qw{ | 
| 10 |  |  |  |  |  |  | ARRAY_REF | 
| 11 |  |  |  |  |  |  | CODE_REF | 
| 12 |  |  |  |  |  |  | HASH_REF | 
| 13 |  |  |  |  |  |  | LOCATION_LINE | 
| 14 |  |  |  |  |  |  | LOCATION_CHARACTER | 
| 15 |  |  |  |  |  |  | LOCATION_COLUMN | 
| 16 |  |  |  |  |  |  | LOCATION_LOGICAL_LINE | 
| 17 |  |  |  |  |  |  | MINIMUM_PERL | 
| 18 |  |  |  |  |  |  | REGEXP_REF | 
| 19 |  |  |  |  |  |  | TOKEN_LITERAL | 
| 20 |  |  |  |  |  |  | TOKEN_UNKNOWN | 
| 21 |  |  |  |  |  |  | @CARP_NOT | 
| 22 | 9 |  |  | 9 |  | 56 | }; | 
|  | 9 |  |  |  |  | 38 |  | 
| 23 | 9 |  |  | 9 |  | 4042 | use PPIx::Regexp::Token::Assertion		(); | 
|  | 9 |  |  |  |  | 25 |  | 
|  | 9 |  |  |  |  | 208 |  | 
| 24 | 9 |  |  | 9 |  | 4151 | use PPIx::Regexp::Token::Backreference		(); | 
|  | 9 |  |  |  |  | 26 |  | 
|  | 9 |  |  |  |  | 250 |  | 
| 25 | 9 |  |  | 9 |  | 4524 | use PPIx::Regexp::Token::Backtrack		(); | 
|  | 9 |  |  |  |  | 23 |  | 
|  | 9 |  |  |  |  | 195 |  | 
| 26 | 9 |  |  | 9 |  | 4156 | use PPIx::Regexp::Token::CharClass::POSIX	(); | 
|  | 9 |  |  |  |  | 26 |  | 
|  | 9 |  |  |  |  | 195 |  | 
| 27 | 9 |  |  | 9 |  | 4083 | use PPIx::Regexp::Token::CharClass::POSIX::Unknown	(); | 
|  | 9 |  |  |  |  | 43 |  | 
|  | 9 |  |  |  |  | 183 |  | 
| 28 | 9 |  |  | 9 |  | 4230 | use PPIx::Regexp::Token::CharClass::Simple	(); | 
|  | 9 |  |  |  |  | 26 |  | 
|  | 9 |  |  |  |  | 207 |  | 
| 29 | 9 |  |  | 9 |  | 4097 | use PPIx::Regexp::Token::Code			(); | 
|  | 9 |  |  |  |  | 45 |  | 
|  | 9 |  |  |  |  | 263 |  | 
| 30 | 9 |  |  | 9 |  | 4288 | use PPIx::Regexp::Token::Comment		(); | 
|  | 9 |  |  |  |  | 22 |  | 
|  | 9 |  |  |  |  | 180 |  | 
| 31 | 9 |  |  | 9 |  | 3996 | use PPIx::Regexp::Token::Condition		(); | 
|  | 9 |  |  |  |  | 24 |  | 
|  | 9 |  |  |  |  | 270 |  | 
| 32 | 9 |  |  | 9 |  | 4219 | use PPIx::Regexp::Token::Control		(); | 
|  | 9 |  |  |  |  | 24 |  | 
|  | 9 |  |  |  |  | 289 |  | 
| 33 | 9 |  |  | 9 |  | 3960 | use PPIx::Regexp::Token::Delimiter		(); | 
|  | 9 |  |  |  |  | 41 |  | 
|  | 9 |  |  |  |  | 229 |  | 
| 34 | 9 |  |  | 9 |  | 3972 | use PPIx::Regexp::Token::Greediness		(); | 
|  | 9 |  |  |  |  | 29 |  | 
|  | 9 |  |  |  |  | 182 |  | 
| 35 | 9 |  |  | 9 |  | 3794 | use PPIx::Regexp::Token::GroupType::Assertion	(); | 
|  | 9 |  |  |  |  | 29 |  | 
|  | 9 |  |  |  |  | 242 |  | 
| 36 | 9 |  |  | 9 |  | 3858 | use PPIx::Regexp::Token::GroupType::Atomic_Script_Run	(); | 
|  | 9 |  |  |  |  | 28 |  | 
|  | 9 |  |  |  |  | 205 |  | 
| 37 | 9 |  |  | 9 |  | 3836 | use PPIx::Regexp::Token::GroupType::BranchReset	(); | 
|  | 9 |  |  |  |  | 28 |  | 
|  | 9 |  |  |  |  | 189 |  | 
| 38 | 9 |  |  | 9 |  | 3753 | use PPIx::Regexp::Token::GroupType::Code	(); | 
|  | 9 |  |  |  |  | 28 |  | 
|  | 9 |  |  |  |  | 192 |  | 
| 39 | 9 |  |  | 9 |  | 3721 | use PPIx::Regexp::Token::GroupType::Modifier	(); | 
|  | 9 |  |  |  |  | 26 |  | 
|  | 9 |  |  |  |  | 246 |  | 
| 40 | 9 |  |  | 9 |  | 3802 | use PPIx::Regexp::Token::GroupType::NamedCapture	(); | 
|  | 9 |  |  |  |  | 26 |  | 
|  | 9 |  |  |  |  | 184 |  | 
| 41 | 9 |  |  | 9 |  | 3677 | use PPIx::Regexp::Token::GroupType::Script_Run	(); | 
|  | 9 |  |  |  |  | 24 |  | 
|  | 9 |  |  |  |  | 202 |  | 
| 42 | 9 |  |  | 9 |  | 3912 | use PPIx::Regexp::Token::GroupType::Subexpression	(); | 
|  | 9 |  |  |  |  | 26 |  | 
|  | 9 |  |  |  |  | 197 |  | 
| 43 | 9 |  |  | 9 |  | 3971 | use PPIx::Regexp::Token::GroupType::Switch	(); | 
|  | 9 |  |  |  |  | 48 |  | 
|  | 9 |  |  |  |  | 215 |  | 
| 44 | 9 |  |  | 9 |  | 4387 | use PPIx::Regexp::Token::Interpolation		(); | 
|  | 9 |  |  |  |  | 42 |  | 
|  | 9 |  |  |  |  | 215 |  | 
| 45 | 9 |  |  | 9 |  | 4639 | use PPIx::Regexp::Token::Literal		(); | 
|  | 9 |  |  |  |  | 28 |  | 
|  | 9 |  |  |  |  | 238 |  | 
| 46 | 9 |  |  | 9 |  | 65 | use PPIx::Regexp::Token::Modifier		(); | 
|  | 9 |  |  |  |  | 20 |  | 
|  | 9 |  |  |  |  | 135 |  | 
| 47 | 9 |  |  | 9 |  | 4197 | use PPIx::Regexp::Token::Operator		(); | 
|  | 9 |  |  |  |  | 25 |  | 
|  | 9 |  |  |  |  | 185 |  | 
| 48 | 9 |  |  | 9 |  | 4114 | use PPIx::Regexp::Token::Quantifier		(); | 
|  | 9 |  |  |  |  | 35 |  | 
|  | 9 |  |  |  |  | 195 |  | 
| 49 | 9 |  |  | 9 |  | 61 | use PPIx::Regexp::Token::Recursion		(); | 
|  | 9 |  |  |  |  | 18 |  | 
|  | 9 |  |  |  |  | 127 |  | 
| 50 | 9 |  |  | 9 |  | 41 | use PPIx::Regexp::Token::Structure		(); | 
|  | 9 |  |  |  |  | 36 |  | 
|  | 9 |  |  |  |  | 113 |  | 
| 51 | 9 |  |  | 9 |  | 4053 | use PPIx::Regexp::Token::Unknown		(); | 
|  | 9 |  |  |  |  | 21 |  | 
|  | 9 |  |  |  |  | 179 |  | 
| 52 | 9 |  |  | 9 |  | 3867 | use PPIx::Regexp::Token::Whitespace		(); | 
|  | 9 |  |  |  |  | 30 |  | 
|  | 9 |  |  |  |  | 261 |  | 
| 53 | 9 |  |  |  |  | 491 | use PPIx::Regexp::Util qw{ | 
| 54 |  |  |  |  |  |  | is_ppi_regexp_element | 
| 55 |  |  |  |  |  |  | __instance | 
| 56 | 9 |  |  | 9 |  | 63 | }; | 
|  | 9 |  |  |  |  | 25 |  | 
| 57 |  |  |  |  |  |  |  | 
| 58 | 9 |  |  | 9 |  | 75 | use Scalar::Util qw{ looks_like_number }; | 
|  | 9 |  |  |  |  | 20 |  | 
|  | 9 |  |  |  |  | 57942 |  | 
| 59 |  |  |  |  |  |  |  | 
| 60 |  |  |  |  |  |  | our $VERSION = '0.087_01'; | 
| 61 |  |  |  |  |  |  |  | 
| 62 |  |  |  |  |  |  | our $DEFAULT_POSTDEREF; | 
| 63 |  |  |  |  |  |  | defined $DEFAULT_POSTDEREF | 
| 64 |  |  |  |  |  |  | or $DEFAULT_POSTDEREF = 1; | 
| 65 |  |  |  |  |  |  |  | 
| 66 |  |  |  |  |  |  | { | 
| 67 |  |  |  |  |  |  | # Names of classes containing tokenization machinery. There are few | 
| 68 |  |  |  |  |  |  | # known ordering requirements, since each class recognizes its own, | 
| 69 |  |  |  |  |  |  | # and I have tried to prevent overlap. Absent such constraints, the | 
| 70 |  |  |  |  |  |  | # order is in perceived frequency of acceptance, to keep the search | 
| 71 |  |  |  |  |  |  | # as short as possible. If I were conscientious I would gather | 
| 72 |  |  |  |  |  |  | # statistics on this. | 
| 73 |  |  |  |  |  |  | my @classes = (	# TODO make readonly when acceptable way appears | 
| 74 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Literal', | 
| 75 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Interpolation', | 
| 76 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Control',			# Note 1 | 
| 77 |  |  |  |  |  |  | 'PPIx::Regexp::Token::CharClass::Simple',	# Note 2 | 
| 78 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Quantifier', | 
| 79 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Greediness', | 
| 80 |  |  |  |  |  |  | 'PPIx::Regexp::Token::CharClass::POSIX',	# Note 3 | 
| 81 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Structure', | 
| 82 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Assertion', | 
| 83 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Backreference', | 
| 84 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Operator',		# Note 4 | 
| 85 |  |  |  |  |  |  | ); | 
| 86 |  |  |  |  |  |  |  | 
| 87 |  |  |  |  |  |  | # Note 1: If we are in quote mode ( \Q ... \E ), Control makes a | 
| 88 |  |  |  |  |  |  | #		literal out of anything it sees other than \E. So it | 
| 89 |  |  |  |  |  |  | #		needs to come before almost all other tokenizers. Not | 
| 90 |  |  |  |  |  |  | #		Literal, which already makes literals, and not | 
| 91 |  |  |  |  |  |  | #		Interpolation, which is legal in quote mode, but | 
| 92 |  |  |  |  |  |  | #		everything else. | 
| 93 |  |  |  |  |  |  |  | 
| 94 |  |  |  |  |  |  | # Note 2: CharClass::Simple must come after Literal, because it | 
| 95 |  |  |  |  |  |  | #		relies on Literal to recognize a Unicode named character | 
| 96 |  |  |  |  |  |  | #		( \N{something} ), so any \N that comes through to it | 
| 97 |  |  |  |  |  |  | #		must be the \N simple character class (which represents | 
| 98 |  |  |  |  |  |  | #		anything but a newline, and was introduced in Perl | 
| 99 |  |  |  |  |  |  | #		5.11.0. | 
| 100 |  |  |  |  |  |  |  | 
| 101 |  |  |  |  |  |  | # Note 3: CharClass::POSIX has to come before Structure, since both | 
| 102 |  |  |  |  |  |  | #		look for square brackets, and CharClass::POSIX is the | 
| 103 |  |  |  |  |  |  | #		more particular. | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | # Note 4: Operator relies on Literal making the characters literal | 
| 106 |  |  |  |  |  |  | #		if they appear in a context where they can not be | 
| 107 |  |  |  |  |  |  | #		operators, and Control making them literals if quoting, | 
| 108 |  |  |  |  |  |  | #		so it must come after both. | 
| 109 |  |  |  |  |  |  |  | 
| 110 |  |  |  |  |  |  | # Return the declared tokenizer classes. | 
| 111 |  |  |  |  |  |  | sub __tokenizer_classes { | 
| 112 | 538 |  |  | 538 |  | 2949 | return @classes; | 
| 113 |  |  |  |  |  |  | } | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | } | 
| 116 |  |  |  |  |  |  |  | 
| 117 |  |  |  |  |  |  | { | 
| 118 |  |  |  |  |  |  | my $errstr; | 
| 119 |  |  |  |  |  |  |  | 
| 120 |  |  |  |  |  |  | sub new { | 
| 121 | 739 |  |  | 739 | 1 | 92344 | my ( $class, $re, %args ) = @_; | 
| 122 | 739 | 50 |  |  |  | 2245 | ref $class and $class = ref $class; | 
| 123 |  |  |  |  |  |  |  | 
| 124 | 739 |  |  |  |  | 1426 | $errstr = undef; | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  | exists $args{default_modifiers} | 
| 127 |  |  |  |  |  |  | and ARRAY_REF ne ref $args{default_modifiers} | 
| 128 | 739 | 50 | 66 |  |  | 2926 | and do { | 
| 129 | 0 |  |  |  |  | 0 | $errstr = 'default_modifiers must be an array reference'; | 
| 130 | 0 |  |  |  |  | 0 | return; | 
| 131 |  |  |  |  |  |  | }; | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | my $self = { | 
| 134 |  |  |  |  |  |  | index_locations => $args{index_locations},	# Index locations | 
| 135 |  |  |  |  |  |  | capture => undef,	# Captures from find_regexp. | 
| 136 |  |  |  |  |  |  | content => undef,	# The string we are tokenizing. | 
| 137 |  |  |  |  |  |  | cookie => {},	# Cookies | 
| 138 |  |  |  |  |  |  | cursor_curr => 0,	# The current position in the string. | 
| 139 |  |  |  |  |  |  | cursor_limit => undef, # The end of the portion of the | 
| 140 |  |  |  |  |  |  | # string being tokenized. | 
| 141 |  |  |  |  |  |  | cursor_orig => undef, # Position of cursor when tokenizer | 
| 142 |  |  |  |  |  |  | # called. Used by get_token to prevent | 
| 143 |  |  |  |  |  |  | # recursion. | 
| 144 |  |  |  |  |  |  | cursor_modifiers => undef,	# Position of modifiers. | 
| 145 |  |  |  |  |  |  | default_modifiers => $args{default_modifiers} || [], | 
| 146 |  |  |  |  |  |  | delimiter_finish => undef,	# Finishing delimiter of regexp. | 
| 147 |  |  |  |  |  |  | delimiter_start => undef,	# Starting delimiter of regexp. | 
| 148 |  |  |  |  |  |  | encoding => $args{encoding}, # Character encoding. | 
| 149 |  |  |  |  |  |  | expect => undef,	# Extra classes to expect. | 
| 150 |  |  |  |  |  |  | expect_next => undef, # Extra classes as of next parse cycle | 
| 151 |  |  |  |  |  |  | failures => 0,	# Number of parse failures. | 
| 152 |  |  |  |  |  |  | find => undef,	# String for find_regexp | 
| 153 |  |  |  |  |  |  | known => {},	# Known tokenizers, by mode. | 
| 154 |  |  |  |  |  |  | location => $args{location}, | 
| 155 |  |  |  |  |  |  | match => undef,	# Match from find_regexp. | 
| 156 |  |  |  |  |  |  | mode => 'init',	# Initialize | 
| 157 |  |  |  |  |  |  | modifiers => [{}],	# Modifier hash. | 
| 158 |  |  |  |  |  |  | pending => [],	# Tokens made but not returned. | 
| 159 |  |  |  |  |  |  | prior => TOKEN_UNKNOWN,	# Prior significant token. | 
| 160 |  |  |  |  |  |  | source => $re,	# The object we were initialized with. | 
| 161 |  |  |  |  |  |  | strict => $args{strict},	# like "use re 'strict';". | 
| 162 |  |  |  |  |  |  | trace => __PACKAGE__->__defined_or( | 
| 163 | 739 |  | 100 |  |  | 9538 | $args{trace}, $ENV{PPIX_REGEXP_TOKENIZER_TRACE}, 0 ), | 
| 164 |  |  |  |  |  |  | }; | 
| 165 |  |  |  |  |  |  |  | 
| 166 | 739 | 100 |  |  |  | 3510 | if ( __instance( $re, 'PPI::Element' ) ) { | 
|  |  | 100 |  |  |  |  |  | 
| 167 | 11 | 50 |  |  |  | 53 | is_ppi_regexp_element( $re ) | 
| 168 |  |  |  |  |  |  | or return __set_errstr( ref $re, 'not supported by', $class ); | 
| 169 |  |  |  |  |  |  | # TODO conditionalizstion on PPI class does not really | 
| 170 |  |  |  |  |  |  | # belong here, but at the moment I have no other idea of | 
| 171 |  |  |  |  |  |  | # where to put it. | 
| 172 | 11 | 50 |  |  |  | 112 | $self->{content} = $re->isa( 'PPI::Token::HereDoc' ) ? | 
| 173 |  |  |  |  |  |  | join( '', $re->content(), "\n", $re->heredoc(), | 
| 174 |  |  |  |  |  |  | $re->terminator(), "\n" ) : | 
| 175 |  |  |  |  |  |  | $re->content(); | 
| 176 |  |  |  |  |  |  | } elsif ( ref $re ) { | 
| 177 | 2 |  |  |  |  | 10 | return __set_errstr( ref $re, 'not supported' ); | 
| 178 |  |  |  |  |  |  | } else { | 
| 179 | 726 |  |  |  |  | 1998 | $self->{content} = $re; | 
| 180 |  |  |  |  |  |  | } | 
| 181 |  |  |  |  |  |  |  | 
| 182 | 737 |  |  |  |  | 1656 | bless $self, $class; | 
| 183 |  |  |  |  |  |  |  | 
| 184 | 737 |  |  |  |  | 2895 | $self->{content} = $self->decode( $self->{content} ); | 
| 185 |  |  |  |  |  |  |  | 
| 186 | 737 |  |  |  |  | 2057 | $self->{cursor_limit} = length $self->{content}; | 
| 187 |  |  |  |  |  |  |  | 
| 188 |  |  |  |  |  |  | $self->{trace} | 
| 189 | 737 | 50 |  |  |  | 2454 | and warn "\ntokenizing '$self->{content}'\n"; | 
| 190 |  |  |  |  |  |  |  | 
| 191 | 737 |  |  |  |  | 2632 | return $self; | 
| 192 |  |  |  |  |  |  | } | 
| 193 |  |  |  |  |  |  |  | 
| 194 |  |  |  |  |  |  | sub __set_errstr { | 
| 195 | 2 |  |  | 2 |  | 7 | $errstr = join ' ', @_; | 
| 196 | 2 |  |  |  |  | 16 | return; | 
| 197 |  |  |  |  |  |  | } | 
| 198 |  |  |  |  |  |  |  | 
| 199 |  |  |  |  |  |  | sub errstr { | 
| 200 | 2 |  |  | 2 | 1 | 6 | return $errstr; | 
| 201 |  |  |  |  |  |  | } | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | } | 
| 204 |  |  |  |  |  |  |  | 
| 205 |  |  |  |  |  |  | sub capture { | 
| 206 | 712 |  |  | 712 | 1 | 1755 | my ( $self ) = @_; | 
| 207 | 712 | 100 |  |  |  | 2151 | $self->{capture} or return; | 
| 208 | 691 | 50 |  |  |  | 1667 | defined wantarray or return; | 
| 209 | 691 | 50 |  |  |  | 1623 | return wantarray ? @{ $self->{capture} } : $self->{capture}; | 
|  | 691 |  |  |  |  | 3284 |  | 
| 210 |  |  |  |  |  |  | } | 
| 211 |  |  |  |  |  |  |  | 
| 212 |  |  |  |  |  |  | sub content { | 
| 213 | 1 |  |  | 1 | 1 | 10 | my ( $self ) = @_; | 
| 214 | 1 |  |  |  |  | 4 | return $self->{content}; | 
| 215 |  |  |  |  |  |  | } | 
| 216 |  |  |  |  |  |  |  | 
| 217 |  |  |  |  |  |  | sub cookie { | 
| 218 | 10177 |  |  | 10177 | 1 | 18543 | my ( $self, $name, @args ) = @_; | 
| 219 | 10177 | 50 |  |  |  | 18901 | defined $name | 
| 220 |  |  |  |  |  |  | or confess "Programming error - undefined cookie name"; | 
| 221 | 10177 | 50 |  |  |  | 19653 | if ( $self->{trace} ) { | 
| 222 | 0 |  |  |  |  | 0 | local $" = ', '; | 
| 223 | 0 |  |  |  |  | 0 | warn "cookie( '$name', @args )\n"; | 
| 224 |  |  |  |  |  |  | } | 
| 225 | 10177 | 100 |  |  |  | 38457 | @args or return $self->{cookie}{$name}; | 
| 226 | 721 |  |  |  |  | 1804 | my $cookie = shift @args; | 
| 227 | 721 | 100 |  |  |  | 2516 | if ( CODE_REF eq ref $cookie ) { | 
|  |  | 50 |  |  |  |  |  | 
| 228 | 593 |  |  |  |  | 2901 | return ( $self->{cookie}{$name} = $cookie ); | 
| 229 |  |  |  |  |  |  | } elsif ( defined $cookie ) { | 
| 230 | 0 |  |  |  |  | 0 | confess "Programming error - cookie must be CODE ref or undef"; | 
| 231 |  |  |  |  |  |  | } else { | 
| 232 | 128 |  |  |  |  | 517 | return delete $self->{cookie}{$name}; | 
| 233 |  |  |  |  |  |  | } | 
| 234 |  |  |  |  |  |  | } | 
| 235 |  |  |  |  |  |  |  | 
| 236 |  |  |  |  |  |  | # NOTE: Currently this is called only against | 
| 237 |  |  |  |  |  |  | # COOKIE_LOOKAROUND_ASSERTION, once in PPIx::Token::GroupType::Assertion | 
| 238 |  |  |  |  |  |  | # to prevent the cookie from being remade if it already exists, and once | 
| 239 |  |  |  |  |  |  | # in PPIx::Regexp::Token::Assertion to determine if \K is inside a | 
| 240 |  |  |  |  |  |  | # lookaround assertion. If it gets used other places, or if there is | 
| 241 |  |  |  |  |  |  | # call for it, I should consider removing the underscores and | 
| 242 |  |  |  |  |  |  | # documenting it as public. | 
| 243 |  |  |  |  |  |  | sub __cookie_exists { | 
| 244 | 57 |  |  | 57 |  | 165 | my ( $self, $name ) = @_; | 
| 245 | 57 | 50 |  |  |  | 171 | defined $name | 
| 246 |  |  |  |  |  |  | or confess "Programming error - undefined cookie name"; | 
| 247 | 57 |  |  |  |  | 222 | return $self->{cookie}{$name}; | 
| 248 |  |  |  |  |  |  | } | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | sub default_modifiers { | 
| 251 | 0 |  |  | 0 | 1 | 0 | my ( $self ) = @_; | 
| 252 | 0 |  |  |  |  | 0 | return [ @{ $self->{default_modifiers} } ]; | 
|  | 0 |  |  |  |  | 0 |  | 
| 253 |  |  |  |  |  |  | } | 
| 254 |  |  |  |  |  |  |  | 
| 255 |  |  |  |  |  |  | sub __effective_modifiers { | 
| 256 | 332 |  |  | 332 |  | 844 | my ( $self ) = @_; | 
| 257 |  |  |  |  |  |  | HASH_REF eq ref $self->{effective_modifiers} | 
| 258 | 332 | 100 |  |  |  | 1277 | or return {}; | 
| 259 | 324 |  |  |  |  | 664 | return { %{ $self->{effective_modifiers} } }; | 
|  | 324 |  |  |  |  | 1293 |  | 
| 260 |  |  |  |  |  |  | } | 
| 261 |  |  |  |  |  |  |  | 
| 262 |  |  |  |  |  |  | sub encoding { | 
| 263 | 0 |  |  | 0 | 1 | 0 | my ( $self ) = @_; | 
| 264 | 0 |  |  |  |  | 0 | return $self->{encoding}; | 
| 265 |  |  |  |  |  |  | } | 
| 266 |  |  |  |  |  |  |  | 
| 267 |  |  |  |  |  |  | sub expect { | 
| 268 | 330 |  |  | 330 | 1 | 1395 | my ( $self, @args ) = @_; | 
| 269 |  |  |  |  |  |  |  | 
| 270 |  |  |  |  |  |  | @args | 
| 271 | 330 | 50 |  |  |  | 842 | or return; | 
| 272 |  |  |  |  |  |  |  | 
| 273 |  |  |  |  |  |  | $self->{expect_next} = [ | 
| 274 | 330 | 50 |  |  |  | 787 | map { m/ \A PPIx::Regexp:: /smx ? $_ : 'PPIx::Regexp::' . $_ } | 
|  | 2602 |  |  |  |  | 7939 |  | 
| 275 |  |  |  |  |  |  | @args | 
| 276 |  |  |  |  |  |  | ]; | 
| 277 | 330 |  |  |  |  | 1089 | $self->{expect} = undef; | 
| 278 | 330 |  |  |  |  | 776 | return; | 
| 279 |  |  |  |  |  |  | } | 
| 280 |  |  |  |  |  |  |  | 
| 281 |  |  |  |  |  |  | sub failures { | 
| 282 | 8 |  |  | 8 | 1 | 22 | my ( $self ) = @_; | 
| 283 | 8 |  |  |  |  | 21 | return $self->{failures}; | 
| 284 |  |  |  |  |  |  | } | 
| 285 |  |  |  |  |  |  |  | 
| 286 |  |  |  |  |  |  | sub find_matching_delimiter { | 
| 287 | 583 |  |  | 583 | 1 | 1535 | my ( $self ) = @_; | 
| 288 | 583 |  | 100 |  |  | 2555 | $self->{cursor_curr} ||= 0; | 
| 289 |  |  |  |  |  |  | my $start = substr | 
| 290 |  |  |  |  |  |  | $self->{content}, | 
| 291 |  |  |  |  |  |  | $self->{cursor_curr}, | 
| 292 | 583 |  |  |  |  | 1645 | 1; | 
| 293 |  |  |  |  |  |  |  | 
| 294 | 583 |  |  |  |  | 1021 | my $inx = $self->{cursor_curr}; | 
| 295 | 583 |  | 66 |  |  | 2131 | my $finish = ( | 
| 296 |  |  |  |  |  |  | my $bracketed = $self->close_bracket( $start ) ) || $start; | 
| 297 |  |  |  |  |  |  |  | 
| 298 |  |  |  |  |  |  | =begin comment | 
| 299 |  |  |  |  |  |  |  | 
| 300 |  |  |  |  |  |  | $self->{trace} | 
| 301 |  |  |  |  |  |  | and warn "Find matching delimiter: Start with '$start' at $self->{cursor_curr}, end with '$finish' at or before $self->{cursor_limit}\n"; | 
| 302 |  |  |  |  |  |  |  | 
| 303 |  |  |  |  |  |  | =end comment | 
| 304 |  |  |  |  |  |  |  | 
| 305 |  |  |  |  |  |  | =cut | 
| 306 |  |  |  |  |  |  |  | 
| 307 | 583 |  |  |  |  | 1199 | my $nest = 0; | 
| 308 |  |  |  |  |  |  |  | 
| 309 | 583 |  |  |  |  | 1894 | while ( ++$inx < $self->{cursor_limit} ) { | 
| 310 | 6122 |  |  |  |  | 9818 | my $char = substr $self->{content}, $inx, 1; | 
| 311 |  |  |  |  |  |  |  | 
| 312 |  |  |  |  |  |  | =begin comment | 
| 313 |  |  |  |  |  |  |  | 
| 314 |  |  |  |  |  |  | $self->{trace} | 
| 315 |  |  |  |  |  |  | and warn "    looking at '$char' at $inx, nest level $nest\n"; | 
| 316 |  |  |  |  |  |  |  | 
| 317 |  |  |  |  |  |  | =end comment | 
| 318 |  |  |  |  |  |  |  | 
| 319 |  |  |  |  |  |  | =cut | 
| 320 |  |  |  |  |  |  |  | 
| 321 | 6122 | 100 | 100 |  |  | 22768 | if ( $char eq '\\' && $finish ne '\\' ) { | 
|  |  | 100 | 100 |  |  |  |  | 
|  |  | 100 |  |  |  |  |  | 
| 322 | 317 |  |  |  |  | 746 | ++$inx; | 
| 323 |  |  |  |  |  |  | } elsif ( $bracketed && $char eq $start ) { | 
| 324 | 1 |  |  |  |  | 3 | ++$nest; | 
| 325 |  |  |  |  |  |  | } elsif ( $char eq $finish ) { | 
| 326 |  |  |  |  |  |  | --$nest < 0 | 
| 327 | 582 | 100 |  |  |  | 3474 | and return $inx - $self->{cursor_curr}; | 
| 328 |  |  |  |  |  |  | } | 
| 329 |  |  |  |  |  |  | } | 
| 330 |  |  |  |  |  |  |  | 
| 331 | 2 |  |  |  |  | 7 | return; | 
| 332 |  |  |  |  |  |  | } | 
| 333 |  |  |  |  |  |  |  | 
| 334 |  |  |  |  |  |  | sub find_regexp { | 
| 335 | 16631 |  |  | 16631 | 1 | 30844 | my ( $self, $regexp ) = @_; | 
| 336 |  |  |  |  |  |  |  | 
| 337 | 16631 | 50 | 0 |  |  | 35975 | REGEXP_REF eq ref $regexp | 
| 338 |  |  |  |  |  |  | or confess | 
| 339 |  |  |  |  |  |  | 'Argument is a ', ( ref $regexp || 'scalar' ), ' not a Regexp'; | 
| 340 |  |  |  |  |  |  |  | 
| 341 | 16631 | 100 |  |  |  | 36073 | defined $self->{find} or $self->_remainder(); | 
| 342 |  |  |  |  |  |  |  | 
| 343 | 16631 | 100 |  |  |  | 99150 | $self->{find} =~ $regexp | 
| 344 |  |  |  |  |  |  | or return; | 
| 345 |  |  |  |  |  |  |  | 
| 346 | 1840 |  |  |  |  | 3429 | my @capture; | 
| 347 | 1840 |  |  |  |  | 6642 | foreach my $inx ( 0 .. $#+ ) { | 
| 348 | 4247 | 100 | 66 |  |  | 18641 | if ( defined $-[$inx] && defined $+[$inx] ) { | 
| 349 |  |  |  |  |  |  | push @capture, $self->{capture} = substr | 
| 350 |  |  |  |  |  |  | $self->{find}, | 
| 351 | 3758 |  |  |  |  | 19976 | $-[$inx], | 
| 352 |  |  |  |  |  |  | $+[$inx] - $-[$inx]; | 
| 353 |  |  |  |  |  |  | } else { | 
| 354 | 489 |  |  |  |  | 1531 | push @capture, undef; | 
| 355 |  |  |  |  |  |  | } | 
| 356 |  |  |  |  |  |  | } | 
| 357 | 1840 |  |  |  |  | 4880 | $self->{match} = shift @capture; | 
| 358 | 1840 |  |  |  |  | 3808 | $self->{capture} = \@capture; | 
| 359 |  |  |  |  |  |  |  | 
| 360 |  |  |  |  |  |  | # The following circumlocution seems to be needed under Perl 5.13.0 | 
| 361 |  |  |  |  |  |  | # for reasons I do not fathom -- at least in the case where | 
| 362 |  |  |  |  |  |  | # wantarray is false. RT 56864 details the symptoms, which I was | 
| 363 |  |  |  |  |  |  | # never able to reproduce outside Perl::Critic. But returning $+[0] | 
| 364 |  |  |  |  |  |  | # directly, the value could transmogrify between here and the | 
| 365 |  |  |  |  |  |  | # calling module. | 
| 366 |  |  |  |  |  |  | ##  my @data = ( $-[0], $+[0] ); | 
| 367 |  |  |  |  |  |  | ##  return wantarray ? @data : $data[1]; | 
| 368 | 1840 | 50 |  |  |  | 9693 | return wantarray ? ( $-[0] + 0, $+[0] + 0 ) : $+[0] + 0; | 
| 369 |  |  |  |  |  |  | } | 
| 370 |  |  |  |  |  |  |  | 
| 371 |  |  |  |  |  |  | sub get_mode { | 
| 372 | 46 |  |  | 46 | 1 | 97 | my ( $self ) = @_; | 
| 373 | 46 |  |  |  |  | 187 | return $self->{mode}; | 
| 374 |  |  |  |  |  |  | } | 
| 375 |  |  |  |  |  |  |  | 
| 376 |  |  |  |  |  |  | sub get_start_delimiter { | 
| 377 | 1794 |  |  | 1794 | 1 | 2977 | my ( $self ) = @_; | 
| 378 | 1794 |  |  |  |  | 7582 | return $self->{delimiter_start}; | 
| 379 |  |  |  |  |  |  | } | 
| 380 |  |  |  |  |  |  |  | 
| 381 |  |  |  |  |  |  | sub get_token { | 
| 382 | 4114 |  |  | 4114 | 1 | 6916 | my ( $self ) = @_; | 
| 383 |  |  |  |  |  |  |  | 
| 384 |  |  |  |  |  |  | caller eq __PACKAGE__ or $self->{cursor_curr} > $self->{cursor_orig} | 
| 385 | 4114 | 50 | 66 |  |  | 12416 | or confess 'Programming error - get_token() called without ', | 
| 386 |  |  |  |  |  |  | 'first calling make_token()'; | 
| 387 |  |  |  |  |  |  |  | 
| 388 | 4114 |  |  |  |  | 10375 | my $handler = '__PPIX_TOKENIZER__' . $self->{mode}; | 
| 389 |  |  |  |  |  |  |  | 
| 390 |  |  |  |  |  |  | my $code = $self->can( $handler ) | 
| 391 |  |  |  |  |  |  | or confess 'Programming error - ', | 
| 392 |  |  |  |  |  |  | "Getting token in mode '$self->{mode}'. ", | 
| 393 |  |  |  |  |  |  | "cursor_curr = $self->{cursor_curr}; ", | 
| 394 |  |  |  |  |  |  | "cursor_limit = $self->{cursor_limit}; ", | 
| 395 |  |  |  |  |  |  | "length( content ) = ", length $self->{content}, | 
| 396 | 4114 | 50 |  |  |  | 15927 | "; content = '$self->{content}'"; | 
| 397 |  |  |  |  |  |  |  | 
| 398 |  |  |  |  |  |  | my $character = substr( | 
| 399 |  |  |  |  |  |  | $self->{content}, | 
| 400 |  |  |  |  |  |  | $self->{cursor_curr}, | 
| 401 | 4114 |  |  |  |  | 10255 | 1 | 
| 402 |  |  |  |  |  |  | ); | 
| 403 |  |  |  |  |  |  |  | 
| 404 |  |  |  |  |  |  | $self->{trace} | 
| 405 | 4114 | 50 |  |  |  | 9066 | and warn "get_token() got '$character' from $self->{cursor_curr}\n"; | 
| 406 |  |  |  |  |  |  |  | 
| 407 | 4114 |  |  |  |  | 10138 | return ( $code->( $self, $character ) ); | 
| 408 |  |  |  |  |  |  | } | 
| 409 |  |  |  |  |  |  |  | 
| 410 |  |  |  |  |  |  | sub interpolates { | 
| 411 | 141 |  |  | 141 | 1 | 333 | my ( $self ) = @_; | 
| 412 | 141 |  |  |  |  | 596 | return $self->{delimiter_start} ne q{'}; | 
| 413 |  |  |  |  |  |  | } | 
| 414 |  |  |  |  |  |  |  | 
| 415 |  |  |  |  |  |  | sub make_token { | 
| 416 | 5216 |  |  | 5216 | 1 | 12250 | my ( $self, $length, $class, $arg ) = @_; | 
| 417 | 5216 | 100 |  |  |  | 10828 | defined $class or $class = caller; | 
| 418 |  |  |  |  |  |  |  | 
| 419 | 5216 | 50 |  |  |  | 12958 | if ( $length + $self->{cursor_curr} > $self->{cursor_limit} ) { | 
| 420 |  |  |  |  |  |  | $length = $self->{cursor_limit} - $self->{cursor_curr} | 
| 421 | 0 | 0 |  |  |  | 0 | or return; | 
| 422 |  |  |  |  |  |  | } | 
| 423 |  |  |  |  |  |  |  | 
| 424 | 5216 | 50 |  |  |  | 18487 | $class =~ m/ \A PPIx::Regexp:: /smx | 
| 425 |  |  |  |  |  |  | or $class = 'PPIx::Regexp::' . $class; | 
| 426 |  |  |  |  |  |  | my $content = substr | 
| 427 |  |  |  |  |  |  | $self->{content}, | 
| 428 |  |  |  |  |  |  | $self->{cursor_curr}, | 
| 429 | 5216 |  |  |  |  | 12325 | $length; | 
| 430 |  |  |  |  |  |  |  | 
| 431 |  |  |  |  |  |  | $self->{trace} | 
| 432 | 5216 | 50 |  |  |  | 10864 | and warn "make_token( $length, '$class' ) => '$content'\n"; | 
| 433 | 5216 | 50 |  |  |  | 11231 | $self->{trace} > 1 | 
| 434 |  |  |  |  |  |  | and warn "    make_token: cursor_curr = $self->{cursor_curr}; ", | 
| 435 |  |  |  |  |  |  | "cursor_limit = $self->{cursor_limit}\n"; | 
| 436 |  |  |  |  |  |  | my $token = $class->__new( $content, | 
| 437 |  |  |  |  |  |  | tokenizer	=> $self, | 
| 438 | 5216 | 100 |  |  |  | 7938 | %{ $arg || {} } ) | 
|  | 5216 | 50 |  |  |  | 36989 |  | 
| 439 |  |  |  |  |  |  | or return; | 
| 440 |  |  |  |  |  |  |  | 
| 441 |  |  |  |  |  |  | $self->{index_locations} | 
| 442 | 5216 | 100 |  |  |  | 15341 | and $self->_update_location( $token ); | 
| 443 |  |  |  |  |  |  |  | 
| 444 |  |  |  |  |  |  | $token->significant() | 
| 445 | 5216 | 100 |  |  |  | 15343 | and $self->{expect} = undef; | 
| 446 |  |  |  |  |  |  |  | 
| 447 | 5216 | 100 |  |  |  | 23884 | $token->isa( TOKEN_UNKNOWN ) and $self->{failures}++; | 
| 448 |  |  |  |  |  |  |  | 
| 449 | 5216 |  |  |  |  | 9920 | $self->{cursor_curr} += $length; | 
| 450 | 5216 |  |  |  |  | 8147 | $self->{find} = undef; | 
| 451 | 5216 |  |  |  |  | 8248 | $self->{match} = undef; | 
| 452 | 5216 |  |  |  |  | 8845 | $self->{capture} = undef; | 
| 453 |  |  |  |  |  |  |  | 
| 454 | 5216 |  |  |  |  | 8380 | foreach my $name ( keys %{ $self->{cookie} } ) { | 
|  | 5216 |  |  |  |  | 13910 |  | 
| 455 | 3615 |  |  |  |  | 6761 | my $cookie = $self->{cookie}{$name}; | 
| 456 |  |  |  |  |  |  | $cookie->( $self, $token ) | 
| 457 | 3615 | 100 |  |  |  | 9554 | or delete $self->{cookie}{$name}; | 
| 458 |  |  |  |  |  |  | } | 
| 459 |  |  |  |  |  |  |  | 
| 460 |  |  |  |  |  |  | # Record this token as the prior token if it is significant. We must | 
| 461 |  |  |  |  |  |  | # do this after processing cookies, so that the cookies have access | 
| 462 |  |  |  |  |  |  | # to the old token if they want. | 
| 463 |  |  |  |  |  |  | $token->significant() | 
| 464 | 5216 | 100 |  |  |  | 12771 | and $self->{prior_significant_token} = $token; | 
| 465 |  |  |  |  |  |  |  | 
| 466 | 5216 |  |  |  |  | 21698 | return $token; | 
| 467 |  |  |  |  |  |  | } | 
| 468 |  |  |  |  |  |  |  | 
| 469 |  |  |  |  |  |  | sub match { | 
| 470 | 86 |  |  | 86 | 1 | 253 | my ( $self ) = @_; | 
| 471 | 86 |  |  |  |  | 264 | return $self->{match}; | 
| 472 |  |  |  |  |  |  | } | 
| 473 |  |  |  |  |  |  |  | 
| 474 |  |  |  |  |  |  | sub modifier { | 
| 475 | 5036 |  |  | 5036 | 1 | 9865 | my ( $self, $modifier ) = @_; | 
| 476 |  |  |  |  |  |  | return PPIx::Regexp::Token::Modifier::__asserts( | 
| 477 | 5036 |  |  |  |  | 12774 | $self->{modifiers}[-1], $modifier ); | 
| 478 |  |  |  |  |  |  | } | 
| 479 |  |  |  |  |  |  |  | 
| 480 |  |  |  |  |  |  | sub modifier_duplicate { | 
| 481 | 292 |  |  | 292 | 1 | 621 | my ( $self ) = @_; | 
| 482 | 292 |  |  |  |  | 715 | push @{ $self->{modifiers} }, | 
| 483 | 292 |  |  |  |  | 460 | { %{ $self->{modifiers}[-1] } }; | 
|  | 292 |  |  |  |  | 1101 |  | 
| 484 | 292 |  |  |  |  | 700 | return; | 
| 485 |  |  |  |  |  |  | } | 
| 486 |  |  |  |  |  |  |  | 
| 487 |  |  |  |  |  |  | sub modifier_modify { | 
| 488 | 592 |  |  | 592 | 1 | 1691 | my ( $self, %args ) = @_; | 
| 489 |  |  |  |  |  |  |  | 
| 490 |  |  |  |  |  |  | # Modifier code is centralized in PPIx::Regexp::Token::Modifier | 
| 491 |  |  |  |  |  |  | $self->{modifiers}[-1] = | 
| 492 |  |  |  |  |  |  | PPIx::Regexp::Token::Modifier::__PPIX_TOKENIZER__modifier_modify( | 
| 493 | 592 |  |  |  |  | 2313 | $self->{modifiers}[-1], \%args ); | 
| 494 |  |  |  |  |  |  |  | 
| 495 | 592 |  |  |  |  | 1664 | return; | 
| 496 |  |  |  |  |  |  |  | 
| 497 |  |  |  |  |  |  | } | 
| 498 |  |  |  |  |  |  |  | 
| 499 |  |  |  |  |  |  | sub modifier_pop { | 
| 500 | 288 |  |  | 288 | 1 | 663 | my ( $self ) = @_; | 
| 501 | 288 |  |  |  |  | 980 | @{ $self->{modifiers} } > 1 | 
| 502 | 288 | 100 |  |  |  | 521 | and pop @{ $self->{modifiers} }; | 
|  | 282 |  |  |  |  | 722 |  | 
| 503 | 288 |  |  |  |  | 713 | return; | 
| 504 |  |  |  |  |  |  | } | 
| 505 |  |  |  |  |  |  |  | 
| 506 |  |  |  |  |  |  | sub modifier_seen { | 
| 507 | 8 |  |  | 8 | 1 | 35 | my ( $self, $modifier ) = @_; | 
| 508 | 8 |  |  |  |  | 58 | foreach my $mod ( reverse @{ $self->{modifiers} } ) { | 
|  | 8 |  |  |  |  | 32 |  | 
| 509 | 10 | 100 |  |  |  | 52 | exists $mod->{$modifier} | 
| 510 |  |  |  |  |  |  | and return 1; | 
| 511 |  |  |  |  |  |  | } | 
| 512 | 5 |  |  |  |  | 20 | return; | 
| 513 |  |  |  |  |  |  | } | 
| 514 |  |  |  |  |  |  |  | 
| 515 |  |  |  |  |  |  | sub next_token { | 
| 516 | 5750 |  |  | 5750 | 1 | 10093 | my ( $self ) = @_; | 
| 517 |  |  |  |  |  |  |  | 
| 518 |  |  |  |  |  |  | { | 
| 519 |  |  |  |  |  |  |  | 
| 520 | 5750 | 100 |  |  |  | 9430 | if ( @{ $self->{pending} } ) { | 
|  | 9847 |  |  |  |  | 13336 |  | 
|  | 9847 |  |  |  |  | 21044 |  | 
| 521 | 5214 |  |  |  |  | 7768 | return shift @{ $self->{pending} }; | 
|  | 5214 |  |  |  |  | 17270 |  | 
| 522 |  |  |  |  |  |  | } | 
| 523 |  |  |  |  |  |  |  | 
| 524 | 4633 | 100 |  |  |  | 11518 | if ( $self->{cursor_curr} >= $self->{cursor_limit} ) { | 
| 525 |  |  |  |  |  |  | $self->{cursor_limit} >= length $self->{content} | 
| 526 | 1091 | 100 |  |  |  | 5231 | and return; | 
| 527 | 555 | 50 |  |  |  | 1889 | $self->{mode} eq 'finish' and return; | 
| 528 | 555 |  |  |  |  | 1736 | $self->_set_mode( 'finish' ); | 
| 529 | 555 |  |  |  |  | 1130 | $self->{cursor_limit} += length $self->{delimiter_finish}; | 
| 530 |  |  |  |  |  |  | } | 
| 531 |  |  |  |  |  |  |  | 
| 532 | 4097 | 50 |  |  |  | 9599 | if ( my @tokens = $self->get_token() ) { | 
| 533 | 4097 |  |  |  |  | 6314 | push @{ $self->{pending} }, @tokens; | 
|  | 4097 |  |  |  |  | 8954 |  | 
| 534 | 4097 |  |  |  |  | 7293 | redo; | 
| 535 |  |  |  |  |  |  |  | 
| 536 |  |  |  |  |  |  | } | 
| 537 |  |  |  |  |  |  |  | 
| 538 |  |  |  |  |  |  | } | 
| 539 |  |  |  |  |  |  |  | 
| 540 | 0 |  |  |  |  | 0 | return; | 
| 541 |  |  |  |  |  |  |  | 
| 542 |  |  |  |  |  |  | } | 
| 543 |  |  |  |  |  |  |  | 
| 544 |  |  |  |  |  |  | sub peek { | 
| 545 | 379 |  |  | 379 | 1 | 821 | my ( $self, $offset ) = @_; | 
| 546 | 379 | 100 |  |  |  | 875 | defined $offset or $offset = 0; | 
| 547 | 379 | 50 |  |  |  | 904 | $offset < 0 and return; | 
| 548 | 379 |  |  |  |  | 702 | $offset += $self->{cursor_curr}; | 
| 549 | 379 | 50 |  |  |  | 915 | $offset >= $self->{cursor_limit} and return; | 
| 550 | 379 |  |  |  |  | 1697 | return substr $self->{content}, $offset, 1; | 
| 551 |  |  |  |  |  |  | } | 
| 552 |  |  |  |  |  |  |  | 
| 553 |  |  |  |  |  |  | sub ppi_document { | 
| 554 | 83 |  |  | 83 | 1 | 221 | my ( $self ) = @_; | 
| 555 |  |  |  |  |  |  |  | 
| 556 | 83 | 50 |  |  |  | 242 | defined $self->{find} or $self->_remainder(); | 
| 557 |  |  |  |  |  |  |  | 
| 558 | 83 |  |  |  |  | 504 | return PPI::Document->new( \"$self->{find}" ); | 
| 559 |  |  |  |  |  |  | } | 
| 560 |  |  |  |  |  |  |  | 
| 561 |  |  |  |  |  |  | sub prior_significant_token { | 
| 562 | 2413 |  |  | 2413 | 1 | 4640 | my ( $self, $method, @args ) = @_; | 
| 563 | 2413 | 100 |  |  |  | 4798 | defined $method or return $self->{prior_significant_token}; | 
| 564 |  |  |  |  |  |  | $self->{prior_significant_token}->can( $method ) | 
| 565 |  |  |  |  |  |  | or confess 'Programming error - ', | 
| 566 |  |  |  |  |  |  | ( ref $self->{prior_significant_token} || | 
| 567 | 2394 | 50 | 0 |  |  | 9409 | $self->{prior_significant_token} ), | 
| 568 |  |  |  |  |  |  | ' does not support method ', $method; | 
| 569 | 2394 |  |  |  |  | 8787 | return $self->{prior_significant_token}->$method( @args ); | 
| 570 |  |  |  |  |  |  | } | 
| 571 |  |  |  |  |  |  |  | 
| 572 |  |  |  |  |  |  | # my $length = $token->__recognize_postderef( $tokenizer, $iterator ). | 
| 573 |  |  |  |  |  |  | # | 
| 574 |  |  |  |  |  |  | # This method is private to the PPIx-Regexp package, and may be changed | 
| 575 |  |  |  |  |  |  | # or retracted without warning. What it does is to recognize postfix | 
| 576 |  |  |  |  |  |  | # dereferences. It returns the length in characters of the first postfix | 
| 577 |  |  |  |  |  |  | # dereference found, or a false value if none is found. | 
| 578 |  |  |  |  |  |  | # | 
| 579 |  |  |  |  |  |  | # The optional $iterator argument can be one of the following: | 
| 580 |  |  |  |  |  |  | #   - A code reference, which will be called to provide PPI::Element | 
| 581 |  |  |  |  |  |  | #     objects to be checked to see if they represent a postfix | 
| 582 |  |  |  |  |  |  | #     dereference. | 
| 583 |  |  |  |  |  |  | #   - A PPI::Element, which is checked to see if it is a postfix | 
| 584 |  |  |  |  |  |  | #     dereference. | 
| 585 |  |  |  |  |  |  | #   - Undef, or omitted, in which case ppi() is called on the invocant, | 
| 586 |  |  |  |  |  |  | #     and everything that follows the '->' operator is checked to see if | 
| 587 |  |  |  |  |  |  | #     it is a postfix dereference. | 
| 588 |  |  |  |  |  |  | #   - Anything else results in an exception and stack trace. | 
| 589 |  |  |  |  |  |  |  | 
| 590 |  |  |  |  |  |  | { | 
| 591 |  |  |  |  |  |  | sub __recognize_postderef { | 
| 592 | 148 |  |  | 148 |  | 469 | my ( $self, $token, $iterator ) = @_; | 
| 593 |  |  |  |  |  |  |  | 
| 594 |  |  |  |  |  |  | # Note that if ppi() gets called I have to hold a reference to | 
| 595 |  |  |  |  |  |  | # the returned object until I am done with all its children. | 
| 596 | 148 |  |  |  |  | 244 | my $ppi; | 
| 597 | 148 | 100 |  |  |  | 366 | if ( ! defined $iterator ) { | 
|  |  | 50 |  |  |  |  |  | 
|  |  | 0 |  |  |  |  |  | 
| 598 |  |  |  |  |  |  |  | 
| 599 |  |  |  |  |  |  | # This MUST be done before ppi() is called. | 
| 600 |  |  |  |  |  |  | $self->{index_locations} | 
| 601 | 144 | 100 |  |  |  | 400 | and $self->_update_location( $token ); | 
| 602 |  |  |  |  |  |  |  | 
| 603 | 144 |  |  |  |  | 494 | $ppi = $token->ppi(); | 
| 604 | 29 |  |  |  |  | 8358 | my @ops = grep { '->' eq $_->content() } @{ | 
| 605 | 144 | 100 |  |  |  | 324 | $ppi->find( 'PPI::Token::Operator' ) || [] }; | 
|  | 144 |  |  |  |  | 598 |  | 
| 606 |  |  |  |  |  |  | $iterator = sub { | 
| 607 | 150 | 100 |  | 150 |  | 643 | my $op = shift @ops | 
| 608 |  |  |  |  |  |  | or return; | 
| 609 | 15 |  |  |  |  | 89 | return $op->snext_sibling(); | 
| 610 | 144 |  |  |  |  | 39259 | }; | 
| 611 |  |  |  |  |  |  | } elsif ( $iterator->isa( 'PPI::Element' ) ) { | 
| 612 | 4 |  |  |  |  | 10 | my @eles = ( $iterator ); | 
| 613 |  |  |  |  |  |  | $iterator = sub { | 
| 614 | 4 |  |  | 4 |  | 16 | return shift @eles; | 
| 615 | 4 |  |  |  |  | 14 | }; | 
| 616 |  |  |  |  |  |  | } elsif ( CODE_REF ne ref $iterator ) { | 
| 617 | 0 |  |  |  |  | 0 | confess 'Programming error - Iterator not understood'; | 
| 618 |  |  |  |  |  |  | } | 
| 619 |  |  |  |  |  |  |  | 
| 620 | 148 |  |  |  |  | 744 | my $accept = $token->__postderef_accept_cast(); | 
| 621 |  |  |  |  |  |  |  | 
| 622 | 148 |  |  |  |  | 333 | while ( my $elem = $iterator->() ) { | 
| 623 |  |  |  |  |  |  |  | 
| 624 | 19 |  |  |  |  | 443 | my $content = $elem->content(); | 
| 625 |  |  |  |  |  |  |  | 
| 626 |  |  |  |  |  |  | # As of PPI 1.238, all postfix dereferences are parsed as | 
| 627 |  |  |  |  |  |  | # casts. So if we find a cast of the correct content we have | 
| 628 |  |  |  |  |  |  | # a postfix deref. | 
| 629 | 19 | 100 |  |  |  | 184 | $elem->isa( 'PPI::Token::Cast' ) | 
| 630 |  |  |  |  |  |  | or next; | 
| 631 |  |  |  |  |  |  |  | 
| 632 | 15 | 100 |  |  |  | 92 | if ( $content =~ m/ ( .* ) \* \z /smx ) { | 
|  |  | 50 |  |  |  |  |  | 
| 633 |  |  |  |  |  |  | # If we're an acceptable cast ending in a glob, accept | 
| 634 |  |  |  |  |  |  | # it. | 
| 635 | 10 | 100 |  |  |  | 112 | $accept->{$1} | 
| 636 |  |  |  |  |  |  | and return length $content; | 
| 637 |  |  |  |  |  |  | } elsif ( $accept->{$content} ) { | 
| 638 |  |  |  |  |  |  | # If we're an acceptable cast followed by a subscript, | 
| 639 |  |  |  |  |  |  | # we're a slice -- accept both cast and subscript. | 
| 640 | 5 | 50 |  |  |  | 21 | my $next = $elem->snext_sibling() | 
| 641 |  |  |  |  |  |  | or next; | 
| 642 | 5 | 50 |  |  |  | 132 | $next->isa( 'PPI::Structure::Subscript' ) | 
| 643 |  |  |  |  |  |  | or next; | 
| 644 | 5 |  |  |  |  | 23 | return length( $content ) + length( $next->content() ); | 
| 645 |  |  |  |  |  |  | } | 
| 646 |  |  |  |  |  |  |  | 
| 647 |  |  |  |  |  |  | # Otherwise, we're not a postfix dereference; try the next | 
| 648 |  |  |  |  |  |  | # iteration. | 
| 649 |  |  |  |  |  |  | } | 
| 650 |  |  |  |  |  |  |  | 
| 651 |  |  |  |  |  |  | # No postfix dereference found. | 
| 652 | 135 |  |  |  |  | 782 | return; | 
| 653 |  |  |  |  |  |  | } | 
| 654 |  |  |  |  |  |  | } | 
| 655 |  |  |  |  |  |  |  | 
| 656 |  |  |  |  |  |  | sub significant { | 
| 657 | 0 |  |  | 0 | 1 | 0 | return 1; | 
| 658 |  |  |  |  |  |  | } | 
| 659 |  |  |  |  |  |  |  | 
| 660 |  |  |  |  |  |  | sub strict { | 
| 661 | 4 |  |  | 4 | 1 | 12 | my ( $self ) = @_; | 
| 662 | 4 |  |  |  |  | 34 | return $self->{strict}; | 
| 663 |  |  |  |  |  |  | } | 
| 664 |  |  |  |  |  |  |  | 
| 665 |  |  |  |  |  |  | sub _known_tokenizers { | 
| 666 | 3025 |  |  | 3025 |  | 5409 | my ( $self ) = @_; | 
| 667 |  |  |  |  |  |  |  | 
| 668 | 3025 |  |  |  |  | 5262 | my $mode = $self->{mode}; | 
| 669 |  |  |  |  |  |  |  | 
| 670 | 3025 |  |  |  |  | 4506 | my @expect; | 
| 671 | 3025 | 100 |  |  |  | 6651 | if ( $self->{expect_next} ) { | 
| 672 | 328 |  |  |  |  | 988 | $self->{expect} = $self->{expect_next}; | 
| 673 | 328 |  |  |  |  | 657 | $self->{expect_next} = undef; | 
| 674 |  |  |  |  |  |  | } | 
| 675 | 3025 | 100 |  |  |  | 6418 | if ( $self->{expect} ) { | 
| 676 |  |  |  |  |  |  | @expect = $self->_known_tokenizer_check( | 
| 677 | 334 |  |  |  |  | 628 | @{ $self->{expect} } ); | 
|  | 334 |  |  |  |  | 951 |  | 
| 678 |  |  |  |  |  |  | } | 
| 679 |  |  |  |  |  |  |  | 
| 680 |  |  |  |  |  |  | exists $self->{known}{$mode} and return ( | 
| 681 | 3025 | 100 |  |  |  | 7669 | @expect, @{ $self->{known}{$mode} } ); | 
|  | 2487 |  |  |  |  | 9559 |  | 
| 682 |  |  |  |  |  |  |  | 
| 683 | 538 |  |  |  |  | 1496 | my @found = $self->_known_tokenizer_check( | 
| 684 |  |  |  |  |  |  | $self->__tokenizer_classes() ); | 
| 685 |  |  |  |  |  |  |  | 
| 686 | 538 |  |  |  |  | 2256 | $self->{known}{$mode} = \@found; | 
| 687 | 538 |  |  |  |  | 2243 | return (@expect, @found); | 
| 688 |  |  |  |  |  |  | } | 
| 689 |  |  |  |  |  |  |  | 
| 690 |  |  |  |  |  |  | sub _known_tokenizer_check { | 
| 691 | 872 |  |  | 872 |  | 3097 | my ( $self, @args ) = @_; | 
| 692 |  |  |  |  |  |  |  | 
| 693 | 872 |  |  |  |  | 2015 | my $handler = '__PPIX_TOKENIZER__' . $self->{mode}; | 
| 694 | 872 |  |  |  |  | 1275 | my @found; | 
| 695 |  |  |  |  |  |  |  | 
| 696 | 872 |  |  |  |  | 1783 | foreach my $class ( @args ) { | 
| 697 |  |  |  |  |  |  |  | 
| 698 | 8556 | 100 |  |  |  | 44666 | $class->can( $handler ) or next; | 
| 699 | 8367 |  |  |  |  | 15496 | push @found, $class; | 
| 700 |  |  |  |  |  |  |  | 
| 701 |  |  |  |  |  |  | } | 
| 702 |  |  |  |  |  |  |  | 
| 703 | 872 |  |  |  |  | 4640 | return @found; | 
| 704 |  |  |  |  |  |  | } | 
| 705 |  |  |  |  |  |  |  | 
| 706 |  |  |  |  |  |  | sub tokens { | 
| 707 | 204 |  |  | 204 | 1 | 583 | my ( $self ) = @_; | 
| 708 |  |  |  |  |  |  |  | 
| 709 | 204 |  |  |  |  | 445 | my @rslt; | 
| 710 | 204 |  |  |  |  | 807 | while ( my $token = $self->next_token() ) { | 
| 711 | 1924 |  |  |  |  | 4960 | push @rslt, $token; | 
| 712 |  |  |  |  |  |  | } | 
| 713 |  |  |  |  |  |  |  | 
| 714 | 204 |  |  |  |  | 1727 | return @rslt; | 
| 715 |  |  |  |  |  |  | } | 
| 716 |  |  |  |  |  |  |  | 
| 717 |  |  |  |  |  |  | #	$self->_deprecation_notice( $type, $name ); | 
| 718 |  |  |  |  |  |  | # | 
| 719 |  |  |  |  |  |  | #	This method centralizes deprecation. Type is 'attribute' or | 
| 720 |  |  |  |  |  |  | #	'method'. Deprecation is driven of the %deprecate hash. Values | 
| 721 |  |  |  |  |  |  | #	are: | 
| 722 |  |  |  |  |  |  | #	    false - no warning | 
| 723 |  |  |  |  |  |  | #	    1 - warn on first use | 
| 724 |  |  |  |  |  |  | #	    2 - warn on each use | 
| 725 |  |  |  |  |  |  | #	    3 - die on each use. | 
| 726 |  |  |  |  |  |  | # | 
| 727 |  |  |  |  |  |  | #	$self->_deprecation_in_progress( $type, $name ) | 
| 728 |  |  |  |  |  |  | # | 
| 729 |  |  |  |  |  |  | #	This method returns true if the deprecation is in progress. In | 
| 730 |  |  |  |  |  |  | #	fact it returns the deprecation level. | 
| 731 |  |  |  |  |  |  |  | 
| 732 |  |  |  |  |  |  | =begin comment | 
| 733 |  |  |  |  |  |  |  | 
| 734 |  |  |  |  |  |  | { | 
| 735 |  |  |  |  |  |  |  | 
| 736 |  |  |  |  |  |  | my %deprecate = ( | 
| 737 |  |  |  |  |  |  | attribute => { | 
| 738 |  |  |  |  |  |  | postderef	=> 3, | 
| 739 |  |  |  |  |  |  | }, | 
| 740 |  |  |  |  |  |  | ); | 
| 741 |  |  |  |  |  |  |  | 
| 742 |  |  |  |  |  |  | sub _deprecation_notice { | 
| 743 |  |  |  |  |  |  | my ( undef, $type, $name, $repl ) = @_;		# Invocant unused | 
| 744 |  |  |  |  |  |  | $deprecate{$type} or return; | 
| 745 |  |  |  |  |  |  | $deprecate{$type}{$name} or return; | 
| 746 |  |  |  |  |  |  | my $msg = sprintf 'The %s %s is %s', $name, $type, | 
| 747 |  |  |  |  |  |  | $deprecate{$type}{$name} > 2 ? 'removed' : 'deprecated'; | 
| 748 |  |  |  |  |  |  | defined $repl | 
| 749 |  |  |  |  |  |  | and $msg .= "; use $repl instead"; | 
| 750 |  |  |  |  |  |  | $deprecate{$type}{$name} >= 3 | 
| 751 |  |  |  |  |  |  | and croak $msg; | 
| 752 |  |  |  |  |  |  | warnings::enabled( 'deprecated' ) | 
| 753 |  |  |  |  |  |  | and carp $msg; | 
| 754 |  |  |  |  |  |  | $deprecate{$type}{$name} == 1 | 
| 755 |  |  |  |  |  |  | and $deprecate{$type}{$name} = 0; | 
| 756 |  |  |  |  |  |  | return; | 
| 757 |  |  |  |  |  |  | } | 
| 758 |  |  |  |  |  |  |  | 
| 759 |  |  |  |  |  |  | sub _deprecation_in_progress { | 
| 760 |  |  |  |  |  |  | my ( $self, $type, $name ) = @_; | 
| 761 |  |  |  |  |  |  | $deprecate{$type} or return; | 
| 762 |  |  |  |  |  |  | return $deprecate{$type}{$name}; | 
| 763 |  |  |  |  |  |  | } | 
| 764 |  |  |  |  |  |  |  | 
| 765 |  |  |  |  |  |  | } | 
| 766 |  |  |  |  |  |  |  | 
| 767 |  |  |  |  |  |  | =end comment | 
| 768 |  |  |  |  |  |  |  | 
| 769 |  |  |  |  |  |  | =cut | 
| 770 |  |  |  |  |  |  |  | 
| 771 |  |  |  |  |  |  | sub _remainder { | 
| 772 | 3620 |  |  | 3620 |  | 6410 | my ( $self ) = @_; | 
| 773 |  |  |  |  |  |  |  | 
| 774 |  |  |  |  |  |  | $self->{cursor_curr} > $self->{cursor_limit} | 
| 775 | 3620 | 50 |  |  |  | 8728 | and confess "Programming error - Trying to find past end of string"; | 
| 776 |  |  |  |  |  |  | $self->{find} = substr( | 
| 777 |  |  |  |  |  |  | $self->{content}, | 
| 778 |  |  |  |  |  |  | $self->{cursor_curr}, | 
| 779 |  |  |  |  |  |  | $self->{cursor_limit} - $self->{cursor_curr} | 
| 780 | 3620 |  |  |  |  | 9904 | ); | 
| 781 |  |  |  |  |  |  |  | 
| 782 | 3620 |  |  |  |  | 6159 | return; | 
| 783 |  |  |  |  |  |  | } | 
| 784 |  |  |  |  |  |  |  | 
| 785 |  |  |  |  |  |  | sub _make_final_token { | 
| 786 | 10 |  |  | 10 |  | 30 | my ( $self, $len, $class, $arg ) = @_; | 
| 787 | 10 |  |  |  |  | 35 | my $token = $self->make_token( $len, $class, $arg ); | 
| 788 | 10 |  |  |  |  | 43 | $self->_set_mode( 'kaput' ); | 
| 789 | 10 |  |  |  |  | 56 | return $token; | 
| 790 |  |  |  |  |  |  | } | 
| 791 |  |  |  |  |  |  |  | 
| 792 |  |  |  |  |  |  | sub _set_mode { | 
| 793 | 1644 |  |  | 1644 |  | 3687 | my ( $self, $mode ) = @_; | 
| 794 |  |  |  |  |  |  | $self->{trace} | 
| 795 | 1644 | 50 |  |  |  | 3664 | and warn "Tokenizer going from mode $self->{mode} to $mode\n"; | 
| 796 | 1644 |  |  |  |  | 3034 | $self->{mode} = $mode; | 
| 797 | 1644 | 100 |  |  |  | 3950 | if ( 'kaput' eq $mode ) { | 
| 798 |  |  |  |  |  |  | $self->{cursor_curr} = $self->{cursor_limit} = | 
| 799 | 534 |  |  |  |  | 1815 | length $self->{content}; | 
| 800 |  |  |  |  |  |  | } | 
| 801 | 1644 |  |  |  |  | 2781 | return; | 
| 802 |  |  |  |  |  |  | } | 
| 803 |  |  |  |  |  |  |  | 
| 804 |  |  |  |  |  |  | sub __init_error { | 
| 805 | 10 |  |  | 10 |  | 28 | my ( $self , $err ) = @_; | 
| 806 | 10 | 100 |  |  |  | 30 | defined $err | 
| 807 |  |  |  |  |  |  | or $err = 'Tokenizer found illegal first characters'; | 
| 808 |  |  |  |  |  |  | return $self->_make_final_token( | 
| 809 | 10 |  |  |  |  | 69 | length $self->{content}, TOKEN_UNKNOWN, { | 
| 810 |  |  |  |  |  |  | error	=> $err, | 
| 811 |  |  |  |  |  |  | }, | 
| 812 |  |  |  |  |  |  | ); | 
| 813 |  |  |  |  |  |  | } | 
| 814 |  |  |  |  |  |  |  | 
| 815 |  |  |  |  |  |  | sub _update_location { | 
| 816 | 107 |  |  | 107 |  | 220 | my ( $self, $token ) = @_; | 
| 817 |  |  |  |  |  |  | $token->{location}	# Idempotent | 
| 818 | 107 | 100 |  |  |  | 281 | and return; | 
| 819 | 105 |  | 66 |  |  | 291 | my $loc = $self->{_location} ||= do { | 
| 820 |  |  |  |  |  |  | my %loc = ( | 
| 821 |  |  |  |  |  |  | location	=> $self->{location}, | 
| 822 | 12 |  |  |  |  | 52 | ); | 
| 823 | 12 | 100 |  |  |  | 65 | if ( __instance( $self->{source}, 'PPI::Element' ) ) { | 
| 824 | 11 |  | 33 |  |  | 121 | $loc{location} ||= $self->{source}->location(); | 
| 825 | 11 | 50 |  |  |  | 2065 | if ( my $doc = $self->{source}->document() ) { | 
| 826 | 11 |  |  |  |  | 400 | $loc{tab_width} = $doc->tab_width(); | 
| 827 |  |  |  |  |  |  | } | 
| 828 |  |  |  |  |  |  | } | 
| 829 | 12 |  | 100 |  |  | 112 | $loc{tab_width} ||= 1; | 
| 830 | 12 |  |  |  |  | 49 | \%loc; | 
| 831 |  |  |  |  |  |  | }; | 
| 832 |  |  |  |  |  |  | $loc->{location} | 
| 833 | 105 | 50 |  |  |  | 275 | or return; | 
| 834 | 105 |  |  |  |  | 167 | $token->{location} = [ @{ $loc->{location} } ]; | 
|  | 105 |  |  |  |  | 347 |  | 
| 835 | 105 | 50 |  |  |  | 362 | if ( defined( my $content = $token->content() ) ) { | 
| 836 |  |  |  |  |  |  |  | 
| 837 | 105 |  |  |  |  | 196 | my $lines; | 
| 838 | 105 |  |  |  |  | 315 | pos( $content ) = 0; | 
| 839 | 105 |  |  |  |  | 410 | $lines++ while $content =~ m/ \n /smxgc; | 
| 840 | 105 | 100 |  |  |  | 251 | if ( pos $content ) { | 
| 841 | 2 |  |  |  |  | 6 | $loc->{location}[LOCATION_LINE] += $lines; | 
| 842 | 2 |  |  |  |  | 5 | $loc->{location}[LOCATION_LOGICAL_LINE] += $lines; | 
| 843 |  |  |  |  |  |  | $loc->{location}[LOCATION_CHARACTER] = | 
| 844 | 2 |  |  |  |  | 4 | $loc->{location}[LOCATION_COLUMN] = 1; | 
| 845 |  |  |  |  |  |  | } | 
| 846 |  |  |  |  |  |  |  | 
| 847 | 105 | 100 |  |  |  | 286 | if ( my $chars = length( $content ) - pos( $content ) ) { | 
| 848 | 102 |  |  |  |  | 224 | $loc->{location}[LOCATION_CHARACTER] += $chars; | 
| 849 | 102 | 100 | 100 |  |  | 347 | if ( $loc->{tab_width} > 1 && $content =~ m/ \t /smx ) { | 
| 850 | 5 |  |  |  |  | 14 | my $pos = $loc->{location}[LOCATION_COLUMN]; | 
| 851 | 5 |  |  |  |  | 8 | my $tab_width = $loc->{tab_width}; | 
| 852 |  |  |  |  |  |  | # Stolen shamelessly from PPI::Document::_visual_length | 
| 853 | 5 |  |  |  |  | 8 | my ( $vis_inc ); | 
| 854 | 5 |  |  |  |  | 23 | foreach my $part ( split /(\t)/, $content ) { | 
| 855 | 10 | 100 |  |  |  | 20 | if ($part eq "\t") { | 
| 856 | 5 |  |  |  |  | 10 | $vis_inc = $tab_width - ($pos-1) % $tab_width; | 
| 857 |  |  |  |  |  |  | } else { | 
| 858 | 5 |  |  |  |  | 8 | $vis_inc = length $part; | 
| 859 |  |  |  |  |  |  | } | 
| 860 | 10 |  |  |  |  | 16 | $pos    += $vis_inc; | 
| 861 |  |  |  |  |  |  | } | 
| 862 | 5 |  |  |  |  | 13 | $loc->{location}[LOCATION_COLUMN] = $pos; | 
| 863 |  |  |  |  |  |  | } else { | 
| 864 | 97 |  |  |  |  | 182 | $loc->{location}[LOCATION_COLUMN] += $chars; | 
| 865 |  |  |  |  |  |  | } | 
| 866 |  |  |  |  |  |  | } | 
| 867 |  |  |  |  |  |  |  | 
| 868 |  |  |  |  |  |  | } | 
| 869 | 105 |  |  |  |  | 228 | return; | 
| 870 |  |  |  |  |  |  | } | 
| 871 |  |  |  |  |  |  |  | 
| 872 |  |  |  |  |  |  | sub __PPIX_TOKENIZER__init { | 
| 873 | 534 |  |  | 534 |  | 1326 | my ( $self ) = @_; | 
| 874 |  |  |  |  |  |  |  | 
| 875 | 534 | 50 |  |  |  | 3178 | $self->find_regexp( | 
| 876 |  |  |  |  |  |  | qr{ \A ( \s* ) ( qr | m | s )? ( \s* ) ( . ) }smx ) | 
| 877 |  |  |  |  |  |  | or return $self->__init_error(); | 
| 878 |  |  |  |  |  |  |  | 
| 879 | 534 |  |  |  |  | 2790 | my ( $leading_white, $type, $next_white, $delim_start ) = $self->capture(); | 
| 880 |  |  |  |  |  |  |  | 
| 881 | 534 | 100 |  |  |  | 2067 | defined $type | 
| 882 |  |  |  |  |  |  | or $type = ''; | 
| 883 |  |  |  |  |  |  |  | 
| 884 | 534 | 100 | 100 |  |  | 2743 | $type | 
| 885 |  |  |  |  |  |  | or $delim_start =~ m< \A [/?] \z >smx | 
| 886 |  |  |  |  |  |  | or return $self->__init_error(); | 
| 887 | 528 | 100 | 100 |  |  | 2889 | $type | 
|  |  |  | 100 |  |  |  |  | 
| 888 |  |  |  |  |  |  | and not $next_white | 
| 889 |  |  |  |  |  |  | and $delim_start =~ m< \A \w \z >smx | 
| 890 |  |  |  |  |  |  | and return $self->__init_error(); | 
| 891 |  |  |  |  |  |  |  | 
| 892 | 526 |  |  |  |  | 1522 | $self->{type} = $type; | 
| 893 |  |  |  |  |  |  |  | 
| 894 | 526 |  |  |  |  | 1023 | my @tokens; | 
| 895 |  |  |  |  |  |  |  | 
| 896 | 526 | 100 |  |  |  | 2036 | '' ne $leading_white | 
| 897 |  |  |  |  |  |  | and push @tokens, $self->make_token( length $leading_white, | 
| 898 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Whitespace' ); | 
| 899 | 526 |  |  |  |  | 2221 | push @tokens, $self->make_token( length $type, | 
| 900 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Structure' ); | 
| 901 | 526 | 100 |  |  |  | 1726 | '' ne $next_white | 
| 902 |  |  |  |  |  |  | and push @tokens, $self->make_token( length $next_white, | 
| 903 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Whitespace' ); | 
| 904 |  |  |  |  |  |  |  | 
| 905 | 526 |  |  |  |  | 1386 | $self->{delimiter_start} = $delim_start; | 
| 906 |  |  |  |  |  |  |  | 
| 907 |  |  |  |  |  |  | $self->{trace} | 
| 908 | 526 | 50 |  |  |  | 1512 | and warn "Tokenizer found regexp start delimiter '$delim_start' at $self->{cursor_curr}\n"; | 
| 909 |  |  |  |  |  |  |  | 
| 910 | 526 | 50 |  |  |  | 1848 | if ( my $offset = $self->find_matching_delimiter() ) { | 
| 911 | 526 |  |  |  |  | 1317 | my $cursor_limit = $self->{cursor_curr} + $offset; | 
| 912 |  |  |  |  |  |  | $self->{trace} | 
| 913 | 526 | 50 |  |  |  | 1649 | and warn "Tokenizer found regexp end delimiter at $cursor_limit\n"; | 
| 914 | 526 | 100 |  |  |  | 1909 | if ( $self->__number_of_extra_parts() ) { | 
| 915 |  |  |  |  |  |  | ###	    my $found_embedded_comments; | 
| 916 | 43 | 100 |  |  |  | 208 | if ( $self->close_bracket( | 
| 917 |  |  |  |  |  |  | $self->{delimiter_start} ) ) { | 
| 918 |  |  |  |  |  |  | pos $self->{content} = $self->{cursor_curr} + | 
| 919 | 7 |  |  |  |  | 52 | $offset + 1; | 
| 920 |  |  |  |  |  |  | # If we're bracketed, there may be Perl comments between | 
| 921 |  |  |  |  |  |  | # the regex and the replacement. PPI gets the parse | 
| 922 |  |  |  |  |  |  | # wrong as of 1.220, but if we get the handling of the | 
| 923 |  |  |  |  |  |  | # underlying string right, we will Just Work when PPI | 
| 924 |  |  |  |  |  |  | # gets it right. | 
| 925 | 7 |  |  |  |  | 64 | while ( $self->{content} =~ | 
| 926 |  |  |  |  |  |  | m/ \G \s* \n \s* \# [^\n]* /smxgc ) { | 
| 927 |  |  |  |  |  |  | ##		    $found_embedded_comments = 1; | 
| 928 |  |  |  |  |  |  | } | 
| 929 | 7 |  |  |  |  | 42 | $self->{content} =~ m/ \s* /smxgc; | 
| 930 |  |  |  |  |  |  | } else { | 
| 931 |  |  |  |  |  |  | pos $self->{content} = $self->{cursor_curr} + | 
| 932 | 36 |  |  |  |  | 225 | $offset; | 
| 933 |  |  |  |  |  |  | } | 
| 934 |  |  |  |  |  |  | # Localizing cursor_curr and delimiter_start would be | 
| 935 |  |  |  |  |  |  | # cleaner, but I don't want the old values restored if a | 
| 936 |  |  |  |  |  |  | # parse error occurs. | 
| 937 | 43 |  |  |  |  | 132 | my $cursor_curr = $self->{cursor_curr}; | 
| 938 | 43 |  |  |  |  | 109 | my $delimiter_start = $self->{delimiter_start}; | 
| 939 | 43 |  |  |  |  | 121 | $self->{cursor_curr} = pos $self->{content}; | 
| 940 |  |  |  |  |  |  | $self->{delimiter_start} = substr | 
| 941 |  |  |  |  |  |  | $self->{content}, | 
| 942 |  |  |  |  |  |  | $self->{cursor_curr}, | 
| 943 | 43 |  |  |  |  | 138 | 1; | 
| 944 |  |  |  |  |  |  | $self->{trace} | 
| 945 | 43 | 50 |  |  |  | 131 | and warn "Tokenizer found replacement start delimiter '$self->{delimiter_start}' at $self->{cursor_curr}\n"; | 
| 946 | 43 | 100 |  |  |  | 124 | if ( my $s_off = $self->find_matching_delimiter() ) { | 
| 947 |  |  |  |  |  |  | $self->{cursor_modifiers} = | 
| 948 | 41 |  |  |  |  | 195 | $self->{cursor_curr} + $s_off + 1; | 
| 949 |  |  |  |  |  |  | $self->{trace} | 
| 950 | 41 | 50 |  |  |  | 159 | and warn "Tokenizer found replacement end delimiter at @{[ | 
| 951 | 0 |  |  |  |  | 0 | $self->{cursor_curr} + $s_off ]}\n"; | 
| 952 | 41 |  |  |  |  | 112 | $self->{cursor_curr} = $cursor_curr; | 
| 953 | 41 |  |  |  |  | 117 | $self->{delimiter_start} = $delimiter_start; | 
| 954 |  |  |  |  |  |  | } else { | 
| 955 |  |  |  |  |  |  | $self->{trace} | 
| 956 | 2 | 50 |  |  |  | 6 | and warn 'Tokenizer failed to find replacement', | 
| 957 |  |  |  |  |  |  | "end delimiter starting at $self->{cursor_curr}\n"; | 
| 958 | 2 |  |  |  |  | 5 | $self->{cursor_curr} = 0; | 
| 959 |  |  |  |  |  |  | # TODO If I were smart enough here I could check for | 
| 960 |  |  |  |  |  |  | # PPI mis-parses like s{foo} | 
| 961 |  |  |  |  |  |  | #                     #{bar} | 
| 962 |  |  |  |  |  |  | #                      {baz} | 
| 963 |  |  |  |  |  |  | # here, doing so if $found_embedded_comments (commented | 
| 964 |  |  |  |  |  |  | # out above) is true. The problem is that there seem to | 
| 965 |  |  |  |  |  |  | # as many mis-parses as there are possible delimiters. | 
| 966 | 2 |  |  |  |  | 8 | return $self->__init_error( | 
| 967 |  |  |  |  |  |  | 'Tokenizer found mismatched replacement delimiters', | 
| 968 |  |  |  |  |  |  | ); | 
| 969 |  |  |  |  |  |  | } | 
| 970 |  |  |  |  |  |  | } else { | 
| 971 | 483 |  |  |  |  | 1377 | $self->{cursor_modifiers} = $cursor_limit + 1; | 
| 972 |  |  |  |  |  |  | } | 
| 973 | 524 |  |  |  |  | 1223 | $self->{cursor_limit} = $cursor_limit; | 
| 974 |  |  |  |  |  |  | } else { | 
| 975 | 0 |  |  |  |  | 0 | $self->{cursor_curr} = 0; | 
| 976 |  |  |  |  |  |  | return $self->_make_final_token( | 
| 977 | 0 |  |  |  |  | 0 | length( $self->{content} ), TOKEN_UNKNOWN, { | 
| 978 |  |  |  |  |  |  | error	=> 'Tokenizer found mismatched regexp delimiters', | 
| 979 |  |  |  |  |  |  | }, | 
| 980 |  |  |  |  |  |  | ); | 
| 981 |  |  |  |  |  |  | } | 
| 982 |  |  |  |  |  |  |  | 
| 983 |  |  |  |  |  |  | { | 
| 984 |  |  |  |  |  |  | # We have to instantiate the trailing tokens now so we can | 
| 985 |  |  |  |  |  |  | # figure out what modifiers are in effect. But we can't | 
| 986 |  |  |  |  |  |  | # index their locations (if desired) because they are being | 
| 987 |  |  |  |  |  |  | # instantiated out of order | 
| 988 |  |  |  |  |  |  |  | 
| 989 | 524 |  |  |  |  | 926 | local $self->{index_locations} = 0; | 
|  | 524 |  |  |  |  | 1573 |  | 
| 990 |  |  |  |  |  |  |  | 
| 991 | 524 |  |  |  |  | 833 | my @mods = @{ $self->{default_modifiers} }; | 
|  | 524 |  |  |  |  | 1475 |  | 
| 992 | 524 |  |  |  |  | 2024 | pos $self->{content} = $self->{cursor_modifiers}; | 
| 993 | 524 |  |  |  |  | 1547 | local $self->{cursor_curr} = $self->{cursor_modifiers}; | 
| 994 | 524 |  |  |  |  | 1451 | local $self->{cursor_limit} = length $self->{content}; | 
| 995 | 524 |  |  |  |  | 1002 | my @trailing; | 
| 996 |  |  |  |  |  |  | { | 
| 997 | 524 |  |  |  |  | 788 | my $len = $self->find_regexp( qr{ \A [[:lower:]]* }smx ); | 
|  | 524 |  |  |  |  | 2514 |  | 
| 998 | 524 |  |  |  |  | 2881 | push @trailing, $self->make_token( $len, | 
| 999 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Modifier' ); | 
| 1000 |  |  |  |  |  |  | } | 
| 1001 | 524 | 100 |  |  |  | 2921 | if ( my $len = $self->find_regexp( qr{ \A \s+ }smx ) ) { | 
| 1002 | 1 |  |  |  |  | 14 | push @trailing, $self->make_token( $len, | 
| 1003 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Whitespace' ); | 
| 1004 |  |  |  |  |  |  | } | 
| 1005 | 524 | 100 |  |  |  | 2627 | if ( my $len = $self->find_regexp( qr{ \A .+ }smx ) ) { | 
| 1006 | 1 |  |  |  |  | 20 | push @trailing, $self->make_token( $len, TOKEN_UNKNOWN, { | 
| 1007 |  |  |  |  |  |  | error	=> 'Trailing characters after expression', | 
| 1008 |  |  |  |  |  |  | } ); | 
| 1009 |  |  |  |  |  |  | } | 
| 1010 | 524 |  |  |  |  | 1778 | $self->{trailing_tokens} = \@trailing; | 
| 1011 | 524 |  |  |  |  | 2111 | push @mods, $trailing[0]->content(); | 
| 1012 |  |  |  |  |  |  | $self->{effective_modifiers} = | 
| 1013 | 524 |  |  |  |  | 1631 | PPIx::Regexp::Token::Modifier::__aggregate_modifiers ( | 
| 1014 |  |  |  |  |  |  | @mods ); | 
| 1015 |  |  |  |  |  |  | $self->{modifiers} = [ | 
| 1016 | 524 |  |  |  |  | 1284 | { %{ $self->{effective_modifiers} } }, | 
|  | 524 |  |  |  |  | 3331 |  | 
| 1017 |  |  |  |  |  |  | ]; | 
| 1018 |  |  |  |  |  |  | } | 
| 1019 |  |  |  |  |  |  |  | 
| 1020 |  |  |  |  |  |  | $self->{delimiter_finish} = substr | 
| 1021 |  |  |  |  |  |  | $self->{content}, | 
| 1022 |  |  |  |  |  |  | $self->{cursor_limit}, | 
| 1023 | 524 |  |  |  |  | 2139 | 1; | 
| 1024 |  |  |  |  |  |  |  | 
| 1025 | 524 |  |  |  |  | 1579 | push @tokens, $self->make_token( 1, | 
| 1026 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Delimiter' ); | 
| 1027 |  |  |  |  |  |  |  | 
| 1028 | 524 |  |  |  |  | 2904 | $self->_set_mode( 'regexp' ); | 
| 1029 |  |  |  |  |  |  |  | 
| 1030 | 524 |  |  |  |  | 993 | $self->{find} = undef; | 
| 1031 |  |  |  |  |  |  |  | 
| 1032 | 524 |  |  |  |  | 2723 | return @tokens; | 
| 1033 |  |  |  |  |  |  | } | 
| 1034 |  |  |  |  |  |  |  | 
| 1035 |  |  |  |  |  |  | # Match the initial part of the regexp including any leading white | 
| 1036 |  |  |  |  |  |  | # space. The initial delimiter is the first thing not consumed, though | 
| 1037 |  |  |  |  |  |  | # we check it for sanity. | 
| 1038 |  |  |  |  |  |  | sub __initial_match { | 
| 1039 | 0 |  |  | 0 |  | 0 | my ( $self ) = @_; | 
| 1040 |  |  |  |  |  |  |  | 
| 1041 | 0 | 0 |  |  |  | 0 | $self->find_regexp( | 
| 1042 |  |  |  |  |  |  | qr{ \A ( \s* ) ( qr | m | s )? ( \s* ) (?: [^\w\s] ) }smx ) | 
| 1043 |  |  |  |  |  |  | or return; | 
| 1044 |  |  |  |  |  |  |  | 
| 1045 | 0 |  |  |  |  | 0 | my ( $leading_white, $type, $next_white ) = $self->capture(); | 
| 1046 |  |  |  |  |  |  |  | 
| 1047 | 0 | 0 |  |  |  | 0 | defined $type | 
| 1048 |  |  |  |  |  |  | or $type = ''; | 
| 1049 |  |  |  |  |  |  |  | 
| 1050 | 0 |  |  |  |  | 0 | $self->{type} = $type; | 
| 1051 |  |  |  |  |  |  |  | 
| 1052 | 0 |  |  |  |  | 0 | my @tokens; | 
| 1053 |  |  |  |  |  |  |  | 
| 1054 | 0 | 0 |  |  |  | 0 | '' ne $leading_white | 
| 1055 |  |  |  |  |  |  | and push @tokens, $self->make_token( length $leading_white, | 
| 1056 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Whitespace' ); | 
| 1057 | 0 |  |  |  |  | 0 | push @tokens, $self->make_token( length $type, | 
| 1058 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Structure' ); | 
| 1059 | 0 | 0 |  |  |  | 0 | '' ne $next_white | 
| 1060 |  |  |  |  |  |  | and push @tokens, $self->make_token( length $next_white, | 
| 1061 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Whitespace' ); | 
| 1062 |  |  |  |  |  |  |  | 
| 1063 | 0 |  |  |  |  | 0 | return @tokens; | 
| 1064 |  |  |  |  |  |  | } | 
| 1065 |  |  |  |  |  |  |  | 
| 1066 |  |  |  |  |  |  | { | 
| 1067 |  |  |  |  |  |  | my %extra_parts = ( | 
| 1068 |  |  |  |  |  |  | s	=> 1, | 
| 1069 |  |  |  |  |  |  | ); | 
| 1070 |  |  |  |  |  |  |  | 
| 1071 |  |  |  |  |  |  | # Return the number of extra delimited parts. This will be 0 except | 
| 1072 |  |  |  |  |  |  | # for s///, which will be 1. | 
| 1073 |  |  |  |  |  |  | sub __number_of_extra_parts { | 
| 1074 | 850 |  |  | 850 |  | 1767 | my ( $self ) = @_; | 
| 1075 | 850 |  | 100 |  |  | 4110 | return $extra_parts{$self->{type}} || 0; | 
| 1076 |  |  |  |  |  |  | } | 
| 1077 |  |  |  |  |  |  | } | 
| 1078 |  |  |  |  |  |  |  | 
| 1079 |  |  |  |  |  |  | { | 
| 1080 |  |  |  |  |  |  | my @part_class = qw{ | 
| 1081 |  |  |  |  |  |  | PPIx::Regexp::Structure::Regexp | 
| 1082 |  |  |  |  |  |  | PPIx::Regexp::Structure::Replacement | 
| 1083 |  |  |  |  |  |  | }; | 
| 1084 |  |  |  |  |  |  |  | 
| 1085 |  |  |  |  |  |  | # Return the classes for the parts of the expression. | 
| 1086 |  |  |  |  |  |  | sub __part_classes { | 
| 1087 | 324 |  |  | 324 |  | 912 | my ( $self ) = @_; | 
| 1088 | 324 |  |  |  |  | 906 | my $max = $self->__number_of_extra_parts(); | 
| 1089 | 324 |  |  |  |  | 2298 | return @part_class[ 0 .. $max ]; | 
| 1090 |  |  |  |  |  |  | } | 
| 1091 |  |  |  |  |  |  | } | 
| 1092 |  |  |  |  |  |  |  | 
| 1093 |  |  |  |  |  |  | sub __PPIX_TOKENIZER__regexp { | 
| 1094 | 3025 |  |  | 3025 |  | 6757 | my ( $self, $character ) = @_; | 
| 1095 |  |  |  |  |  |  |  | 
| 1096 | 3025 |  |  |  |  | 5303 | my $mode = $self->{mode}; | 
| 1097 | 3025 |  |  |  |  | 5913 | my $handler = '__PPIX_TOKENIZER__' . $mode; | 
| 1098 |  |  |  |  |  |  |  | 
| 1099 | 3025 |  |  |  |  | 5277 | $self->{cursor_orig} = $self->{cursor_curr}; | 
| 1100 | 3025 |  |  |  |  | 6954 | foreach my $class ( $self->_known_tokenizers() ) { | 
| 1101 | 13467 |  |  |  |  | 51955 | my @tokens = grep { $_ } $class->$handler( $self, $character ); | 
|  | 3856 |  |  |  |  | 9960 |  | 
| 1102 |  |  |  |  |  |  | $self->{trace} | 
| 1103 | 13467 | 50 |  |  |  | 27435 | and warn $class, "->$handler( \$self, '$character' )", | 
| 1104 |  |  |  |  |  |  | " => (@tokens)\n"; | 
| 1105 |  |  |  |  |  |  | @tokens | 
| 1106 |  |  |  |  |  |  | and return ( map { | 
| 1107 | 13467 | 100 |  |  |  | 30020 | ref $_ ? $_ : $self->make_token( $_, | 
|  | 3022 | 100 |  |  |  | 11348 |  | 
| 1108 |  |  |  |  |  |  | $class ) } @tokens ); | 
| 1109 |  |  |  |  |  |  | } | 
| 1110 |  |  |  |  |  |  |  | 
| 1111 |  |  |  |  |  |  | # Find a fallback processor for the character. | 
| 1112 | 27 |  | 33 |  |  | 293 | my $fallback = __PACKAGE__->can( '__PPIX_TOKEN_FALLBACK__' . $mode ) | 
| 1113 |  |  |  |  |  |  | || __PACKAGE__->can( '__PPIX_TOKEN_FALLBACK__regexp' ) | 
| 1114 |  |  |  |  |  |  | || confess "Programming error - unable to find fallback for $mode"; | 
| 1115 | 27 |  |  |  |  | 144 | return $fallback->( $self, $character ); | 
| 1116 |  |  |  |  |  |  | } | 
| 1117 |  |  |  |  |  |  |  | 
| 1118 |  |  |  |  |  |  | *__PPIX_TOKENIZER__repl = \&__PPIX_TOKENIZER__regexp; | 
| 1119 |  |  |  |  |  |  |  | 
| 1120 |  |  |  |  |  |  | sub __PPIX_TOKEN_FALLBACK__regexp { | 
| 1121 | 18 |  |  | 18 |  | 71 | my ( $self, $character ) = @_; | 
| 1122 |  |  |  |  |  |  |  | 
| 1123 |  |  |  |  |  |  | # As a fallback in regexp mode, any escaped character is a literal. | 
| 1124 | 18 | 100 | 66 |  |  | 70 | if ( $character eq '\\' | 
| 1125 |  |  |  |  |  |  | && $self->{cursor_limit} - $self->{cursor_curr} > 1 | 
| 1126 |  |  |  |  |  |  | ) { | 
| 1127 | 2 |  |  |  |  | 7 | return $self->make_token( 2, TOKEN_LITERAL ); | 
| 1128 |  |  |  |  |  |  | } | 
| 1129 |  |  |  |  |  |  |  | 
| 1130 |  |  |  |  |  |  | # Any normal character is unknown. | 
| 1131 | 16 |  |  |  |  | 95 | return $self->make_token( 1, TOKEN_UNKNOWN, { | 
| 1132 |  |  |  |  |  |  | error	=> 'Tokenizer found unexpected literal', | 
| 1133 |  |  |  |  |  |  | }, | 
| 1134 |  |  |  |  |  |  | ); | 
| 1135 |  |  |  |  |  |  | } | 
| 1136 |  |  |  |  |  |  |  | 
| 1137 |  |  |  |  |  |  | sub __PPIX_TOKEN_FALLBACK__repl { | 
| 1138 | 9 |  |  | 9 |  | 33 | my ( $self, $character ) = @_; | 
| 1139 |  |  |  |  |  |  |  | 
| 1140 |  |  |  |  |  |  | # As a fallback in replacement mode, any escaped character is a literal. | 
| 1141 | 9 | 100 | 66 |  |  | 40 | if ( $character eq '\\' | 
| 1142 |  |  |  |  |  |  | && defined ( my $next = $self->peek( 1 ) ) ) { | 
| 1143 |  |  |  |  |  |  |  | 
| 1144 | 5 | 0 | 33 |  |  | 19 | if ( $self->interpolates() || $next eq q<'> || $next eq '\\' ) { | 
|  |  |  | 33 |  |  |  |  | 
| 1145 | 5 |  |  |  |  | 20 | return $self->make_token( 2, TOKEN_LITERAL ); | 
| 1146 |  |  |  |  |  |  | } | 
| 1147 | 0 |  |  |  |  | 0 | return $self->make_token( 1, TOKEN_LITERAL ); | 
| 1148 |  |  |  |  |  |  | } | 
| 1149 |  |  |  |  |  |  |  | 
| 1150 |  |  |  |  |  |  | # So is any normal character. | 
| 1151 | 4 |  |  |  |  | 33 | return $self->make_token( 1, TOKEN_LITERAL ); | 
| 1152 |  |  |  |  |  |  | } | 
| 1153 |  |  |  |  |  |  |  | 
| 1154 |  |  |  |  |  |  | sub __PPIX_TOKENIZER__finish { | 
| 1155 | 555 |  |  | 555 |  | 1827 | my ( $self ) = @_;		# $character unused | 
| 1156 |  |  |  |  |  |  |  | 
| 1157 |  |  |  |  |  |  | $self->{cursor_limit} > length $self->{content} | 
| 1158 | 555 | 50 |  |  |  | 2480 | and confess "Programming error - ran off string"; | 
| 1159 |  |  |  |  |  |  |  | 
| 1160 |  |  |  |  |  |  | my @tokens = $self->make_token( length $self->{delimiter_finish}, | 
| 1161 | 555 |  |  |  |  | 2123 | 'PPIx::Regexp::Token::Delimiter' ); | 
| 1162 |  |  |  |  |  |  |  | 
| 1163 | 555 | 100 |  |  |  | 2860 | if ( $self->{cursor_curr} == $self->{cursor_modifiers} ) { | 
| 1164 |  |  |  |  |  |  |  | 
| 1165 |  |  |  |  |  |  | # We are out of string. Add the trailing tokens (created when we | 
| 1166 |  |  |  |  |  |  | # did the initial bracket scan) and close up shop. | 
| 1167 |  |  |  |  |  |  |  | 
| 1168 | 514 |  |  |  |  | 1796 | push @tokens, $self->_get_trailing_tokens(); | 
| 1169 |  |  |  |  |  |  |  | 
| 1170 | 514 |  |  |  |  | 1448 | $self->_set_mode( 'kaput' ); | 
| 1171 |  |  |  |  |  |  |  | 
| 1172 |  |  |  |  |  |  | } else { | 
| 1173 |  |  |  |  |  |  |  | 
| 1174 |  |  |  |  |  |  | # Clear the cookies, because we are going around again. | 
| 1175 | 41 |  |  |  |  | 219 | $self->{cookie} = {}; | 
| 1176 |  |  |  |  |  |  |  | 
| 1177 |  |  |  |  |  |  | # Move the cursor limit to just before the modifiers. | 
| 1178 | 41 |  |  |  |  | 144 | $self->{cursor_limit} = $self->{cursor_modifiers} - 1; | 
| 1179 |  |  |  |  |  |  |  | 
| 1180 |  |  |  |  |  |  | # If the preceding regular expression was bracketed, we need to | 
| 1181 |  |  |  |  |  |  | # consume possible whitespace and find another delimiter. | 
| 1182 |  |  |  |  |  |  |  | 
| 1183 | 41 | 100 |  |  |  | 220 | if ( $self->close_bracket( $self->{delimiter_start} ) ) { | 
| 1184 | 7 |  |  |  |  | 37 | my $accept; | 
| 1185 |  |  |  |  |  |  | # If we are bracketed, there can be honest-to-God Perl | 
| 1186 |  |  |  |  |  |  | # comments between the regexp and the replacement, not just | 
| 1187 |  |  |  |  |  |  | # regexp comments. As of version 1.220, PPI does not get | 
| 1188 |  |  |  |  |  |  | # this parse right, but if we can handle this is a string, | 
| 1189 |  |  |  |  |  |  | # then we will Just Work when PPI gets itself straight. | 
| 1190 | 7 |  |  |  |  | 63 | while ( $self->find_regexp( | 
| 1191 |  |  |  |  |  |  | qr{ \A ( \s* \n \s* ) ( \# [^\n]* \n ) }smx ) ) { | 
| 1192 | 2 |  |  |  |  | 9 | my ( $white_space, $comment ) = $self->capture(); | 
| 1193 | 2 |  |  |  |  | 11 | push @tokens, $self->make_token( | 
| 1194 |  |  |  |  |  |  | length $white_space, | 
| 1195 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Whitespace', | 
| 1196 |  |  |  |  |  |  | ), $self->make_token( | 
| 1197 |  |  |  |  |  |  | length $comment, | 
| 1198 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Comment', | 
| 1199 |  |  |  |  |  |  | ); | 
| 1200 |  |  |  |  |  |  | } | 
| 1201 | 7 | 100 |  |  |  | 71 | $accept = $self->find_regexp( qr{ \A \s+ }smx ) | 
| 1202 |  |  |  |  |  |  | and push @tokens, $self->make_token( | 
| 1203 |  |  |  |  |  |  | $accept, 'PPIx::Regexp::Token::Whitespace' ); | 
| 1204 | 7 |  |  |  |  | 56 | my $character = $self->peek(); | 
| 1205 | 7 |  |  |  |  | 34 | $self->{delimiter_start} = $character; | 
| 1206 | 7 |  |  |  |  | 43 | push @tokens, $self->make_token( | 
| 1207 |  |  |  |  |  |  | 1, 'PPIx::Regexp::Token::Delimiter' ); | 
| 1208 |  |  |  |  |  |  | $self->{delimiter_finish} = substr | 
| 1209 |  |  |  |  |  |  | $self->{content}, | 
| 1210 | 7 |  |  |  |  | 72 | $self->{cursor_limit} - 1, | 
| 1211 |  |  |  |  |  |  | 1; | 
| 1212 |  |  |  |  |  |  | } | 
| 1213 |  |  |  |  |  |  |  | 
| 1214 | 41 | 100 |  |  |  | 172 | if ( $self->modifier( 'e*' ) ) { | 
| 1215 |  |  |  |  |  |  | # With /e or /ee, the replacement portion is code. We make | 
| 1216 |  |  |  |  |  |  | # it all into one big PPIx::Regexp::Token::Code, slap on the | 
| 1217 |  |  |  |  |  |  | # trailing delimiter and modifiers, and return it all. | 
| 1218 |  |  |  |  |  |  | push @tokens, $self->make_token( | 
| 1219 |  |  |  |  |  |  | $self->{cursor_limit} - $self->{cursor_curr}, | 
| 1220 | 10 |  |  |  |  | 97 | 'PPIx::Regexp::Token::Code', | 
| 1221 |  |  |  |  |  |  | { perl_version_introduced => MINIMUM_PERL }, | 
| 1222 |  |  |  |  |  |  | ); | 
| 1223 | 10 |  |  |  |  | 57 | $self->{cursor_limit} = length $self->{content}; | 
| 1224 | 10 |  |  |  |  | 41 | push @tokens, $self->make_token( 1, | 
| 1225 |  |  |  |  |  |  | 'PPIx::Regexp::Token::Delimiter' ), | 
| 1226 |  |  |  |  |  |  | $self->_get_trailing_tokens(); | 
| 1227 | 10 |  |  |  |  | 50 | $self->_set_mode( 'kaput' ); | 
| 1228 |  |  |  |  |  |  | } else { | 
| 1229 |  |  |  |  |  |  | # Put our mode to replacement. | 
| 1230 | 31 |  |  |  |  | 216 | $self->_set_mode( 'repl' ); | 
| 1231 |  |  |  |  |  |  | } | 
| 1232 |  |  |  |  |  |  |  | 
| 1233 |  |  |  |  |  |  | } | 
| 1234 |  |  |  |  |  |  |  | 
| 1235 | 555 |  |  |  |  | 2021 | return @tokens; | 
| 1236 |  |  |  |  |  |  |  | 
| 1237 |  |  |  |  |  |  | } | 
| 1238 |  |  |  |  |  |  |  | 
| 1239 |  |  |  |  |  |  | # To common processing on trailing tokens. | 
| 1240 |  |  |  |  |  |  | sub _get_trailing_tokens { | 
| 1241 | 524 |  |  | 524 |  | 1436 | my ( $self ) = @_; | 
| 1242 | 524 | 100 |  |  |  | 1482 | if ( $self->{index_locations} ) { | 
| 1243 |  |  |  |  |  |  | # We turned off index_locations when these were created, because | 
| 1244 |  |  |  |  |  |  | # they were done out of order. Fix that now. | 
| 1245 | 11 |  |  |  |  | 28 | foreach my $token ( @{ $self->{trailing_tokens} } ) { | 
|  | 11 |  |  |  |  | 43 |  | 
| 1246 | 11 |  |  |  |  | 31 | $self->_update_location( $token ); | 
| 1247 |  |  |  |  |  |  | } | 
| 1248 |  |  |  |  |  |  | } | 
| 1249 | 524 |  |  |  |  | 919 | return @{ delete $self->{trailing_tokens} }; | 
|  | 524 |  |  |  |  | 1892 |  | 
| 1250 |  |  |  |  |  |  | } | 
| 1251 |  |  |  |  |  |  |  | 
| 1252 |  |  |  |  |  |  | 1; | 
| 1253 |  |  |  |  |  |  |  | 
| 1254 |  |  |  |  |  |  | __END__ |