| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Perl::Tokenizer; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 5 |  |  | 5 |  | 347076 | use utf8; | 
|  | 5 |  |  |  |  | 123 |  | 
|  | 5 |  |  |  |  | 28 |  | 
| 4 | 5 |  |  | 5 |  | 244 | use 5.018; | 
|  | 5 |  |  |  |  | 18 |  | 
| 5 | 5 |  |  | 5 |  | 30 | use strict; | 
|  | 5 |  |  |  |  | 10 |  | 
|  | 5 |  |  |  |  | 103 |  | 
| 6 | 5 |  |  | 5 |  | 23 | use warnings; | 
|  | 5 |  |  |  |  | 11 |  | 
|  | 5 |  |  |  |  | 27903 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | require Exporter; | 
| 9 |  |  |  |  |  |  | our @ISA    = qw(Exporter); | 
| 10 |  |  |  |  |  |  | our @EXPORT = qw(perl_tokens); | 
| 11 |  |  |  |  |  |  |  | 
| 12 |  |  |  |  |  |  | our $VERSION = '0.10'; | 
| 13 |  |  |  |  |  |  |  | 
| 14 |  |  |  |  |  |  | =encoding utf8 | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | =head1 NAME | 
| 17 |  |  |  |  |  |  |  | 
| 18 |  |  |  |  |  |  | Perl::Tokenizer - A tiny Perl code tokenizer. | 
| 19 |  |  |  |  |  |  |  | 
| 20 |  |  |  |  |  |  | =head1 VERSION | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | Version 0.10 | 
| 23 |  |  |  |  |  |  |  | 
| 24 |  |  |  |  |  |  | =cut | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | my $make_esc_delim = sub { | 
| 27 |  |  |  |  |  |  | if ($_[0] eq '\\') { | 
| 28 |  |  |  |  |  |  | return qr{\\(.*?)\\}s; | 
| 29 |  |  |  |  |  |  | } | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | my $delim = quotemeta shift; | 
| 32 |  |  |  |  |  |  | qr{$delim([^$delim\\]*+(?>\\.|[^$delim\\]+)*+)$delim}s; | 
| 33 |  |  |  |  |  |  | }; | 
| 34 |  |  |  |  |  |  |  | 
| 35 |  |  |  |  |  |  | my $make_end_delim = sub { | 
| 36 |  |  |  |  |  |  | if ($_[0] eq '\\') { | 
| 37 |  |  |  |  |  |  | return qr{.*?\\}s; | 
| 38 |  |  |  |  |  |  | } | 
| 39 |  |  |  |  |  |  |  | 
| 40 |  |  |  |  |  |  | my $delim = quotemeta shift; | 
| 41 |  |  |  |  |  |  | qr{[^$delim\\]*+(?>\\.|[^$delim\\]+)*+$delim}s; | 
| 42 |  |  |  |  |  |  | }; | 
| 43 |  |  |  |  |  |  |  | 
| 44 |  |  |  |  |  |  | my %bdelims; | 
| 45 |  |  |  |  |  |  | foreach my $d ([qw~< >~], [qw~( )~], [qw~{ }~], [qw~[ ]~]) { | 
| 46 |  |  |  |  |  |  | my @ed = map { quotemeta } @{$d}; | 
| 47 |  |  |  |  |  |  |  | 
| 48 |  |  |  |  |  |  | $bdelims{$d->[0]} = qr{ | 
| 49 |  |  |  |  |  |  | $ed[0] | 
| 50 |  |  |  |  |  |  | (?> | 
| 51 |  |  |  |  |  |  | [^$ed[0]$ed[1]\\]+ | 
| 52 |  |  |  |  |  |  | | | 
| 53 |  |  |  |  |  |  | \\. | 
| 54 |  |  |  |  |  |  | | | 
| 55 |  |  |  |  |  |  | (??{$bdelims{$d->[0]}}) | 
| 56 |  |  |  |  |  |  | )* | 
| 57 |  |  |  |  |  |  | $ed[1] | 
| 58 |  |  |  |  |  |  | }xs; | 
| 59 |  |  |  |  |  |  | } | 
| 60 |  |  |  |  |  |  |  | 
| 61 |  |  |  |  |  |  | # string - single quote | 
| 62 |  |  |  |  |  |  | my $str_sq = $make_esc_delim->(q{'}); | 
| 63 |  |  |  |  |  |  |  | 
| 64 |  |  |  |  |  |  | # string - double quote | 
| 65 |  |  |  |  |  |  | my $str_dq = $make_esc_delim->(q{"}); | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | # backtick - backquote | 
| 68 |  |  |  |  |  |  | my $str_bq = $make_esc_delim->(q{`}); | 
| 69 |  |  |  |  |  |  |  | 
| 70 |  |  |  |  |  |  | # regex - // | 
| 71 |  |  |  |  |  |  | my $match_re = $make_esc_delim->(q{/}); | 
| 72 |  |  |  |  |  |  |  | 
| 73 |  |  |  |  |  |  | # glob/readline | 
| 74 |  |  |  |  |  |  | my $glob = $bdelims{'<'}; | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | # Cache regular expressions that are generated dynamically | 
| 77 |  |  |  |  |  |  | my %cache_esc; | 
| 78 |  |  |  |  |  |  | my %cache_end; | 
| 79 |  |  |  |  |  |  |  | 
| 80 |  |  |  |  |  |  | # Double pairs | 
| 81 |  |  |  |  |  |  | my $dpairs = qr{ | 
| 82 |  |  |  |  |  |  | (?= | 
| 83 |  |  |  |  |  |  | (?(?<=\s) | 
| 84 |  |  |  |  |  |  | (.) | 
| 85 |  |  |  |  |  |  | | | 
| 86 |  |  |  |  |  |  | (\W) | 
| 87 |  |  |  |  |  |  | ) | 
| 88 |  |  |  |  |  |  | ) | 
| 89 |  |  |  |  |  |  | (??{$bdelims{$+} // ($cache_esc{$+} //= $make_esc_delim->($+))}) | 
| 90 |  |  |  |  |  |  | }x; | 
| 91 |  |  |  |  |  |  |  | 
| 92 |  |  |  |  |  |  | # Double pairs -- comments | 
| 93 |  |  |  |  |  |  | my $dcomm = qr{ | 
| 94 |  |  |  |  |  |  | \s* (?>(?<=\s)\# (?-s:.*) \s*)* | 
| 95 |  |  |  |  |  |  | }x; | 
| 96 |  |  |  |  |  |  |  | 
| 97 |  |  |  |  |  |  | # Quote-like balanced (q{}, m//) | 
| 98 |  |  |  |  |  |  | my $make_single_q_balanced = sub { | 
| 99 |  |  |  |  |  |  | my $name = shift; | 
| 100 |  |  |  |  |  |  | qr{ | 
| 101 |  |  |  |  |  |  | $name | 
| 102 |  |  |  |  |  |  | $dcomm | 
| 103 |  |  |  |  |  |  | $dpairs | 
| 104 |  |  |  |  |  |  | }x; | 
| 105 |  |  |  |  |  |  | }; | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | # Quote-like balanced (q{}, m//) | 
| 108 |  |  |  |  |  |  | my %single_q; | 
| 109 |  |  |  |  |  |  | foreach my $name (qw(q qq qr qw qx m)) { | 
| 110 |  |  |  |  |  |  | $single_q{$name} = $make_single_q_balanced->($name); | 
| 111 |  |  |  |  |  |  | } | 
| 112 |  |  |  |  |  |  |  | 
| 113 |  |  |  |  |  |  | # First of balanced pairs | 
| 114 |  |  |  |  |  |  | my $bbpair = qr~[<\[\{\(]~; | 
| 115 |  |  |  |  |  |  |  | 
| 116 |  |  |  |  |  |  | my $make_double_q_balanced = sub { | 
| 117 |  |  |  |  |  |  | my $name = shift; | 
| 118 |  |  |  |  |  |  | qr{ | 
| 119 |  |  |  |  |  |  | $name | 
| 120 |  |  |  |  |  |  | $dcomm | 
| 121 |  |  |  |  |  |  |  | 
| 122 |  |  |  |  |  |  | (?(?=$bbpair)                    # balanced pairs (e.g.: s{}//) | 
| 123 |  |  |  |  |  |  | $dpairs | 
| 124 |  |  |  |  |  |  | $dcomm | 
| 125 |  |  |  |  |  |  | $dpairs | 
| 126 |  |  |  |  |  |  | |                     # or: single delims (e.g.: s///) | 
| 127 |  |  |  |  |  |  | $dpairs | 
| 128 |  |  |  |  |  |  | (??{$cache_end{$+} //= $make_end_delim->($+)}) | 
| 129 |  |  |  |  |  |  | ) | 
| 130 |  |  |  |  |  |  | }x; | 
| 131 |  |  |  |  |  |  | }; | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | # Double quote-like balanced (s{}{}, s///) | 
| 134 |  |  |  |  |  |  | my %double_q; | 
| 135 |  |  |  |  |  |  | foreach my $name (qw(tr s y)) { | 
| 136 |  |  |  |  |  |  | $double_q{$name} = $make_double_q_balanced->($name); | 
| 137 |  |  |  |  |  |  | } | 
| 138 |  |  |  |  |  |  |  | 
| 139 |  |  |  |  |  |  | my $number     = qr{(?=\.?[0-9])[0-9_]*(?:\.(?!\.)[0-9_]*)?(?:[Ee](?:[+-]?[0-9_]+))?}; | 
| 140 |  |  |  |  |  |  | my $hex_num    = qr{0x[_0-9A-Fa-f]*}; | 
| 141 |  |  |  |  |  |  | my $binary_num = qr{0b[_01]*}; | 
| 142 |  |  |  |  |  |  |  | 
| 143 |  |  |  |  |  |  | my $var_name = qr{(?>\w+|(?>::)+|'(?=\w))++}; | 
| 144 |  |  |  |  |  |  | my $vstring  = qr{\b(?:v[0-9]+(?>\.[0-9][0-9_]*+)*+ | [0-9][0-9_]*(?>\.[0-9][0-9_]*){2,})\b}x; | 
| 145 |  |  |  |  |  |  |  | 
| 146 |  |  |  |  |  |  | # HERE-DOC beginning | 
| 147 |  |  |  |  |  |  | my $bhdoc = qr{ | 
| 148 |  |  |  |  |  |  | <<(?>\h*(?>$str_sq|$str_dq)|\\?+(\w+)) | 
| 149 |  |  |  |  |  |  | }x; | 
| 150 |  |  |  |  |  |  |  | 
| 151 |  |  |  |  |  |  | my $tr_flags             = qr{[rcds]*}; | 
| 152 |  |  |  |  |  |  | my $match_flags          = qr{[mnsixpogcdual]*}; | 
| 153 |  |  |  |  |  |  | my $substitution_flags   = qr{[mnsixpogcerdual]*}; | 
| 154 |  |  |  |  |  |  | my $compiled_regex_flags = qr{[mnsixpodual]*}; | 
| 155 |  |  |  |  |  |  |  | 
| 156 |  |  |  |  |  |  | my @postfix_operators    = qw( ++ -- ); | 
| 157 |  |  |  |  |  |  | my @prec_operators       = qw ( ... .. -> ++ -- =~ <=> \\ ? ~~ ~. ~ : >> >= > << <= < == != ! ); | 
| 158 |  |  |  |  |  |  | my @assignment_operators = qw( && || // ** % ^. ^ &. & |. | * + - = / . << >> ); | 
| 159 |  |  |  |  |  |  |  | 
| 160 |  |  |  |  |  |  | my $operators = do { | 
| 161 |  |  |  |  |  |  | local $" = '|'; | 
| 162 |  |  |  |  |  |  | qr{@{[map{quotemeta} @prec_operators, @assignment_operators]}}; | 
| 163 |  |  |  |  |  |  | }; | 
| 164 |  |  |  |  |  |  |  | 
| 165 |  |  |  |  |  |  | my $postfix_operators = do { | 
| 166 |  |  |  |  |  |  | local $" = '|'; | 
| 167 |  |  |  |  |  |  | qr{@{[map{quotemeta} @postfix_operators]}}; | 
| 168 |  |  |  |  |  |  | }; | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | my $assignment_operators = do { | 
| 171 |  |  |  |  |  |  | local $" = '|'; | 
| 172 |  |  |  |  |  |  | qr{@{[map{($_ eq '=') ? '=(?!=)' : "\Q$_=\E"} @assignment_operators]}}; | 
| 173 |  |  |  |  |  |  | }; | 
| 174 |  |  |  |  |  |  |  | 
| 175 |  |  |  |  |  |  | my @special_var_names = (qw( \\ | + / ~ ! @ $ % ^ & * ( ) } < > : ; " ` ' ? = - [ ] . ), '#', ','); | 
| 176 |  |  |  |  |  |  | my $special_var_names = do { | 
| 177 |  |  |  |  |  |  | local $" = '|'; | 
| 178 |  |  |  |  |  |  | qr{@{[map {quotemeta} @special_var_names]}}; | 
| 179 |  |  |  |  |  |  | }; | 
| 180 |  |  |  |  |  |  |  | 
| 181 |  |  |  |  |  |  | my $bracket_var = qr~(?=\s*\{)(?!\s*\{\s*(?:\^?$var_name|$special_var_names|\{)\s*\})~; | 
| 182 |  |  |  |  |  |  |  | 
| 183 |  |  |  |  |  |  | #<<< | 
| 184 |  |  |  |  |  |  | my $perl_keywords = | 
| 185 |  |  |  |  |  |  | qr/(?:CORE::)?(?>(a(?:(?:ccept|larm|tan2|bs|nd))|b(?:in(?:(?:mode|d))|less|reak)|c(?:aller|h(?:dir|mod | 
| 186 |  |  |  |  |  |  | |o(?:(?:m?p|wn))|r(?:oot)?)|lose(?:dir)?|mp|o(?:n(?:(?:tinue|nect))|s)|rypt)|d(?:bm(?:(?:close|open))| | 
| 187 |  |  |  |  |  |  | e(?:f(?:(?:ault|ined))|lete)|ie|ump|o)|e(?:ach|ls(?:(?:if|e))|nd(?:grent|hostent|netent|p(?:(?:roto|w) | 
| 188 |  |  |  |  |  |  | ent)|servent)|of|val|x(?:ec|i(?:(?:sts|t))|p)|q)|f(?:c(?:ntl)?|ileno|lock|or(?:(?:each|m(?:(?:line|at) | 
| 189 |  |  |  |  |  |  | )|k))?)|g(?:e(?:t(?:gr(?:(?:ent|gid|nam))|host(?:by(?:(?:addr|name))|ent)|login|net(?:by(?:(?:addr|nam | 
| 190 |  |  |  |  |  |  | e))|ent)|p(?:eername|grp|pid|r(?:iority|oto(?:byn(?:(?:umber|ame))|ent))|w(?:(?:ent|nam|uid)))|s(?:erv | 
| 191 |  |  |  |  |  |  | (?:by(?:(?:name|port))|ent)|ock(?:(?:name|opt)))|c))?|iven|lob|mtime|oto|rep|t)|hex|i(?:mport|n(?:(?:d | 
| 192 |  |  |  |  |  |  | ex|t))|octl|sa|f)|join|k(?:(?:eys|ill))|l(?:ast|c(?:first)?|e(?:ngth)?|i(?:(?:sten|nk))|o(?:c(?:al(?:t | 
| 193 |  |  |  |  |  |  | ime)?|k)|g)|stat|t)|m(?:ap|kdir|sg(?:(?:ctl|get|rcv|snd))|y)|n(?:e(?:xt)?|ot?)|o(?:ct|pen(?:dir)?|rd?| | 
| 194 |  |  |  |  |  |  | ur)|p(?:ack(?:age)?|ipe|o[ps]|r(?:(?:ototype|intf?))|ush)|quotemeta|r(?:and|e(?:ad(?:(?:(?:lin[ek]|pip | 
| 195 |  |  |  |  |  |  | e|dir)))?|cv|do|name|quire|set|turn|verse|winddir|f)|index|mdir)|s(?:ay|calar|e(?:ek(?:dir)?|lect|m(?: | 
| 196 |  |  |  |  |  |  | (?:ctl|get|op))|nd|t(?:grent|hostent|netent|p(?:grp|r(?:(?:iority|otoent))|went)|s(?:(?:erven|ockop)t) | 
| 197 |  |  |  |  |  |  | ))|h(?:ift|m(?:(?:write|read|ctl|get))|utdown)|in|leep|o(?:cket(?:pair)?|rt)|p(?:li(?:(?:ce|t))|rintf) | 
| 198 |  |  |  |  |  |  | |qrt|rand|t(?:(?:ate?|udy))|ub(?:str)?|y(?:mlink|s(?:(?:write|call|open|read|seek|tem))))|t(?:ell(?:di | 
| 199 |  |  |  |  |  |  | r)?|i(?:(?:mes?|ed?))|runcate)|u(?:c(?:first)?|mask|n(?:def|l(?:(?:ess|ink))|pack|shift|ti[el])|se|tim | 
| 200 |  |  |  |  |  |  | e)|v(?:(?:alues|ec))|w(?:a(?:it(?:pid)?|ntarray|rn)|h(?:(?:ile|en))|rite)|xor|BEGIN|END|INIT|CHECK))\b | 
| 201 |  |  |  |  |  |  | /x; | 
| 202 |  |  |  |  |  |  | #>>> | 
| 203 |  |  |  |  |  |  |  | 
| 204 |  |  |  |  |  |  | my $perl_filetests = qr/\-[ABCMORSTWXbcdefgkloprstuwxz]/; | 
| 205 |  |  |  |  |  |  |  | 
| 206 |  |  |  |  |  |  | sub perl_tokens(&$) { | 
| 207 | 66 |  |  | 66 | 1 | 23534 | my ($callback, $code) = @_; | 
| 208 |  |  |  |  |  |  |  | 
| 209 | 66 | 50 |  |  |  | 229 | ref($callback) eq 'CODE' | 
| 210 |  |  |  |  |  |  | or die "usage: perl_tokens {...} \$code;"; | 
| 211 |  |  |  |  |  |  |  | 
| 212 | 66 |  |  |  |  | 104 | my $variable      = 0; | 
| 213 | 66 |  |  |  |  | 101 | my $flat          = 0; | 
| 214 | 66 |  |  |  |  | 93 | my $regex         = 1; | 
| 215 | 66 |  |  |  |  | 98 | my $canpod        = 1; | 
| 216 | 66 |  |  |  |  | 82 | my $proto         = 0; | 
| 217 | 66 |  |  |  |  | 85 | my $format        = 0; | 
| 218 | 66 |  |  |  |  | 95 | my $expect_format = 0; | 
| 219 | 66 |  |  |  |  | 85 | my $postfix_op    = 0; | 
| 220 | 66 |  |  |  |  | 93 | my @heredoc_eofs; | 
| 221 |  |  |  |  |  |  |  | 
| 222 | 66 |  |  |  |  | 138 | $code = "$code"; | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | { | 
| 225 | 66 | 100 | 66 |  |  | 93 | if ($expect_format == 1 and $code =~ /\G(?=\R)/) { | 
|  | 351 |  |  |  |  | 1066 |  | 
| 226 | 2 | 50 |  |  |  | 16 | if ($code =~ /.*?\R\.\h*(?=\R|\z)/cgs) { | 
| 227 | 2 |  |  |  |  | 10 | $callback->('vertical_space', $-[0],     $-[0] + 1); | 
| 228 | 2 |  |  |  |  | 1058 | $callback->('format',         $-[0] + 1, $+[0]); | 
| 229 | 2 |  |  |  |  | 1122 | $expect_format = 0; | 
| 230 | 2 |  |  |  |  | 5 | $canpod        = 1; | 
| 231 | 2 |  |  |  |  | 4 | $regex         = 1; | 
| 232 | 2 |  |  |  |  | 4 | $postfix_op    = 0; | 
| 233 |  |  |  |  |  |  | } | 
| 234 |  |  |  |  |  |  | else { | 
| 235 | 0 | 0 |  |  |  | 0 | if ($code =~ /\G(.)/cgs) { | 
| 236 | 0 |  |  |  |  | 0 | $callback->('unknown_char', $-[0], $+[0]); | 
| 237 | 0 |  |  |  |  | 0 | redo; | 
| 238 |  |  |  |  |  |  | } | 
| 239 |  |  |  |  |  |  | } | 
| 240 | 2 |  |  |  |  | 4 | redo; | 
| 241 |  |  |  |  |  |  | } | 
| 242 |  |  |  |  |  |  |  | 
| 243 | 349 | 100 | 100 |  |  | 914 | if ($#heredoc_eofs >= 0 and $code =~ /\G(?=\R)/) { | 
| 244 | 3 |  |  |  |  | 8 | my $token = shift @heredoc_eofs; | 
| 245 | 3 | 50 |  |  |  | 76 | if ($code =~ m{\G.*?\R\Q$token\E(?=\R|\z)}sgc) { | 
| 246 | 3 |  |  |  |  | 21 | $callback->('vertical_space', $-[0],     $-[0] + 1); | 
| 247 | 3 |  |  |  |  | 1664 | $callback->('heredoc',        $-[0] + 1, $+[0]); | 
| 248 |  |  |  |  |  |  | } | 
| 249 | 3 |  |  |  |  | 1655 | redo; | 
| 250 |  |  |  |  |  |  | } | 
| 251 |  |  |  |  |  |  |  | 
| 252 | 346 | 100 | 100 |  |  | 3616 | if (($regex == 1 or $code =~ /\G(?!<<[0-9])/) and $code =~ m{\G$bhdoc}gc) { | 
|  |  |  | 100 |  |  |  |  | 
| 253 | 3 |  |  |  |  | 15 | $callback->('heredoc_beg', $-[0], $+[0]); | 
| 254 | 3 |  |  |  |  | 1733 | push @heredoc_eofs, $+; | 
| 255 | 3 |  |  |  |  | 8 | $regex  = 0; | 
| 256 | 3 |  |  |  |  | 6 | $canpod = 0; | 
| 257 | 3 |  |  |  |  | 7 | redo; | 
| 258 |  |  |  |  |  |  | } | 
| 259 |  |  |  |  |  |  |  | 
| 260 | 343 | 50 | 66 |  |  | 1057 | if ($canpod == 1 and $code =~ /\G^=[a-zA-Z]/cgm) { | 
| 261 | 0 | 0 |  |  |  | 0 | $code =~ /\G.*?\R=cut\h*(?=\R|\z)/cgs | 
| 262 |  |  |  |  |  |  | or $code =~ /\G.*\z/cgs; | 
| 263 | 0 |  |  |  |  | 0 | $callback->('pod', $-[0] - 2, $+[0]); | 
| 264 | 0 |  |  |  |  | 0 | redo; | 
| 265 |  |  |  |  |  |  | } | 
| 266 |  |  |  |  |  |  |  | 
| 267 | 343 | 100 |  |  |  | 1055 | if ($code =~ /\G(?=\s)/) { | 
| 268 | 40 | 100 |  |  |  | 123 | if ($code =~ /\G\h+/cg) { | 
| 269 | 37 |  |  |  |  | 169 | $callback->('horizontal_space', $-[0], $+[0]); | 
| 270 | 37 |  |  |  |  | 20706 | redo; | 
| 271 |  |  |  |  |  |  | } | 
| 272 |  |  |  |  |  |  |  | 
| 273 | 3 | 50 |  |  |  | 13 | if ($code =~ /\G\v+/cg) { | 
| 274 | 3 |  |  |  |  | 17 | $callback->('vertical_space', $-[0], $+[0]); | 
| 275 | 3 |  |  |  |  | 1643 | redo; | 
| 276 |  |  |  |  |  |  | } | 
| 277 |  |  |  |  |  |  |  | 
| 278 | 0 | 0 |  |  |  | 0 | if ($code =~ /\G\s+/cg) { | 
| 279 | 0 |  |  |  |  | 0 | $callback->('other_space', $-[0], $+[0]); | 
| 280 | 0 |  |  |  |  | 0 | redo; | 
| 281 |  |  |  |  |  |  | } | 
| 282 |  |  |  |  |  |  | } | 
| 283 |  |  |  |  |  |  |  | 
| 284 | 303 | 100 |  |  |  | 659 | if ($variable > 0) { | 
| 285 | 22 | 50 | 33 |  |  | 230 | if ($code =~ m{\G$var_name}gco or $code =~ m{\G(?<=\$)\#$var_name}gco) { | 
| 286 | 22 |  |  |  |  | 134 | $callback->('var_name', $-[0], $+[0]); | 
| 287 | 22 |  |  |  |  | 12314 | $regex    = 0; | 
| 288 | 22 |  |  |  |  | 37 | $variable = 0; | 
| 289 | 22 |  |  |  |  | 28 | $canpod   = 0; | 
| 290 | 22 | 100 |  |  |  | 134 | $flat     = ($code =~ /\G(?=\s*\{)/) ? 1 : 0; | 
| 291 | 22 |  |  |  |  | 52 | redo; | 
| 292 |  |  |  |  |  |  | } | 
| 293 |  |  |  |  |  |  |  | 
| 294 | 0 | 0 | 0 |  |  | 0 | if ( | 
|  |  |  | 0 |  |  |  |  | 
| 295 |  |  |  |  |  |  | $code =~ m{\G(?!\$+$var_name)}o | 
| 296 |  |  |  |  |  |  | and (   $code =~ m~\G(?:\s+|#?)\{\s*(?:$var_name|$special_var_names|[#{])\s*\}~goc | 
| 297 |  |  |  |  |  |  | or $code =~ m{\G(?:\^\w+|#(?!\{)|$special_var_names)}gco | 
| 298 |  |  |  |  |  |  | or $code =~ /\G#/cg) | 
| 299 |  |  |  |  |  |  | ) { | 
| 300 | 0 |  |  |  |  | 0 | $callback->('special_var_name', $-[0], $+[0]); | 
| 301 | 0 |  |  |  |  | 0 | $regex    = 0; | 
| 302 | 0 |  |  |  |  | 0 | $canpod   = 0; | 
| 303 | 0 |  |  |  |  | 0 | $variable = 0; | 
| 304 | 0 | 0 |  |  |  | 0 | $flat     = ($code =~ /\G(? | 
| 305 | 0 |  |  |  |  | 0 | redo; | 
| 306 |  |  |  |  |  |  | } | 
| 307 |  |  |  |  |  |  |  | 
| 308 |  |  |  |  |  |  | # continue | 
| 309 |  |  |  |  |  |  | } | 
| 310 |  |  |  |  |  |  |  | 
| 311 | 281 | 100 |  |  |  | 604 | if ($code =~ /\G#.*/cg) { | 
| 312 | 2 |  |  |  |  | 11 | $callback->('comment', $-[0], $+[0]); | 
| 313 | 2 |  |  |  |  | 1210 | redo; | 
| 314 |  |  |  |  |  |  | } | 
| 315 |  |  |  |  |  |  |  | 
| 316 | 279 | 100 | 100 |  |  | 1112 | if (($regex == 1 and not($postfix_op)) or $code =~ /\G(?=[\@\$])/) { | 
|  |  |  | 100 |  |  |  |  | 
| 317 | 148 | 100 |  |  |  | 383 | if ($code =~ /\G\$/cg) { | 
| 318 | 21 |  |  |  |  | 107 | $callback->('scalar_sigil', $-[0], $+[0]); | 
| 319 | 21 | 50 |  |  |  | 12341 | $code =~ /\G$bracket_var/o or ++$variable; | 
| 320 | 21 |  |  |  |  | 44 | $regex  = 0; | 
| 321 | 21 |  |  |  |  | 33 | $canpod = 0; | 
| 322 | 21 |  |  |  |  | 28 | $flat   = 1; | 
| 323 | 21 |  |  |  |  | 45 | redo; | 
| 324 |  |  |  |  |  |  | } | 
| 325 |  |  |  |  |  |  |  | 
| 326 | 127 | 100 |  |  |  | 278 | if ($code =~ /\G\@/cg) { | 
| 327 | 1 |  |  |  |  | 6 | $callback->('array_sigil', $-[0], $+[0]); | 
| 328 | 1 | 50 |  |  |  | 670 | $code =~ /\G$bracket_var/o or ++$variable; | 
| 329 | 1 |  |  |  |  | 5 | $regex  = 0; | 
| 330 | 1 |  |  |  |  | 3 | $canpod = 0; | 
| 331 | 1 |  |  |  |  | 2 | $flat   = 1; | 
| 332 | 1 |  |  |  |  | 3 | redo; | 
| 333 |  |  |  |  |  |  | } | 
| 334 |  |  |  |  |  |  |  | 
| 335 | 126 | 50 |  |  |  | 263 | if ($code =~ /\G\%/cg) { | 
| 336 | 0 |  |  |  |  | 0 | $callback->('hash_sigil', $-[0], $+[0]); | 
| 337 | 0 | 0 |  |  |  | 0 | $code =~ /\G$bracket_var/o or ++$variable; | 
| 338 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 339 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 340 | 0 |  |  |  |  | 0 | $flat   = 1; | 
| 341 | 0 |  |  |  |  | 0 | redo; | 
| 342 |  |  |  |  |  |  | } | 
| 343 |  |  |  |  |  |  |  | 
| 344 | 126 | 50 |  |  |  | 244 | if ($code =~ /\G\*/cg) { | 
| 345 | 0 |  |  |  |  | 0 | $callback->('glob_sigil', $-[0], $+[0]); | 
| 346 | 0 | 0 |  |  |  | 0 | $code =~ /\G$bracket_var/o or ++$variable; | 
| 347 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 348 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 349 | 0 |  |  |  |  | 0 | $flat   = 1; | 
| 350 | 0 |  |  |  |  | 0 | redo; | 
| 351 |  |  |  |  |  |  | } | 
| 352 |  |  |  |  |  |  |  | 
| 353 | 126 | 50 |  |  |  | 253 | if ($code =~ /\G&/cg) { | 
| 354 | 0 |  |  |  |  | 0 | $callback->('ampersand_sigil', $-[0], $+[0]); | 
| 355 | 0 | 0 |  |  |  | 0 | $code =~ /\G$bracket_var/o or ++$variable; | 
| 356 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 357 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 358 | 0 |  |  |  |  | 0 | $flat   = 1; | 
| 359 | 0 |  |  |  |  | 0 | redo; | 
| 360 |  |  |  |  |  |  | } | 
| 361 |  |  |  |  |  |  |  | 
| 362 |  |  |  |  |  |  | # continue | 
| 363 |  |  |  |  |  |  | } | 
| 364 |  |  |  |  |  |  |  | 
| 365 | 257 | 50 | 33 |  |  | 581 | if ($proto == 1 and $code =~ /\G\(.*?\)/cgs) { | 
| 366 | 0 |  |  |  |  | 0 | $callback->('sub_proto', $-[0], $+[0]); | 
| 367 | 0 |  |  |  |  | 0 | $proto  = 0; | 
| 368 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 369 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 370 | 0 |  |  |  |  | 0 | redo; | 
| 371 |  |  |  |  |  |  | } | 
| 372 |  |  |  |  |  |  |  | 
| 373 | 257 | 100 |  |  |  | 467 | if ($code =~ /\G\(/cg) { | 
| 374 | 2 |  |  |  |  | 12 | $callback->('parenthesis_open', $-[0], $+[0]); | 
| 375 | 2 |  |  |  |  | 1163 | $regex  = 1; | 
| 376 | 2 |  |  |  |  | 5 | $flat   = 0; | 
| 377 | 2 |  |  |  |  | 4 | $canpod = 0; | 
| 378 | 2 |  |  |  |  | 5 | redo; | 
| 379 |  |  |  |  |  |  | } | 
| 380 |  |  |  |  |  |  |  | 
| 381 | 255 | 100 |  |  |  | 487 | if ($code =~ /\G\)/cg) { | 
| 382 | 2 |  |  |  |  | 12 | $callback->('parenthesis_close', $-[0], $+[0]); | 
| 383 | 2 |  |  |  |  | 1080 | $regex  = 0; | 
| 384 | 2 |  |  |  |  | 5 | $canpod = 0; | 
| 385 | 2 |  |  |  |  | 5 | $flat   = 0; | 
| 386 | 2 |  |  |  |  | 4 | redo; | 
| 387 |  |  |  |  |  |  | } | 
| 388 |  |  |  |  |  |  |  | 
| 389 | 253 | 100 |  |  |  | 490 | if ($code =~ /\G\{/cg) { | 
| 390 | 4 |  |  |  |  | 37 | $callback->('curly_bracket_open', $-[0], $+[0]); | 
| 391 | 4 |  |  |  |  | 2211 | $regex = 1; | 
| 392 | 4 |  |  |  |  | 9 | $proto = 0; | 
| 393 | 4 |  |  |  |  | 9 | redo; | 
| 394 |  |  |  |  |  |  | } | 
| 395 |  |  |  |  |  |  |  | 
| 396 | 249 | 100 |  |  |  | 473 | if ($code =~ /\G\}/cg) { | 
| 397 | 4 |  |  |  |  | 18 | $callback->('curly_bracket_close', $-[0], $+[0]); | 
| 398 | 4 |  |  |  |  | 2195 | $flat   = 0; | 
| 399 | 4 |  |  |  |  | 6 | $canpod = 1; | 
| 400 | 4 |  |  |  |  | 10 | redo; | 
| 401 |  |  |  |  |  |  | } | 
| 402 |  |  |  |  |  |  |  | 
| 403 | 245 | 100 |  |  |  | 478 | if ($code =~ /\G\[/cg) { | 
| 404 | 2 |  |  |  |  | 11 | $callback->('right_bracket_open', $-[0], $+[0]); | 
| 405 | 2 |  |  |  |  | 1121 | $regex      = 1; | 
| 406 | 2 |  |  |  |  | 6 | $postfix_op = 0; | 
| 407 | 2 |  |  |  |  | 4 | $flat       = 0; | 
| 408 | 2 |  |  |  |  | 4 | $canpod     = 0; | 
| 409 | 2 |  |  |  |  | 4 | redo; | 
| 410 |  |  |  |  |  |  | } | 
| 411 |  |  |  |  |  |  |  | 
| 412 | 243 | 100 |  |  |  | 450 | if ($code =~ /\G\]/cg) { | 
| 413 | 2 |  |  |  |  | 10 | $callback->('right_bracket_close', $-[0], $+[0]); | 
| 414 | 2 |  |  |  |  | 1095 | $regex  = 0; | 
| 415 | 2 |  |  |  |  | 5 | $canpod = 0; | 
| 416 | 2 |  |  |  |  | 3 | $flat   = 0; | 
| 417 | 2 |  |  |  |  | 4 | redo; | 
| 418 |  |  |  |  |  |  | } | 
| 419 |  |  |  |  |  |  |  | 
| 420 | 241 | 50 |  |  |  | 506 | if ($proto == 0) { | 
| 421 | 241 | 100 | 100 |  |  | 590 | if ($canpod == 1 and $code =~ /\Gformat\b/cg) { | 
| 422 | 2 |  |  |  |  | 14 | $callback->('keyword', $-[0], $+[0]); | 
| 423 | 2 |  |  |  |  | 1157 | $regex  = 0; | 
| 424 | 2 |  |  |  |  | 4 | $canpod = 0; | 
| 425 | 2 |  |  |  |  | 4 | $format = 1; | 
| 426 | 2 |  |  |  |  | 5 | redo; | 
| 427 |  |  |  |  |  |  | } | 
| 428 |  |  |  |  |  |  |  | 
| 429 | 239 | 100 | 66 |  |  | 4338 | if ( | 
|  |  |  | 100 |  |  |  |  | 
| 430 |  |  |  |  |  |  | ( | 
| 431 |  |  |  |  |  |  | $flat == 0 or (    $flat == 1 | 
| 432 |  |  |  |  |  |  | and $code =~ /\G(?!\w+\h*\})/) | 
| 433 |  |  |  |  |  |  | ) | 
| 434 |  |  |  |  |  |  | and $code =~ m{\G(?)$perl_keywords}gco | 
| 435 |  |  |  |  |  |  | ) { | 
| 436 | 5 |  |  |  |  | 21 | my $name         = $1; | 
| 437 | 5 |  |  |  |  | 22 | my @pos          = ($-[0], $+[0]); | 
| 438 | 5 |  |  |  |  | 22 | my $is_bare_word = ($code =~ /\G(?=\h*=>)/); | 
| 439 | 5 | 50 |  |  |  | 26 | $callback->(($is_bare_word ? 'bare_word' : 'keyword'), @pos); | 
| 440 |  |  |  |  |  |  |  | 
| 441 | 5 | 50 | 33 |  |  | 2913 | if ($name eq 'sub' and not $is_bare_word) { | 
| 442 | 0 |  |  |  |  | 0 | $proto = 1; | 
| 443 | 0 |  |  |  |  | 0 | $regex = 0; | 
| 444 |  |  |  |  |  |  | } | 
| 445 |  |  |  |  |  |  | else { | 
| 446 | 5 |  |  |  |  | 11 | $regex      = 1; | 
| 447 | 5 |  |  |  |  | 8 | $postfix_op = 0; | 
| 448 |  |  |  |  |  |  | } | 
| 449 | 5 |  |  |  |  | 9 | $canpod = 0; | 
| 450 | 5 |  |  |  |  | 43 | redo; | 
| 451 |  |  |  |  |  |  | } | 
| 452 |  |  |  |  |  |  |  | 
| 453 |  |  |  |  |  |  | # continue | 
| 454 |  |  |  |  |  |  | } | 
| 455 |  |  |  |  |  |  |  | 
| 456 | 234 | 50 | 33 |  |  | 1148 | if ($code =~ /\G(?!(?>tr|[ysm]|q[rwxq]?)\h*=>)/ and $code =~ /\G(?)/) { | 
| 457 |  |  |  |  |  |  |  | 
| 458 | 234 | 100 | 66 |  |  | 1021 | if (($flat == 1 and $code =~ /\G(?=[a-z]+\h*\})/) or $code =~ /\G((?<=\{)|(?<=\{\h))(?=[a-z]+\h*\})/) { | 
|  |  |  | 100 |  |  |  |  | 
| 459 |  |  |  |  |  |  | ## ok | 
| 460 |  |  |  |  |  |  | } | 
| 461 |  |  |  |  |  |  | else { | 
| 462 |  |  |  |  |  |  |  | 
| 463 | 231 | 100 |  |  |  | 787 | if ($code =~ m{\G $double_q{s} $substitution_flags }gcxo) { | 
| 464 | 4 |  |  |  |  | 20 | $callback->('substitution', $-[0], $+[0]); | 
| 465 | 4 |  |  |  |  | 2114 | $regex  = 0; | 
| 466 | 4 |  |  |  |  | 6 | $canpod = 0; | 
| 467 | 4 |  |  |  |  | 9 | redo; | 
| 468 |  |  |  |  |  |  | } | 
| 469 |  |  |  |  |  |  |  | 
| 470 | 227 | 100 |  |  |  | 1195 | if ($code =~ m{\G (?> $double_q{tr} | $double_q{y} ) $tr_flags }gxco) { | 
| 471 | 2 |  |  |  |  | 12 | $callback->('transliteration', $-[0], $+[0]); | 
| 472 | 2 |  |  |  |  | 1280 | $regex  = 0; | 
| 473 | 2 |  |  |  |  | 4 | $canpod = 0; | 
| 474 | 2 |  |  |  |  | 7 | redo; | 
| 475 |  |  |  |  |  |  | } | 
| 476 |  |  |  |  |  |  |  | 
| 477 | 225 | 100 | 100 |  |  | 1337 | if ($code =~ m{\G $single_q{m} $match_flags }gcxo | 
|  |  |  | 100 |  |  |  |  | 
| 478 |  |  |  |  |  |  | or ($regex == 1 and $code =~ m{\G $match_re $match_flags }gcxo)) { | 
| 479 | 4 |  |  |  |  | 20 | $callback->('match_regex', $-[0], $+[0]); | 
| 480 | 4 |  |  |  |  | 2155 | $regex  = 0; | 
| 481 | 4 |  |  |  |  | 7 | $canpod = 0; | 
| 482 | 4 |  |  |  |  | 11 | redo; | 
| 483 |  |  |  |  |  |  | } | 
| 484 |  |  |  |  |  |  |  | 
| 485 | 221 | 100 |  |  |  | 594 | if ($code =~ m{\G $single_q{qr} $compiled_regex_flags }gcxo) { | 
| 486 | 1 |  |  |  |  | 7 | $callback->('compiled_regex', $-[0], $+[0]); | 
| 487 | 1 |  |  |  |  | 619 | $regex  = 0; | 
| 488 | 1 |  |  |  |  | 2 | $canpod = 0; | 
| 489 | 1 |  |  |  |  | 5 | redo; | 
| 490 |  |  |  |  |  |  | } | 
| 491 |  |  |  |  |  |  |  | 
| 492 | 220 | 50 |  |  |  | 505 | if ($code =~ m{\G$single_q{q}}gco) { | 
| 493 | 0 |  |  |  |  | 0 | $callback->('q_string', $-[0], $+[0]); | 
| 494 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 495 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 496 | 0 |  |  |  |  | 0 | redo; | 
| 497 |  |  |  |  |  |  | } | 
| 498 |  |  |  |  |  |  |  | 
| 499 | 220 | 50 |  |  |  | 494 | if ($code =~ m{\G$single_q{qq}}gco) { | 
| 500 | 0 |  |  |  |  | 0 | $callback->('qq_string', $-[0], $+[0]); | 
| 501 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 502 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 503 | 0 |  |  |  |  | 0 | redo; | 
| 504 |  |  |  |  |  |  | } | 
| 505 |  |  |  |  |  |  |  | 
| 506 | 220 | 50 |  |  |  | 510 | if ($code =~ m{\G$single_q{qw}}gco) { | 
| 507 | 0 |  |  |  |  | 0 | $callback->('qw_string', $-[0], $+[0]); | 
| 508 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 509 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 510 | 0 |  |  |  |  | 0 | redo; | 
| 511 |  |  |  |  |  |  | } | 
| 512 |  |  |  |  |  |  |  | 
| 513 | 220 | 50 |  |  |  | 523 | if ($code =~ m{\G$single_q{qx}}gco) { | 
| 514 | 0 |  |  |  |  | 0 | $callback->('qx_string', $-[0], $+[0]); | 
| 515 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 516 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 517 | 0 |  |  |  |  | 0 | redo; | 
| 518 |  |  |  |  |  |  | } | 
| 519 |  |  |  |  |  |  | } | 
| 520 |  |  |  |  |  |  |  | 
| 521 |  |  |  |  |  |  | # continue | 
| 522 |  |  |  |  |  |  | } | 
| 523 |  |  |  |  |  |  |  | 
| 524 | 223 | 100 |  |  |  | 592 | if ($code =~ m{\G$str_dq}gco) { | 
| 525 | 10 |  |  |  |  | 52 | $callback->('double_quoted_string', $-[0], $+[0]); | 
| 526 | 10 |  |  |  |  | 5830 | $regex  = 0; | 
| 527 | 10 |  |  |  |  | 19 | $canpod = 0; | 
| 528 | 10 |  |  |  |  | 16 | $flat   = 0; | 
| 529 | 10 |  |  |  |  | 22 | redo; | 
| 530 |  |  |  |  |  |  | } | 
| 531 |  |  |  |  |  |  |  | 
| 532 | 213 | 50 |  |  |  | 522 | if ($code =~ m{\G$str_sq}gco) { | 
| 533 | 0 |  |  |  |  | 0 | $callback->('single_quoted_string', $-[0], $+[0]); | 
| 534 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 535 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 536 | 0 |  |  |  |  | 0 | $flat   = 0; | 
| 537 | 0 |  |  |  |  | 0 | redo; | 
| 538 |  |  |  |  |  |  | } | 
| 539 |  |  |  |  |  |  |  | 
| 540 | 213 | 50 |  |  |  | 492 | if ($code =~ m{\G$str_bq}gco) { | 
| 541 | 0 |  |  |  |  | 0 | $callback->('backtick', $-[0], $+[0]); | 
| 542 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 543 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 544 | 0 |  |  |  |  | 0 | $flat   = 0; | 
| 545 | 0 |  |  |  |  | 0 | redo; | 
| 546 |  |  |  |  |  |  | } | 
| 547 |  |  |  |  |  |  |  | 
| 548 | 213 | 100 |  |  |  | 408 | if ($code =~ /\G;/cg) { | 
| 549 | 4 |  |  |  |  | 25 | $callback->('semicolon', $-[0], $+[0]); | 
| 550 | 4 |  |  |  |  | 2266 | $canpod     = 1; | 
| 551 | 4 |  |  |  |  | 22 | $regex      = 1; | 
| 552 | 4 |  |  |  |  | 11 | $postfix_op = 0; | 
| 553 | 4 |  |  |  |  | 9 | $proto      = 0; | 
| 554 | 4 |  |  |  |  | 7 | $flat       = 0; | 
| 555 | 4 |  |  |  |  | 10 | redo; | 
| 556 |  |  |  |  |  |  | } | 
| 557 |  |  |  |  |  |  |  | 
| 558 | 209 | 50 |  |  |  | 415 | if ($code =~ /\G=>/cg) { | 
| 559 | 0 |  |  |  |  | 0 | $callback->('fat_comma', $-[0], $+[0]); | 
| 560 | 0 |  |  |  |  | 0 | $regex      = 1; | 
| 561 | 0 |  |  |  |  | 0 | $postfix_op = 0; | 
| 562 | 0 |  |  |  |  | 0 | $canpod     = 0; | 
| 563 | 0 |  |  |  |  | 0 | $flat       = 0; | 
| 564 | 0 |  |  |  |  | 0 | redo; | 
| 565 |  |  |  |  |  |  | } | 
| 566 |  |  |  |  |  |  |  | 
| 567 | 209 | 100 |  |  |  | 391 | if ($code =~ /\G,/cg) { | 
| 568 | 3 |  |  |  |  | 17 | $callback->('comma', $-[0], $+[0]); | 
| 569 | 3 |  |  |  |  | 1680 | $regex      = 1; | 
| 570 | 3 |  |  |  |  | 5 | $postfix_op = 0; | 
| 571 | 3 |  |  |  |  | 6 | $canpod     = 0; | 
| 572 | 3 |  |  |  |  | 6 | $flat       = 0; | 
| 573 | 3 |  |  |  |  | 6 | redo; | 
| 574 |  |  |  |  |  |  | } | 
| 575 |  |  |  |  |  |  |  | 
| 576 | 206 | 50 |  |  |  | 745 | if ($code =~ m{\G$vstring}gco) { | 
| 577 | 0 |  |  |  |  | 0 | $callback->('v_string', $-[0], $+[0]); | 
| 578 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 579 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 580 | 0 |  |  |  |  | 0 | redo; | 
| 581 |  |  |  |  |  |  | } | 
| 582 |  |  |  |  |  |  |  | 
| 583 | 206 | 100 |  |  |  | 451 | if ($code =~ m{\G$perl_filetests\b}gco) { | 
| 584 | 1 |  |  |  |  | 7 | my @pos = ($-[0], $+[0]); | 
| 585 | 1 |  |  |  |  | 6 | my $is_bare_word = ($code =~ /\G(?=\h*=>)/); | 
| 586 |  |  |  |  |  |  |  | 
| 587 | 1 | 50 |  |  |  | 8 | $callback->(($is_bare_word ? 'bare_word' : 'file_test'), @pos); | 
| 588 |  |  |  |  |  |  |  | 
| 589 | 1 | 50 |  |  |  | 602 | if ($is_bare_word) { | 
| 590 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 591 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 592 |  |  |  |  |  |  | } | 
| 593 |  |  |  |  |  |  | else { | 
| 594 | 1 |  |  |  |  | 3 | $regex      = 1;    # ambiguous, but possible | 
| 595 | 1 |  |  |  |  | 2 | $postfix_op = 0; | 
| 596 | 1 |  |  |  |  | 3 | $canpod     = 0; | 
| 597 |  |  |  |  |  |  | } | 
| 598 | 1 |  |  |  |  | 2 | redo; | 
| 599 |  |  |  |  |  |  | } | 
| 600 |  |  |  |  |  |  |  | 
| 601 | 205 | 50 |  |  |  | 484 | if ($code =~ /\G(?=__)/) { | 
| 602 | 0 | 0 |  |  |  | 0 | if ($code =~ m{\G__(?>DATA|END)__\b\h*+(?!=>).*\z}gcs) { | 
| 603 | 0 |  |  |  |  | 0 | $callback->('data', $-[0], $+[0]); | 
| 604 | 0 |  |  |  |  | 0 | redo; | 
| 605 |  |  |  |  |  |  | } | 
| 606 |  |  |  |  |  |  |  | 
| 607 | 0 | 0 |  |  |  | 0 | if ($code =~ m{\G__(?>SUB|FILE|PACKAGE|LINE)__\b(?!\h*+=>)}gc) { | 
| 608 | 0 |  |  |  |  | 0 | $callback->('special_keyword', $-[0], $+[0]); | 
| 609 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 610 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 611 | 0 |  |  |  |  | 0 | redo; | 
| 612 |  |  |  |  |  |  | } | 
| 613 |  |  |  |  |  |  |  | 
| 614 |  |  |  |  |  |  | # continue | 
| 615 |  |  |  |  |  |  | } | 
| 616 |  |  |  |  |  |  |  | 
| 617 | 205 | 100 | 100 |  |  | 835 | if ($regex == 1 and $code =~ /\G(? | 
|  |  |  | 100 |  |  |  |  | 
| 618 | 1 |  |  |  |  | 7 | $callback->('glob_readline', $-[0], $+[0]); | 
| 619 | 1 |  |  |  |  | 573 | $regex  = 0; | 
| 620 | 1 |  |  |  |  | 3 | $canpod = 0; | 
| 621 | 1 |  |  |  |  | 2 | redo; | 
| 622 |  |  |  |  |  |  | } | 
| 623 |  |  |  |  |  |  |  | 
| 624 | 204 | 100 |  |  |  | 762 | if ($code =~ m{\G$assignment_operators}gco) { | 
| 625 | 16 |  |  |  |  | 80 | $callback->('assignment_operator', $-[0], $+[0]); | 
| 626 | 16 | 100 |  |  |  | 9008 | if ($format) { | 
| 627 | 2 | 50 |  |  |  | 24 | if (substr($code, $-[0], $+[0] - $-[0]) eq '=') { | 
| 628 | 2 |  |  |  |  | 4 | $format        = 0; | 
| 629 | 2 |  |  |  |  | 4 | $expect_format = 1; | 
| 630 |  |  |  |  |  |  | } | 
| 631 |  |  |  |  |  |  | } | 
| 632 | 16 |  |  |  |  | 30 | $regex  = 1; | 
| 633 | 16 |  |  |  |  | 25 | $canpod = 0; | 
| 634 | 16 |  |  |  |  | 24 | $flat   = 0; | 
| 635 | 16 |  |  |  |  | 73 | redo; | 
| 636 |  |  |  |  |  |  | } | 
| 637 |  |  |  |  |  |  |  | 
| 638 | 188 | 50 |  |  |  | 349 | if ($code =~ /\G->/cg) { | 
| 639 | 0 |  |  |  |  | 0 | $callback->('dereference_operator', $-[0], $+[0]); | 
| 640 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 641 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 642 | 0 |  |  |  |  | 0 | $flat   = 1; | 
| 643 | 0 |  |  |  |  | 0 | redo; | 
| 644 |  |  |  |  |  |  | } | 
| 645 |  |  |  |  |  |  |  | 
| 646 | 188 | 100 | 66 |  |  | 915 | if ($code =~ m{\G$operators}gco or $code =~ /\Gx(?=[0-9\W])/cg) { | 
| 647 | 50 |  |  |  |  | 234 | $callback->('operator', $-[0], $+[0]); | 
| 648 | 50 | 100 |  |  |  | 28941 | if (substr($code, $-[0], ($+[0] - $-[0])) =~ /^$postfix_operators\z/o) { | 
| 649 | 5 |  |  |  |  | 14 | $postfix_op = 1; | 
| 650 |  |  |  |  |  |  | } | 
| 651 |  |  |  |  |  |  | else { | 
| 652 | 45 |  |  |  |  | 133 | $postfix_op = 0; | 
| 653 |  |  |  |  |  |  | } | 
| 654 | 50 |  |  |  |  | 89 | $canpod = 0; | 
| 655 | 50 |  |  |  |  | 88 | $regex  = 1; | 
| 656 | 50 |  |  |  |  | 62 | $flat   = 0; | 
| 657 | 50 |  |  |  |  | 95 | redo; | 
| 658 |  |  |  |  |  |  | } | 
| 659 |  |  |  |  |  |  |  | 
| 660 | 138 | 50 |  |  |  | 339 | if ($code =~ m{\G$hex_num}gco) { | 
| 661 | 0 |  |  |  |  | 0 | $callback->('hex_number', $-[0], $+[0]); | 
| 662 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 663 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 664 | 0 |  |  |  |  | 0 | redo; | 
| 665 |  |  |  |  |  |  | } | 
| 666 |  |  |  |  |  |  |  | 
| 667 | 138 | 50 |  |  |  | 337 | if ($code =~ m{\G$binary_num}gco) { | 
| 668 | 0 |  |  |  |  | 0 | $callback->('binary_number', $-[0], $+[0]); | 
| 669 | 0 |  |  |  |  | 0 | $regex  = 0; | 
| 670 | 0 |  |  |  |  | 0 | $canpod = 0; | 
| 671 | 0 |  |  |  |  | 0 | redo; | 
| 672 |  |  |  |  |  |  | } | 
| 673 |  |  |  |  |  |  |  | 
| 674 | 138 | 100 |  |  |  | 638 | if ($code =~ m{\G$number}gco) { | 
| 675 | 66 |  |  |  |  | 318 | $callback->('number', $-[0], $+[0]); | 
| 676 | 66 |  |  |  |  | 37448 | $regex  = 0; | 
| 677 | 66 |  |  |  |  | 120 | $canpod = 0; | 
| 678 | 66 |  |  |  |  | 132 | redo; | 
| 679 |  |  |  |  |  |  | } | 
| 680 |  |  |  |  |  |  |  | 
| 681 | 72 | 100 |  |  |  | 174 | if ($code =~ m{\GSTD(?>OUT|ERR|IN)\b}gc) { | 
| 682 | 2 |  |  |  |  | 14 | $callback->('special_fh', $-[0], $+[0]); | 
| 683 | 2 |  |  |  |  | 1068 | $regex      = 1; | 
| 684 | 2 |  |  |  |  | 5 | $postfix_op = 0; | 
| 685 | 2 |  |  |  |  | 5 | $canpod     = 0; | 
| 686 | 2 |  |  |  |  | 6 | redo; | 
| 687 |  |  |  |  |  |  | } | 
| 688 |  |  |  |  |  |  |  | 
| 689 | 70 | 100 |  |  |  | 359 | if ($code =~ m{\G$var_name}gco) { | 
| 690 | 4 | 50 |  |  |  | 30 | $callback->(($proto == 1 ? 'sub_name' : 'bare_word'), $-[0], $+[0]); | 
| 691 | 4 |  |  |  |  | 2200 | $regex  = 0; | 
| 692 | 4 |  |  |  |  | 10 | $canpod = 0; | 
| 693 | 4 |  |  |  |  | 7 | $flat   = 0; | 
| 694 | 4 |  |  |  |  | 10 | redo; | 
| 695 |  |  |  |  |  |  | } | 
| 696 |  |  |  |  |  |  |  | 
| 697 | 66 | 50 |  |  |  | 198 | if ($code =~ /\G(.)/cgs) { | 
| 698 | 0 |  |  |  |  | 0 | $callback->('unknown_char', $-[0], $+[0]); | 
| 699 | 0 |  |  |  |  | 0 | redo; | 
| 700 |  |  |  |  |  |  | } | 
| 701 |  |  |  |  |  |  |  | 
| 702 |  |  |  |  |  |  | # all done | 
| 703 |  |  |  |  |  |  | } | 
| 704 |  |  |  |  |  |  |  | 
| 705 | 66 |  |  |  |  | 1632 | return pos($code); | 
| 706 |  |  |  |  |  |  | } | 
| 707 |  |  |  |  |  |  |  | 
| 708 |  |  |  |  |  |  | 1; | 
| 709 |  |  |  |  |  |  |  | 
| 710 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 711 |  |  |  |  |  |  |  | 
| 712 |  |  |  |  |  |  | use Perl::Tokenizer; | 
| 713 |  |  |  |  |  |  | my $code = 'my $num = 42;'; | 
| 714 |  |  |  |  |  |  | perl_tokens { print "@_\n" } $code; | 
| 715 |  |  |  |  |  |  |  | 
| 716 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 717 |  |  |  |  |  |  |  | 
| 718 |  |  |  |  |  |  | Perl::Tokenizer is a tiny tokenizer which splits a given Perl code into a list of tokens, using the power of regular expressions. | 
| 719 |  |  |  |  |  |  |  | 
| 720 |  |  |  |  |  |  | =head1 SUBROUTINES | 
| 721 |  |  |  |  |  |  |  | 
| 722 |  |  |  |  |  |  | =over 4 | 
| 723 |  |  |  |  |  |  |  | 
| 724 |  |  |  |  |  |  | =item perl_tokens(&$) | 
| 725 |  |  |  |  |  |  |  | 
| 726 |  |  |  |  |  |  | This function takes a callback subroutine and a string. The subroutine is called for each token in real-time. | 
| 727 |  |  |  |  |  |  |  | 
| 728 |  |  |  |  |  |  | perl_tokens { | 
| 729 |  |  |  |  |  |  | my ($token, $pos_beg, $pos_end) = @_; | 
| 730 |  |  |  |  |  |  | ... | 
| 731 |  |  |  |  |  |  | } $code; | 
| 732 |  |  |  |  |  |  |  | 
| 733 |  |  |  |  |  |  | The positions are absolute to the string. | 
| 734 |  |  |  |  |  |  |  | 
| 735 |  |  |  |  |  |  | =back | 
| 736 |  |  |  |  |  |  |  | 
| 737 |  |  |  |  |  |  | =head2 EXPORT | 
| 738 |  |  |  |  |  |  |  | 
| 739 |  |  |  |  |  |  | The function B is exported by default. This is the only function provided by this module. | 
| 740 |  |  |  |  |  |  |  | 
| 741 |  |  |  |  |  |  | =head1 TOKENS | 
| 742 |  |  |  |  |  |  |  | 
| 743 |  |  |  |  |  |  | The standard token names that are available are: | 
| 744 |  |  |  |  |  |  |  | 
| 745 |  |  |  |  |  |  | format .................. Format text | 
| 746 |  |  |  |  |  |  | heredoc_beg ............. The beginning of a here-document ('<<"EOT"') | 
| 747 |  |  |  |  |  |  | heredoc ................. The content of a here-document | 
| 748 |  |  |  |  |  |  | pod ..................... An inline POD document, until '=cut' or end of the file | 
| 749 |  |  |  |  |  |  | horizontal_space ........ Horizontal whitespace (matched by /\h/) | 
| 750 |  |  |  |  |  |  | vertical_space .......... Vertical whitespace (matched by /\v/) | 
| 751 |  |  |  |  |  |  | other_space ............. Whitespace that is neither vertical nor horizontal (matched by /\s/) | 
| 752 |  |  |  |  |  |  | var_name ................ Alphanumeric name of a variable (excluding the sigil) | 
| 753 |  |  |  |  |  |  | special_var_name ........ Non-alphanumeric name of a variable, such as $/ or $^H (excluding the sigil) | 
| 754 |  |  |  |  |  |  | sub_name ................ Subroutine name | 
| 755 |  |  |  |  |  |  | sub_proto ............... Subroutine prototype | 
| 756 |  |  |  |  |  |  | comment ................. A #-to-newline comment (excluding the newline) | 
| 757 |  |  |  |  |  |  | scalar_sigil ............ The sigil of a scalar variable: '$' | 
| 758 |  |  |  |  |  |  | array_sigil ............. The sigil of an array variable: '@' | 
| 759 |  |  |  |  |  |  | hash_sigil .............. The sigil of a hash variable: '%' | 
| 760 |  |  |  |  |  |  | glob_sigil .............. The sigil of a glob symbol: '*' | 
| 761 |  |  |  |  |  |  | ampersand_sigil ......... The sigil of a subroutine call: '&' | 
| 762 |  |  |  |  |  |  | parenthesis_open ........ Open parenthesis: '(' | 
| 763 |  |  |  |  |  |  | parenthesis_close ....... Closed parenthesis: ')' | 
| 764 |  |  |  |  |  |  | right_bracket_open ...... Open right bracket: '[' | 
| 765 |  |  |  |  |  |  | right_bracket_close ..... Closed right bracket: ']' | 
| 766 |  |  |  |  |  |  | curly_bracket_open ...... Open curly bracket: '{' | 
| 767 |  |  |  |  |  |  | curly_bracket_close ..... Closed curly bracket: '}' | 
| 768 |  |  |  |  |  |  | substitution ............ Regex substitution: s/.../.../ | 
| 769 |  |  |  |  |  |  | transliteration.......... Transliteration: tr/.../.../ or y/.../.../ | 
| 770 |  |  |  |  |  |  | match_regex ............. Regex in matching context: m/.../ | 
| 771 |  |  |  |  |  |  | compiled_regex .......... Quoted compiled regex: qr/.../ | 
| 772 |  |  |  |  |  |  | q_string ................ Single quoted string: q/.../ | 
| 773 |  |  |  |  |  |  | qq_string ............... Double quoted string: qq/.../ | 
| 774 |  |  |  |  |  |  | qw_string ............... List of quoted words: qw/.../ | 
| 775 |  |  |  |  |  |  | qx_string ............... System command quoted string: qx/.../ | 
| 776 |  |  |  |  |  |  | backtick ................ Backtick system command quoted string: `...` | 
| 777 |  |  |  |  |  |  | single_quoted_string .... Single quoted string, as: '...' | 
| 778 |  |  |  |  |  |  | double_quoted_string .... Double quoted string, as: "..." | 
| 779 |  |  |  |  |  |  | bare_word ............... Unquoted string | 
| 780 |  |  |  |  |  |  | glob_readline ...........  or | 
| 781 |  |  |  |  |  |  | v_string ................ Version string: "vX" or "X.X.X" | 
| 782 |  |  |  |  |  |  | file_test ............... File test operator (-X), such as: "-d", "-e", etc... | 
| 783 |  |  |  |  |  |  | data .................... The content of `__DATA__` or `__END__` sections | 
| 784 |  |  |  |  |  |  | keyword ................. Regular Perl keyword, such as: `if`, `else`, etc... | 
| 785 |  |  |  |  |  |  | special_keyword ......... Special Perl keyword, such as: `__PACKAGE__`, `__FILE__`, etc... | 
| 786 |  |  |  |  |  |  | comma ................... Comma: ',' | 
| 787 |  |  |  |  |  |  | fat_comma ............... Fat comma: '=>' | 
| 788 |  |  |  |  |  |  | operator ................ Primitive operator, such as: '+', '||', etc... | 
| 789 |  |  |  |  |  |  | assignment_operator ..... '=' or any assignment operator: '+=', '||=', etc... | 
| 790 |  |  |  |  |  |  | dereference_operator .... Arrow dereference operator: '->' | 
| 791 |  |  |  |  |  |  | hex_number .............. Hexadecimal literal number: 0x... | 
| 792 |  |  |  |  |  |  | binary_number ........... Binary literal number: 0b... | 
| 793 |  |  |  |  |  |  | number .................. Decimal literal number, such as 42, 3.1e4, etc... | 
| 794 |  |  |  |  |  |  | special_fh .............. Special file-handle name, such as 'STDIN', 'STDOUT', etc... | 
| 795 |  |  |  |  |  |  | unknown_char ............ Unknown or unexpected character | 
| 796 |  |  |  |  |  |  |  | 
| 797 |  |  |  |  |  |  | =head1 EXAMPLE | 
| 798 |  |  |  |  |  |  |  | 
| 799 |  |  |  |  |  |  | For this code: | 
| 800 |  |  |  |  |  |  |  | 
| 801 |  |  |  |  |  |  | my $num = 42; | 
| 802 |  |  |  |  |  |  |  | 
| 803 |  |  |  |  |  |  | it generates the following tokens: | 
| 804 |  |  |  |  |  |  |  | 
| 805 |  |  |  |  |  |  | #  TOKEN                     POS | 
| 806 |  |  |  |  |  |  | ( keyword              => ( 0,  2) ) | 
| 807 |  |  |  |  |  |  | ( horizontal_space     => ( 2,  3) ) | 
| 808 |  |  |  |  |  |  | ( scalar_sigil         => ( 3,  4) ) | 
| 809 |  |  |  |  |  |  | ( var_name             => ( 4,  7) ) | 
| 810 |  |  |  |  |  |  | ( horizontal_space     => ( 7,  8) ) | 
| 811 |  |  |  |  |  |  | ( assignment_operator  => ( 8,  9) ) | 
| 812 |  |  |  |  |  |  | ( horizontal_space     => ( 9, 10) ) | 
| 813 |  |  |  |  |  |  | ( number               => (10, 12) ) | 
| 814 |  |  |  |  |  |  | ( semicolon            => (12, 13) ) | 
| 815 |  |  |  |  |  |  |  | 
| 816 |  |  |  |  |  |  | =head1 REPOSITORY | 
| 817 |  |  |  |  |  |  |  | 
| 818 |  |  |  |  |  |  | L | 
| 819 |  |  |  |  |  |  |  | 
| 820 |  |  |  |  |  |  | =head1 AUTHOR | 
| 821 |  |  |  |  |  |  |  | 
| 822 |  |  |  |  |  |  | Daniel "Trizen" Șuteu, Etrizen@protonmail.comE | 
| 823 |  |  |  |  |  |  |  | 
| 824 |  |  |  |  |  |  | =head1 COPYRIGHT AND LICENSE | 
| 825 |  |  |  |  |  |  |  | 
| 826 |  |  |  |  |  |  | Copyright (C) 2013-2017 | 
| 827 |  |  |  |  |  |  |  | 
| 828 |  |  |  |  |  |  | This library is free software; you can redistribute it and/or modify | 
| 829 |  |  |  |  |  |  | it under the same terms as Perl itself, either Perl version 5.22.0 or, | 
| 830 |  |  |  |  |  |  | at your option, any later version of Perl 5 you may have available. | 
| 831 |  |  |  |  |  |  |  | 
| 832 |  |  |  |  |  |  | =cut |