| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Pegex::Parser; | 
| 2 | 1 |  |  | 1 |  | 6 | use Pegex::Base; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 8 |  | 
| 3 |  |  |  |  |  |  |  | 
| 4 | 1 |  |  | 1 |  | 3525 | use Pegex::Input; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 46 |  | 
| 5 | 1 |  |  | 1 |  | 740 | use Pegex::Optimizer; | 
|  | 1 |  |  |  |  | 4 |  | 
|  | 1 |  |  |  |  | 32 |  | 
| 6 | 1 |  |  | 1 |  | 8 | use Scalar::Util; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 1430 |  | 
| 7 |  |  |  |  |  |  |  | 
| 8 |  |  |  |  |  |  | has grammar => (required => 1); | 
| 9 |  |  |  |  |  |  | has receiver => (); | 
| 10 |  |  |  |  |  |  | has input => (); | 
| 11 |  |  |  |  |  |  | has debug => ( | 
| 12 |  |  |  |  |  |  | exists($ENV{PERL_PEGEX_DEBUG}) ? $ENV{PERL_PEGEX_DEBUG} : | 
| 13 |  |  |  |  |  |  | defined($Pegex::Parser::Debug) ? $Pegex::Parser::Debug : | 
| 14 |  |  |  |  |  |  | 0 | 
| 15 |  |  |  |  |  |  | ); | 
| 16 |  |  |  |  |  |  | sub BUILD { | 
| 17 | 13 |  |  | 13 | 0 | 2032 | my ($self) = @_; | 
| 18 | 13 |  | 50 |  |  | 87 | $self->{throw_on_error} ||= 1; | 
| 19 |  |  |  |  |  |  | # $self->{rule} = undef; | 
| 20 |  |  |  |  |  |  | # $self->{parent} = undef; | 
| 21 |  |  |  |  |  |  | # $self->{error} = undef; | 
| 22 |  |  |  |  |  |  | # $self->{position} = undef; | 
| 23 |  |  |  |  |  |  | # $self->{farthest} = undef; | 
| 24 |  |  |  |  |  |  | } | 
| 25 |  |  |  |  |  |  |  | 
| 26 |  |  |  |  |  |  | # XXX Add an optional $position argument. Default to 0. This is the position | 
| 27 |  |  |  |  |  |  | # to start parsing. Set position and farthest below to this value. Allows for | 
| 28 |  |  |  |  |  |  | # sub-parsing. Need to somehow return the finishing position of a subparse. | 
| 29 |  |  |  |  |  |  | # Maybe this all goes in a subparse() method. | 
| 30 |  |  |  |  |  |  | sub parse { | 
| 31 | 14 |  |  | 14 | 0 | 221 | my ($self, $input, $start) = @_; | 
| 32 |  |  |  |  |  |  |  | 
| 33 | 14 | 100 |  |  |  | 63 | $start =~ s/-/_/g if $start; | 
| 34 |  |  |  |  |  |  |  | 
| 35 | 14 |  |  |  |  | 32 | $self->{position} = 0; | 
| 36 | 14 |  |  |  |  | 36 | $self->{farthest} = 0; | 
| 37 |  |  |  |  |  |  |  | 
| 38 | 14 | 50 |  |  |  | 102 | $self->{input} = (not ref $input) | 
| 39 |  |  |  |  |  |  | ? Pegex::Input->new(string => $input) | 
| 40 |  |  |  |  |  |  | : $input; | 
| 41 |  |  |  |  |  |  |  | 
| 42 | 14 | 50 |  |  |  | 809 | $self->{input}->open | 
| 43 |  |  |  |  |  |  | unless $self->{input}{_is_open}; | 
| 44 | 14 |  |  |  |  | 59 | $self->{buffer} = $self->{input}->read; | 
| 45 |  |  |  |  |  |  |  | 
| 46 | 14 | 50 |  |  |  | 57 | die "No 'grammar'. Can't parse" | 
| 47 |  |  |  |  |  |  | unless $self->{grammar}; | 
| 48 |  |  |  |  |  |  |  | 
| 49 | 14 |  | 66 |  |  | 113 | $self->{grammar}{tree} ||= $self->{grammar}->make_tree; | 
| 50 |  |  |  |  |  |  |  | 
| 51 | 14 | 50 | 33 |  |  | 2633 | my $start_rule_ref = $start || | 
| 52 |  |  |  |  |  |  | $self->{grammar}{tree}{'+toprule'} || | 
| 53 |  |  |  |  |  |  | $self->{grammar}{tree}{'TOP'} & 'TOP' or | 
| 54 |  |  |  |  |  |  | die "No starting rule for Pegex::Parser::parse"; | 
| 55 |  |  |  |  |  |  |  | 
| 56 | 14 | 50 |  |  |  | 78 | die "No 'receiver'. Can't parse" | 
| 57 |  |  |  |  |  |  | unless $self->{receiver}; | 
| 58 |  |  |  |  |  |  |  | 
| 59 | 14 |  |  |  |  | 128 | my $optimizer = Pegex::Optimizer->new( | 
| 60 |  |  |  |  |  |  | parser => $self, | 
| 61 |  |  |  |  |  |  | grammar => $self->{grammar}, | 
| 62 |  |  |  |  |  |  | receiver => $self->{receiver}, | 
| 63 |  |  |  |  |  |  | ); | 
| 64 |  |  |  |  |  |  |  | 
| 65 | 14 |  |  |  |  | 858 | $optimizer->optimize_grammar($start_rule_ref); | 
| 66 |  |  |  |  |  |  |  | 
| 67 |  |  |  |  |  |  | # Add circular ref and weaken it. | 
| 68 | 14 |  |  |  |  | 47 | $self->{receiver}{parser} = $self; | 
| 69 | 14 |  |  |  |  | 93 | Scalar::Util::weaken($self->{receiver}{parser}); | 
| 70 |  |  |  |  |  |  |  | 
| 71 | 14 | 50 |  |  |  | 106 | if ($self->{receiver}->can("initial")) { | 
| 72 | 0 |  |  |  |  | 0 | $self->{rule} = $start_rule_ref; | 
| 73 | 0 |  |  |  |  | 0 | $self->{parent} = {}; | 
| 74 | 0 |  |  |  |  | 0 | $self->{receiver}->initial(); | 
| 75 |  |  |  |  |  |  | } | 
| 76 |  |  |  |  |  |  |  | 
| 77 | 14 | 50 |  |  |  | 55 | my $match = $self->debug ? do { | 
| 78 | 0 |  |  |  |  | 0 | my $method = $optimizer->make_trace_wrapper(\&match_ref); | 
| 79 | 0 |  |  |  |  | 0 | $self->$method($start_rule_ref, {'+asr' => 0}); | 
| 80 |  |  |  |  |  |  | } : $self->match_ref($start_rule_ref, {}); | 
| 81 |  |  |  |  |  |  |  | 
| 82 | 14 |  |  |  |  | 85 | $self->{input}->close; | 
| 83 |  |  |  |  |  |  |  | 
| 84 | 14 | 50 | 33 |  |  | 54 | if (not $match or $self->{position} < length ${$self->{buffer}}) { | 
|  | 14 |  |  |  |  | 65 |  | 
| 85 | 0 |  |  |  |  | 0 | $self->throw_error("Parse document failed for some reason"); | 
| 86 | 0 |  |  |  |  | 0 | return;  # In case $self->throw_on_error is off | 
| 87 |  |  |  |  |  |  | } | 
| 88 |  |  |  |  |  |  |  | 
| 89 | 14 | 50 |  |  |  | 104 | if ($self->{receiver}->can("final")) { | 
| 90 | 14 |  |  |  |  | 29 | $self->{rule} = $start_rule_ref; | 
| 91 | 14 |  |  |  |  | 26 | $self->{parent} = {}; | 
| 92 | 14 |  |  |  |  | 75 | $match = [ $self->{receiver}->final(@$match) ]; | 
| 93 |  |  |  |  |  |  | } | 
| 94 |  |  |  |  |  |  |  | 
| 95 | 14 |  |  |  |  | 1080 | $match->[0]; | 
| 96 |  |  |  |  |  |  | } | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | sub match_next { | 
| 99 | 1503 |  |  | 1503 | 0 | 1890 | my ($self, $next) = @_; | 
| 100 |  |  |  |  |  |  |  | 
| 101 | 1503 |  |  |  |  | 4072 | my ($rule, $method, $kind, $min, $max, $assertion) = | 
| 102 | 1503 |  |  |  |  | 1886 | @{$next}{'rule', 'method', 'kind', '+min', '+max', '+asr'}; | 
| 103 |  |  |  |  |  |  |  | 
| 104 | 1503 |  |  |  |  | 3304 | my ($position, $match, $count) = | 
| 105 |  |  |  |  |  |  | ($self->{position}, [], 0); | 
| 106 |  |  |  |  |  |  |  | 
| 107 | 1503 |  |  |  |  | 2826 | while (my $return = $method->($self, $rule, $next)) { | 
| 108 | 502 | 50 |  |  |  | 1372 | $position = $self->{position} unless $assertion; | 
| 109 | 502 |  |  |  |  | 525 | $count++; | 
| 110 | 502 |  |  |  |  | 948 | push @$match, @$return; | 
| 111 | 502 | 100 |  |  |  | 2801 | last if $max == 1; | 
| 112 |  |  |  |  |  |  | } | 
| 113 | 1503 | 100 | 100 |  |  | 5913 | if (not $count and $min == 0 and $kind eq 'all') { | 
|  |  |  | 100 |  |  |  |  | 
| 114 | 4 |  |  |  |  | 13 | $match = [[]]; | 
| 115 |  |  |  |  |  |  | } | 
| 116 | 1503 | 100 |  |  |  | 2889 | if ($max != 1) { | 
| 117 | 40 | 100 |  |  |  | 134 | if ($next->{-flat}) { | 
| 118 | 4 | 50 |  |  |  | 9 | $match = [ map { (ref($_) eq 'ARRAY') ? (@$_) : ($_) } @$match ]; | 
|  | 4 |  |  |  |  | 18 |  | 
| 119 |  |  |  |  |  |  | } | 
| 120 |  |  |  |  |  |  | else { | 
| 121 | 36 |  |  |  |  | 123 | $match = [$match] | 
| 122 |  |  |  |  |  |  | } | 
| 123 | 40 | 50 |  |  |  | 125 | $self->{farthest} = $position | 
| 124 |  |  |  |  |  |  | if ($self->{position} = $position) > $self->{farthest}; | 
| 125 |  |  |  |  |  |  | } | 
| 126 | 1503 |  | 66 |  |  | 4665 | my $result = ($count >= $min and (not $max or $count <= $max)) | 
| 127 |  |  |  |  |  |  | ^ ($assertion == -1); | 
| 128 | 1503 | 100 | 100 |  |  | 4385 | if (not($result) or $assertion) { | 
| 129 | 978 | 50 |  |  |  | 2645 | $self->{farthest} = $position | 
| 130 |  |  |  |  |  |  | if ($self->{position} = $position) > $self->{farthest}; | 
| 131 |  |  |  |  |  |  | } | 
| 132 |  |  |  |  |  |  |  | 
| 133 | 1503 | 100 |  |  |  | 6908 | ($result ? $next->{'-skip'} ? [] : $match : 0); | 
|  |  | 100 |  |  |  |  |  | 
| 134 |  |  |  |  |  |  | } | 
| 135 |  |  |  |  |  |  |  | 
| 136 |  |  |  |  |  |  | sub match_rule { | 
| 137 | 0 |  |  | 0 | 0 | 0 | my ($self, $position, $match) = (@_, []); | 
| 138 | 0 |  |  |  |  | 0 | $self->{position} = $position; | 
| 139 | 0 | 0 |  |  |  | 0 | $self->{farthest} = $position | 
| 140 |  |  |  |  |  |  | if $position > $self->{farthest}; | 
| 141 | 0 | 0 |  |  |  | 0 | $match = [ $match ] if @$match > 1; | 
| 142 | 0 |  |  |  |  | 0 | my ($ref, $parent) = @{$self}{'rule', 'parent'}; | 
|  | 0 |  |  |  |  | 0 |  | 
| 143 | 0 | 0 |  |  |  | 0 | my $rule = $self->{grammar}{tree}{$ref} | 
| 144 |  |  |  |  |  |  | or die "No rule defined for '$ref'"; | 
| 145 |  |  |  |  |  |  |  | 
| 146 | 0 |  |  |  |  | 0 | [ $rule->{action}->($self->{receiver}, @$match) ]; | 
| 147 |  |  |  |  |  |  | } | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | sub match_ref { | 
| 150 | 754 |  |  | 754 | 0 | 1146 | my ($self, $ref, $parent) = @_; | 
| 151 | 754 | 50 |  |  |  | 2940 | my $rule = $self->{grammar}{tree}{$ref} | 
| 152 |  |  |  |  |  |  | or die "No rule defined for '$ref'"; | 
| 153 | 754 | 100 |  |  |  | 1530 | my $match = $self->match_next($rule) or return; | 
| 154 | 252 | 50 |  |  |  | 564 | return $Pegex::Constant::Dummy unless $rule->{action}; | 
| 155 | 252 |  |  |  |  | 329 | @{$self}{'rule', 'parent'} = ($ref, $parent); | 
|  | 252 |  |  |  |  | 825 |  | 
| 156 |  |  |  |  |  |  |  | 
| 157 |  |  |  |  |  |  | # XXX Possible API mismatch. | 
| 158 |  |  |  |  |  |  | # Not sure if we should "splat" the $match. | 
| 159 | 252 |  |  |  |  | 967 | [ $rule->{action}->($self->{receiver}, @$match) ]; | 
| 160 |  |  |  |  |  |  | } | 
| 161 |  |  |  |  |  |  |  | 
| 162 |  |  |  |  |  |  | sub match_rgx { | 
| 163 | 508 |  |  | 508 | 0 | 657 | my ($self, $regexp) = @_; | 
| 164 | 508 |  |  |  |  | 717 | my $buffer = $self->{buffer}; | 
| 165 |  |  |  |  |  |  |  | 
| 166 | 508 |  |  |  |  | 1343 | pos($$buffer) = $self->{position}; | 
| 167 | 508 | 100 |  |  |  | 5928 | $$buffer =~ /$regexp/g or return; | 
| 168 |  |  |  |  |  |  |  | 
| 169 | 123 |  |  |  |  | 208 | $self->{position} = pos($$buffer); | 
| 170 |  |  |  |  |  |  |  | 
| 171 | 123 | 50 |  |  |  | 315 | $self->{farthest} = $self->{position} | 
| 172 |  |  |  |  |  |  | if $self->{position} > $self->{farthest}; | 
| 173 |  |  |  |  |  |  |  | 
| 174 | 1 |  |  | 1 |  | 14 | no strict 'refs'; | 
|  | 1 |  |  |  |  | 2 |  | 
|  | 1 |  |  |  |  | 1044 |  | 
| 175 | 123 |  |  |  |  | 560 | my $captures = [ map $$_, 1..$#+ ]; | 
| 176 | 123 | 50 |  |  |  | 320 | $captures = [ $captures ] if $#+ > 1; | 
| 177 |  |  |  |  |  |  |  | 
| 178 | 123 |  |  |  |  | 372 | return $captures; | 
| 179 |  |  |  |  |  |  | } | 
| 180 |  |  |  |  |  |  |  | 
| 181 |  |  |  |  |  |  | sub match_all { | 
| 182 | 164 |  |  | 164 | 0 | 212 | my ($self, $list) = @_; | 
| 183 | 164 |  |  |  |  | 220 | my $position = $self->{position}; | 
| 184 | 164 |  |  |  |  | 247 | my $set = []; | 
| 185 | 164 |  |  |  |  | 186 | my $len = 0; | 
| 186 | 164 |  |  |  |  | 274 | for my $elem (@$list) { | 
| 187 | 316 | 100 |  |  |  | 656 | if (my $match = $self->match_next($elem)) { | 
| 188 | 214 | 100 | 100 |  |  | 941 | if (not ($elem->{'+asr'} or $elem->{'-skip'})) { | 
| 189 | 191 |  |  |  |  | 343 | push @$set, @$match; | 
| 190 | 191 |  |  |  |  | 460 | $len++; | 
| 191 |  |  |  |  |  |  | } | 
| 192 |  |  |  |  |  |  | } | 
| 193 |  |  |  |  |  |  | else { | 
| 194 | 102 | 50 |  |  |  | 240 | $self->{farthest} = $position | 
| 195 |  |  |  |  |  |  | if ($self->{position} = $position) > $self->{farthest}; | 
| 196 | 102 |  |  |  |  | 323 | return; | 
| 197 |  |  |  |  |  |  | } | 
| 198 |  |  |  |  |  |  | } | 
| 199 | 62 | 50 |  |  |  | 201 | $set = [ $set ] if $len > 1; | 
| 200 | 62 |  |  |  |  | 174 | return $set; | 
| 201 |  |  |  |  |  |  | } | 
| 202 |  |  |  |  |  |  |  | 
| 203 |  |  |  |  |  |  | sub match_any { | 
| 204 | 129 |  |  | 129 | 0 | 177 | my ($self, $list) = @_; | 
| 205 | 129 |  |  |  |  | 377 | for my $elem (@$list) { | 
| 206 | 433 | 100 |  |  |  | 933 | if (my $match = $self->match_next($elem)) { | 
| 207 | 79 |  |  |  |  | 257 | return $match; | 
| 208 |  |  |  |  |  |  | } | 
| 209 |  |  |  |  |  |  | } | 
| 210 | 50 |  |  |  |  | 161 | return; | 
| 211 |  |  |  |  |  |  | } | 
| 212 |  |  |  |  |  |  |  | 
| 213 |  |  |  |  |  |  | sub match_err { | 
| 214 | 0 |  |  | 0 | 0 |  | my ($self, $error) = @_; | 
| 215 | 0 |  |  |  |  |  | $self->throw_error($error); | 
| 216 |  |  |  |  |  |  | } | 
| 217 |  |  |  |  |  |  |  | 
| 218 |  |  |  |  |  |  | sub trace { | 
| 219 | 0 |  |  | 0 | 0 |  | my ($self, $action) = @_; | 
| 220 | 0 | 0 |  |  |  |  | my $indent = ($action =~ /^try_/) ? 1 : 0; | 
| 221 | 0 |  | 0 |  |  |  | $self->{indent} ||= 0; | 
| 222 | 0 | 0 |  |  |  |  | $self->{indent}-- unless $indent; | 
| 223 | 0 |  |  |  |  |  | print STDERR ' ' x $self->{indent}; | 
| 224 | 0 | 0 |  |  |  |  | $self->{indent}++ if $indent; | 
| 225 | 0 |  |  |  |  |  | my $snippet = substr(${$self->{buffer}}, $self->{position}); | 
|  | 0 |  |  |  |  |  |  | 
| 226 | 0 | 0 |  |  |  |  | $snippet = substr($snippet, 0, 30) . "..." | 
| 227 |  |  |  |  |  |  | if length $snippet > 30; | 
| 228 | 0 |  |  |  |  |  | $snippet =~ s/\n/\\n/g; | 
| 229 | 0 | 0 |  |  |  |  | print STDERR sprintf("%-30s", $action) . | 
| 230 |  |  |  |  |  |  | ($indent ? " >$snippet<\n" : "\n"); | 
| 231 |  |  |  |  |  |  | } | 
| 232 |  |  |  |  |  |  |  | 
| 233 |  |  |  |  |  |  | sub throw_error { | 
| 234 | 0 |  |  | 0 | 0 |  | my ($self, $msg) = @_; | 
| 235 | 0 |  |  |  |  |  | $@ = $self->{error} = $self->format_error($msg); | 
| 236 | 0 | 0 |  |  |  |  | return undef unless $self->{throw_on_error}; | 
| 237 | 0 |  |  |  |  |  | require Carp; | 
| 238 | 0 |  |  |  |  |  | Carp::croak($self->{error}); | 
| 239 |  |  |  |  |  |  | } | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  | sub format_error { | 
| 242 | 0 |  |  | 0 | 0 |  | my ($self, $msg) = @_; | 
| 243 | 0 |  |  |  |  |  | my $buffer = $self->{buffer}; | 
| 244 | 0 |  |  |  |  |  | my $position = $self->{farthest}; | 
| 245 | 0 |  |  |  |  |  | my $real_pos = $self->{position}; | 
| 246 |  |  |  |  |  |  |  | 
| 247 | 0 |  |  |  |  |  | my $line = @{[substr($$buffer, 0, $position) =~ /(\n)/g]} + 1; | 
|  | 0 |  |  |  |  |  |  | 
| 248 | 0 |  |  |  |  |  | my $column = $position - rindex($$buffer, "\n", $position); | 
| 249 |  |  |  |  |  |  |  | 
| 250 | 0 | 0 |  |  |  |  | my $pretext = substr( | 
|  |  | 0 |  |  |  |  |  | 
| 251 |  |  |  |  |  |  | $$buffer, | 
| 252 |  |  |  |  |  |  | $position < 50 ? 0 : $position - 50, | 
| 253 |  |  |  |  |  |  | $position < 50 ? $position : 50 | 
| 254 |  |  |  |  |  |  | ); | 
| 255 | 0 |  |  |  |  |  | my $context = substr($$buffer, $position, 50); | 
| 256 | 0 |  |  |  |  |  | $pretext =~ s/.*\n//gs; | 
| 257 | 0 |  |  |  |  |  | $context =~ s/\n/\\n/g; | 
| 258 |  |  |  |  |  |  |  | 
| 259 | 0 |  |  |  |  |  | return <<"..."; | 
| 260 |  |  |  |  |  |  | Error parsing Pegex document: | 
| 261 | 0 |  |  |  |  |  | msg:      $msg | 
| 262 |  |  |  |  |  |  | line:     $line | 
| 263 |  |  |  |  |  |  | column:   $column | 
| 264 |  |  |  |  |  |  | context:  $pretext$context | 
| 265 |  |  |  |  |  |  | ${\ (' ' x (length($pretext) + 10) . '^')} | 
| 266 |  |  |  |  |  |  | position: $position ($real_pos pre-lookahead) | 
| 267 |  |  |  |  |  |  | ... | 
| 268 |  |  |  |  |  |  | } | 
| 269 |  |  |  |  |  |  |  | 
| 270 |  |  |  |  |  |  | # TODO Move this to a Parser helper role/subclass | 
| 271 |  |  |  |  |  |  | sub line_column { | 
| 272 | 0 |  |  | 0 | 0 |  | my ($self, $position) = @_; | 
| 273 | 0 |  | 0 |  |  |  | $position ||= $self->{position}; | 
| 274 | 0 |  |  |  |  |  | my $buffer = $self->{buffer}; | 
| 275 | 0 |  |  |  |  |  | my $line = @{[substr($$buffer, 0, $position) =~ /(\n)/g]} + 1; | 
|  | 0 |  |  |  |  |  |  | 
| 276 | 0 |  |  |  |  |  | my $column = $position - rindex($$buffer, "\n", $position); | 
| 277 | 0 |  |  |  |  |  | return [$line, $position]; | 
| 278 |  |  |  |  |  |  | } | 
| 279 |  |  |  |  |  |  |  | 
| 280 |  |  |  |  |  |  | # XXX Need to figure out what uses this. (sample.t) | 
| 281 |  |  |  |  |  |  | { | 
| 282 |  |  |  |  |  |  | package Pegex::Constant; | 
| 283 |  |  |  |  |  |  | our $Null = []; | 
| 284 |  |  |  |  |  |  | our $Dummy = []; | 
| 285 |  |  |  |  |  |  | } | 
| 286 |  |  |  |  |  |  |  | 
| 287 |  |  |  |  |  |  | 1; |