| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
## Base Class for Creating Text Format Parsers |
|
2
|
|
|
|
|
|
|
# |
|
3
|
|
|
|
|
|
|
# Document::Parser is a base class that you can use to easily generate a |
|
4
|
|
|
|
|
|
|
# parser for text document markups (like Wiki or POD markups). |
|
5
|
|
|
|
|
|
|
# |
|
6
|
|
|
|
|
|
|
# See this parser as an example: |
|
7
|
|
|
|
|
|
|
# |
|
8
|
|
|
|
|
|
|
# http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Parser.pm |
|
9
|
|
|
|
|
|
|
# |
|
10
|
|
|
|
|
|
|
# And this module for usage of the parser: |
|
11
|
|
|
|
|
|
|
# |
|
12
|
|
|
|
|
|
|
# http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Formatter2.pm |
|
13
|
|
|
|
|
|
|
# |
|
14
|
|
|
|
|
|
|
# Copyright (c) 2007. Ingy döt Net. All rights reserved. |
|
15
|
|
|
|
|
|
|
# |
|
16
|
|
|
|
|
|
|
# Licensed under the same terms as Perl itself. |
|
17
|
|
|
|
|
|
|
## |
|
18
|
|
|
|
|
|
|
package Document::Parser; |
|
19
|
1
|
|
|
1
|
|
367
|
use strict; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
27
|
|
|
20
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
1120
|
|
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
## Synopsis: |
|
23
|
|
|
|
|
|
|
# |
|
24
|
|
|
|
|
|
|
# package MyParser; |
|
25
|
|
|
|
|
|
|
# use base 'Document::Parser'; |
|
26
|
|
|
|
|
|
|
# |
|
27
|
|
|
|
|
|
|
# sub create_grammar { |
|
28
|
|
|
|
|
|
|
# return { |
|
29
|
|
|
|
|
|
|
# # ... define a grammar hash here ... |
|
30
|
|
|
|
|
|
|
# }; |
|
31
|
|
|
|
|
|
|
# } |
|
32
|
|
|
|
|
|
|
## |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
##------------------------------------------------------------------------------ |
|
35
|
|
|
|
|
|
|
# Parser object constructor/initializer |
|
36
|
|
|
|
|
|
|
##------------------------------------------------------------------------------ |
|
37
|
|
|
|
|
|
|
# Construct a parser from a flat list of attribute key/value pairs.
# May be invoked on a class name or on an existing object; in the latter
# case the new object is blessed into that object's class (none of its
# attributes are copied).
sub new {
    my ($invocant, @pairs) = @_;
    my $package = ref($invocant) || $invocant;
    return bless {@pairs}, $package;
}
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
##------------------------------------------------------------------------------ |
|
43
|
|
|
|
|
|
|
# $parsed = $parser->parse($wikitext); |
|
44
|
|
|
|
|
|
|
##------------------------------------------------------------------------------ |
|
45
|
|
|
|
|
|
|
# Run a full parse and return whatever the receiver's `content` method
# yields.
#
# The optional argument is the text to parse; it is used only when the
# object does not already hold a true `input` value. A grammar and a
# receiver are obtained through `set_grammar` / `set_receiver` when the
# object has none yet. The receiver is (re)initialized with `init` before
# the top-level block parse starts.
sub parse {
    my ($self, $text) = @_;

    $self->{input}    = $text               unless $self->{input};
    $self->{grammar}  = $self->set_grammar  unless $self->{grammar};
    $self->{receiver} = $self->set_receiver unless $self->{receiver};

    my $receiver = $self->{receiver};
    $receiver->init;
    $self->parse_blocks('top');

    return $self->{receiver}->content;
}
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
## |
|
56
|
|
|
|
|
|
|
# Call `set_receiver` to reset the receiver for a new parse. |
|
57
|
|
|
|
|
|
|
# Install a receiver on the parser and return it.
# Uses the argument when a true value is given; otherwise asks
# `create_receiver` (not defined in this part of the file) for one.
sub set_receiver {
    my ($self, $receiver) = @_;
    $receiver = $self->create_receiver unless $receiver;
    return $self->{receiver} = $receiver;
}
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
# Install a grammar on the parser and return it.
# Uses the argument when a true value is given; otherwise asks
# `create_grammar` (supplied by the subclass, per the synopsis) for one.
sub set_grammar {
    my ($self, $grammar) = @_;
    $grammar = $self->create_grammar unless $grammar;
    return $self->{grammar} = $grammar;
}
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
68
|
|
|
|
|
|
|
# Parse input into a series of blocks. With each iteration the parser must |
|
69
|
|
|
|
|
|
|
# match a block at position 0 of the text, and remove that block from the |
|
70
|
|
|
|
|
|
|
# input and reparse it further. This continues until there is no input left. |
|
71
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
72
|
|
|
|
|
|
|
# Consume the whole input as a sequence of blocks.
#
# $container_type names the grammar entry whose `blocks` list gives the
# block types to try, in order. The first type that matches wins: the
# matched span (0 .. end) is cut from the front of the input and handed to
# `handle_match`. If a pass over all types fails to shorten the input, the
# parse dies with the `reduction_error` message.
sub parse_blocks {
    my ($self, $container_type) = @_;
    my $block_types = $self->{grammar}{$container_type}{blocks};

    while (my $remaining = length $self->{input}) {
        CANDIDATE:
        for my $candidate (@$block_types) {
            my $hit = $self->find_match(matched_block => $candidate);
            next CANDIDATE unless $hit;

            # Drop the matched text from the front before subparsing it.
            substr($self->{input}, 0, $hit->{end}, '');
            $self->handle_match($candidate, $hit);
            last CANDIDATE;
        }

        # No block consumed anything: the grammar cannot reduce this input.
        if (not length($self->{input}) < $remaining) {
            die $self->reduction_error;
        }
    }

    return;
}
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
90
|
|
|
|
|
|
|
# This code parses a chunk into interleaved pieces of plain text and |
|
91
|
|
|
|
|
|
|
# phrases. It repeatedly tries to match every possible phrase and |
|
92
|
|
|
|
|
|
|
# then takes the match closest to the start. Everything before a |
|
93
|
|
|
|
|
|
|
# match is written as text. Matched phrases are subparsed according |
|
94
|
|
|
|
|
|
|
# to their rules. This continues until the input is all eaten. |
|
95
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
96
|
|
|
|
|
|
|
# Split the input into plain text and phrases.
#
# $container_type names the grammar entry whose `phrases` list gives the
# phrase types to try. On every pass each type is matched against the
# remaining input and the match that begins earliest is chosen (the search
# stops early on a match at position 0). Text in front of the chosen match
# goes to the receiver's `text_node`; the matched span is removed from the
# input and passed to `handle_match`. When nothing matches, the remaining
# input is emitted as one text node and the loop ends.
#
# Dies with the `reduction_error` message if a chosen match does not shorten
# the input (for example a pattern that matches the empty string at
# position 0); without this guard such a match would loop forever. This
# mirrors the guard in `parse_blocks`.
sub parse_phrases {
    my $self = shift;
    my $container_type = shift;
    my $types = $self->{grammar}{$container_type}{phrases};

    while (my $length = length $self->{input}) {
        my $match;
        for my $type (@$types) {
            my $matched = $self->find_match(matched_phrase => $type) or next;
            if (not defined $match or $matched->{begin} < $match->{begin}) {
                $match = $matched;
                $match->{type} = $type;

                # Nothing can start earlier than position 0.
                last if $match->{begin} == 0;
            }
        }

        if (! $match) {
            $self->{receiver}->text_node($self->{input});
            last;
        }

        my ($begin, $end, $type) = @{$match}{qw(begin end type)};
        $self->{receiver}->text_node(substr($self->{input}, 0, $begin))
            unless $begin == 0;
        substr($self->{input}, 0, $end, '');

        # Checked before handing off, so only the consumption itself counts.
        die $self->reduction_error
            unless length($self->{input}) < $length;

        $self->handle_match($type, $match);
    }

    return;
}
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# Try to match one grammar type against the current input.
#
#   $matched_func - name of the method that turns the regexp match state
#                   into a match hash (`matched_block` or `matched_phrase`)
#   $type         - grammar type to try
#
# The grammar's `match` entry for $type selects the strategy:
#   * a single pattern  - matched against the input;
#   * an array of them  - tried in order, the first one that matches is used;
#   * nothing (false)   - the method `match_<type>` is called instead.
#
# Returns the match hash, or nothing when there is no match. $matched_func
# is called with no arguments immediately after the successful pattern
# match, so it reads the capture/offset variables of that match.
sub find_match {
    my ($self, $matched_func, $type) = @_;
    my $pattern = $self->{grammar}{$type}{match};

    # No pattern in the grammar: delegate to a hand-written matcher.
    if (not $pattern) {
        my $matcher = "match_$type";
        my $custom = $self->$matcher;
        return unless $custom;
        return $custom;
    }

    # Single pattern.
    if (ref($pattern) ne 'ARRAY') {
        return unless $self->{input} =~ $pattern;
        my $single = $self->$matched_func;
        return $single;
    }

    # List of alternatives: first pattern to match wins.
    my $found;
    ALTERNATIVE:
    for my $candidate (@$pattern) {
        next ALTERNATIVE unless $self->{input} =~ $candidate;
        $found = $self->$matched_func;
        last ALTERNATIVE;
    }
    return unless $found;
    return $found;
}
|
148
|
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
# Dispatch a successful match.
#
# If the object can `handle_<type>`, that method is called with
# ($match, $type). Otherwise the match text is subparsed: as blocks when the
# grammar entry for $type has a true `blocks` value, as phrases if not. A
# true `filter` entry in the grammar is passed along to `subparse` as an
# extra trailing argument; when absent, no extra argument is passed.
sub handle_match {
    my ($self, $type, $match) = @_;

    my $handler = "handle_$type";
    return $self->$handler($match, $type) if $self->can($handler);

    my $rule = $self->{grammar}{$type};
    my @subparse_args = (
        ($rule->{blocks} ? 'parse_blocks' : 'parse_phrases'),
        $match,
        $type,
    );
    push @subparse_args, $rule->{filter} if $rule->{filter};

    return $self->subparse(@subparse_args);
}
|
166
|
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
# Parse the text of a match as a nested node.
#
#   $func   - parsing method to run on the nested parser
#             (`parse_blocks` or `parse_phrases`)
#   $match  - match hash; its `text` entry is the nested input
#   $type   - grammar type of the node, passed to $func and to the receiver
#   $filter - optional code ref that edits the text in place through $_
#             before it is parsed
#
# The receiver sees `begin_node($type)`, then `insert` with the nested
# parser's receiver, then `end_node($type)`. The nested parser shares this
# parser's grammar and gets a fresh receiver made by calling `new` on the
# current one.
#
# $_ is localized around the filter call, so the caller's $_ is left
# untouched.
sub subparse {
    my ($self, $func, $match, $type, $filter) = @_;

    $self->{receiver}->begin_node($type);

    my $parser = $self->new(
        grammar  => $self->{grammar},
        receiver => $self->{receiver}->new,
        input    => $filter
            ? do { local $_ = $match->{text}; $filter->(); $_ }
            : $match->{text},
    );
    $parser->$func($type);

    $self->{receiver}->insert($parser->{receiver});
    $self->{receiver}->end_node($type);
}
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
183
|
|
|
|
|
|
|
# Helper functions |
|
184
|
|
|
|
|
|
|
# |
|
185
|
|
|
|
|
|
|
# These are the odds and ends called by the code above. |
|
186
|
|
|
|
|
|
|
#------------------------------------------------------------------------------- |
|
187
|
|
|
|
|
|
|
|
|
188
|
|
|
|
|
|
|
# Build the message used when the parser cannot consume any more input:
# the object's class name followed by the unparsed remainder in quotes.
sub reduction_error {
    my ($self) = @_;
    return join '', ref($self), qq[ reduction error for:\n"$self->{input}"];
}
|
192
|
|
|
|
|
|
|
|
|
193
|
|
|
|
|
|
|
# Build the match hash for a block.
#
# Called as a method, optionally with ($text, $begin, $end). Any argument
# left out is taken from the most recent successful regexp match: $1 for the
# text, $-[0] for the begin offset and $+[0] for the end offset.
#
# Dies unless the begin offset is 0, because blocks must match at the very
# start of the input.
#
# An explicitly supplied text is honoured whenever it is defined, so '' and
# '0' are kept rather than silently replaced by $1. `end` still falls back
# on any false value: an end offset of 0 would consume nothing.
sub matched_block {
    my $begin = defined $_[2] ? $_[2] : $-[0];
    die "All blocks must match at position 0"
        if "$begin" ne "0";

    return +{
        text => (defined $_[1] ? $_[1] : $1),
        end  => ($_[3] || $+[0]),
    };
}
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
# Build the match hash for a phrase.
#
# Called as a method, optionally with ($text, $begin, $end). Any argument
# left out is taken from the most recent successful regexp match: $1 for the
# text, $-[0] for the begin offset and $+[0] for the end offset.
#
# An explicitly supplied text is honoured whenever it is defined, so '' and
# '0' are kept rather than silently replaced by $1. `end` still falls back
# on any false value: an end offset of 0 would consume nothing.
sub matched_phrase {
    return +{
        text  => (defined $_[1] ? $_[1] : $1),
        begin => (defined $_[2] ? $_[2] : $-[0]),
        end   => ($_[3] || $+[0]),
    };
}
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
1; |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=for perldoc |
|
215
|
|
|
|
|
|
|
This POD generated by Perldoc-0.21. |
|
216
|
|
|
|
|
|
|
DO NOT EDIT. Your changes will be lost. |
|
217
|
|
|
|
|
|
|
|
|
218
|
|
|
|
|
|
|
=encoding utf8 |
|
219
|
|
|
|
|
|
|
|
|
220
|
|
|
|
|
|
|
=head1 NAME |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
Document::Parser - Base Class for Creating Text Format Parsers |
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
225
|
|
|
|
|
|
|
|
|
226
|
|
|
|
|
|
|
package MyParser; |
|
227
|
|
|
|
|
|
|
use base 'Document::Parser'; |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
sub create_grammar { |
|
230
|
|
|
|
|
|
|
return { |
|
231
|
|
|
|
|
|
|
# ... define a grammar hash here ... |
|
232
|
|
|
|
|
|
|
}; |
|
233
|
|
|
|
|
|
|
} |
|
234
|
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
Document::Parser is a base class that you can use to easily generate a |
|
238
|
|
|
|
|
|
|
parser for text document markups (like Wiki or POD markups). |
|
239
|
|
|
|
|
|
|
|
|
240
|
|
|
|
|
|
|
See this parser as an example: |
|
241
|
|
|
|
|
|
|
|
|
242
|
|
|
|
|
|
|
http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Parser.pm |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
And this module for usage of the parser: |
|
245
|
|
|
|
|
|
|
|
|
246
|
|
|
|
|
|
|
http://svn.kwiki.org/kwiki/trunk/src/core/Spork/lib/Spork/Formatter2.pm |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
=head1 AUTHOR |
|
249
|
|
|
|
|
|
|
|
|
250
|
|
|
|
|
|
|
Ingy döt Net |
|
251
|
|
|
|
|
|
|
|
|
252
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
253
|
|
|
|
|
|
|
|
|
254
|
|
|
|
|
|
|
Copyright (c) 2007. Ingy döt Net. All rights reserved. |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
|
257
|
|
|
|
|
|
|
under the same terms as Perl itself. |
|
258
|
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
See http://www.perl.com/perl/misc/Artistic.html |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=cut |