| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Text::MediawikiFormat; |
|
2
|
|
|
|
|
|
|
|
|
3
|
14
|
|
|
14
|
|
190398
|
use strict; |
|
|
14
|
|
|
|
|
30
|
|
|
|
14
|
|
|
|
|
632
|
|
|
4
|
14
|
|
|
14
|
|
97
|
use warnings::register; |
|
|
14
|
|
|
|
|
22
|
|
|
|
14
|
|
|
|
|
2453
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
=head1 NAME |
|
7
|
|
|
|
|
|
|
|
|
8
|
|
|
|
|
|
|
Text::MediawikiFormat - Translate Wiki markup into other text formats |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
=head1 VERSION |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
Version 1.02 |
|
13
|
|
|
|
|
|
|
|
|
14
|
|
|
|
|
|
|
=cut |
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
our $VERSION = '1.02'; |
|
17
|
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
use Text::MediawikiFormat 'wikiformat'; |
|
21
|
|
|
|
|
|
|
my $html = wikiformat ($raw); |
|
22
|
|
|
|
|
|
|
my $text = wikiformat ($raw, {}, {implicit_links => 1}); |
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
L and its sister projects use the PHP Mediawiki to format |
|
27
|
|
|
|
|
|
|
their pages. This module attempts to duplicate the Mediawiki formatting rules. |
|
28
|
|
|
|
|
|
|
Those formatting rules can be simple and easy to use, while providing more |
|
29
|
|
|
|
|
|
|
advanced options for the power user. They are also easy to translate into |
|
30
|
|
|
|
|
|
|
other, more complicated markup languages with this module. It creates HTML by |
|
31
|
|
|
|
|
|
|
default, but could produce valid POD, DocBook, XML, or any other format |
|
32
|
|
|
|
|
|
|
imaginable. |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
The most important function is C. It is |
|
35
|
|
|
|
|
|
|
not exported by default, but will be exported as C if any |
|
36
|
|
|
|
|
|
|
options at all are passed to the exporter, unless the name is overridden |
|
37
|
|
|
|
|
|
|
explicitly. See L<"EXPORT"> for more information. |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
It should be noted that this module is written as a drop in replacement for |
|
40
|
|
|
|
|
|
|
L that expands on that modules functionality and provides |
|
41
|
|
|
|
|
|
|
a default rule set that may be used to format text like the PHP Mediawiki. It |
|
42
|
|
|
|
|
|
|
is also well to note early that if you just want a Mediawiki clone (you don't |
|
43
|
|
|
|
|
|
|
need to customize it heavily and you want integration with a back end |
|
44
|
|
|
|
|
|
|
database), you should look at L. |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
=cut |
|
47
|
|
|
|
|
|
|
|
|
48
|
14
|
|
|
14
|
|
75
|
use Carp qw(carp confess croak); |
|
|
14
|
|
|
|
|
22
|
|
|
|
14
|
|
|
|
|
990
|
|
|
49
|
14
|
|
|
14
|
|
18971
|
use CGI qw(:standard); |
|
|
14
|
|
|
|
|
192132
|
|
|
|
14
|
|
|
|
|
101
|
|
|
50
|
14
|
|
|
14
|
|
41488
|
use Scalar::Util qw(blessed); |
|
|
14
|
|
|
|
|
26
|
|
|
|
14
|
|
|
|
|
1430
|
|
|
51
|
14
|
|
|
14
|
|
6409
|
use Text::MediawikiFormat::Blocks; |
|
|
14
|
|
|
|
|
30
|
|
|
|
14
|
|
|
|
|
84
|
|
|
52
|
14
|
|
|
14
|
|
22461
|
use URI; |
|
|
14
|
|
|
|
|
55564
|
|
|
|
14
|
|
|
|
|
496
|
|
|
53
|
14
|
|
|
14
|
|
105
|
use URI::Escape qw(uri_escape uri_escape_utf8); |
|
|
14
|
|
|
|
|
21
|
|
|
|
14
|
|
|
|
|
973
|
|
|
54
|
|
|
|
|
|
|
|
|
55
|
14
|
|
|
|
|
1593
|
use vars qw($missing_html_packages %tags %opts %merge_matrix |
|
56
|
14
|
|
|
14
|
|
66
|
$uric $uricCheat $uriCruft); |
|
|
14
|
|
|
|
|
20
|
|
|
57
|
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
BEGIN |
|
59
|
|
|
|
|
|
|
{ |
|
60
|
|
|
|
|
|
|
# Try to load optional HTML packages, recording any errors. |
|
61
|
14
|
|
|
14
|
|
28
|
eval {require HTML::Parser}; |
|
|
14
|
|
|
|
|
10287
|
|
|
62
|
14
|
|
|
|
|
72543
|
$missing_html_packages = $@; |
|
63
|
14
|
|
|
|
|
30
|
eval {require HTML::Tagset}; |
|
|
14
|
|
|
|
|
8192
|
|
|
64
|
14
|
|
|
|
|
32582
|
$missing_html_packages .= $@; |
|
65
|
|
|
|
|
|
|
} |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
### |
|
70
|
|
|
|
|
|
|
### Defaults |
|
71
|
|
|
|
|
|
|
### |
|
72
|
|
|
|
|
|
|
%tags = |
|
73
|
|
|
|
|
|
|
( |
|
74
|
|
|
|
|
|
|
indent => qr/^(?:[:*#;]*)(?=[:*#;])/, |
|
75
|
|
|
|
|
|
|
link => \&_make_html_link, |
|
76
|
|
|
|
|
|
|
strong => sub {"$_[0]"}, |
|
77
|
|
|
|
|
|
|
emphasized => sub {"$_[0]"}, |
|
78
|
|
|
|
|
|
|
strong_tag => qr/'''(.+?)'''/, |
|
79
|
|
|
|
|
|
|
emphasized_tag => qr/''(.+?)''/, |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
code => ['', " \n", '', "\n"], |
|
82
|
|
|
|
|
|
|
line => ['', '', ' ', "\n"], |
|
83
|
|
|
|
|
|
|
paragraph => [" ", " \n", '', "\n", 1], |
|
84
|
|
|
|
|
|
|
paragraph_break => ['', '', '', "\n"], |
|
85
|
|
|
|
|
|
|
unordered => ["\n", '', "\n"], |
|
86
|
|
|
|
|
|
|
ordered => ["\n", " \n", '', "\n"], |
|
87
|
|
|
|
|
|
|
definition => ["\n", " \n", \&_dl], |
|
88
|
|
|
|
|
|
|
header => ['', "\n", \&_make_header], |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
blocks => |
|
91
|
|
|
|
|
|
|
{ |
|
92
|
|
|
|
|
|
|
code => qr/^ /, |
|
93
|
|
|
|
|
|
|
header => qr/^(=+)\s*(.+?)\s*\1$/, |
|
94
|
|
|
|
|
|
|
line => qr/^-{4,}$/, |
|
95
|
|
|
|
|
|
|
ordered => qr/^#\s*/, |
|
96
|
|
|
|
|
|
|
unordered => qr/^\*\s*/, |
|
97
|
|
|
|
|
|
|
definition => qr/^([;:])\s*/, |
|
98
|
|
|
|
|
|
|
paragraph => qr/^/, |
|
99
|
|
|
|
|
|
|
paragraph_break => qr/^\s*$/, |
|
100
|
|
|
|
|
|
|
}, |
|
101
|
|
|
|
|
|
|
|
|
102
|
|
|
|
|
|
|
indented => {map {$_ => 1} qw(ordered unordered definition)}, |
|
103
|
|
|
|
|
|
|
nests => {map {$_ => 1} qw(ordered unordered definition)}, |
|
104
|
|
|
|
|
|
|
nests_anywhere => {map {$_ => 1} qw(nowiki)}, |
|
105
|
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
blockorder => [qw(code header line ordered unordered definition |
|
107
|
|
|
|
|
|
|
paragraph_break paragraph)], |
|
108
|
|
|
|
|
|
|
implicit_link_delimiters |
|
109
|
|
|
|
|
|
|
=> qr!\b(?:[A-Z][a-z0-9]\w*){2,}!, |
|
110
|
|
|
|
|
|
|
extended_link_delimiters |
|
111
|
|
|
|
|
|
|
=> qr!\[(?:\[[^][]*\]|[^][]*)\]!, |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
schemas => [qw(http https ftp mailto gopher)], |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
unformatted_blocks => [qw(header nowiki pre)], |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
allowed_tags => [#HTML |
|
118
|
|
|
|
|
|
|
qw(b big blockquote br caption center cite code dd |
|
119
|
|
|
|
|
|
|
div dl dt em font h1 h2 h3 h4 h5 h6 hr i li ol p |
|
120
|
|
|
|
|
|
|
pre rb rp rt ruby s samp small strike strong sub |
|
121
|
|
|
|
|
|
|
sup table td th tr tt u ul var), |
|
122
|
|
|
|
|
|
|
# Mediawiki Specific |
|
123
|
|
|
|
|
|
|
qw(nowiki),], |
|
124
|
|
|
|
|
|
|
allowed_attrs => [qw(title align lang dir width height bgcolor), |
|
125
|
|
|
|
|
|
|
qw(clear), # BR |
|
126
|
|
|
|
|
|
|
qw(noshade), # HR |
|
127
|
|
|
|
|
|
|
qw(cite), # BLOCKQUOTE, Q |
|
128
|
|
|
|
|
|
|
qw(size face color), # FONT |
|
129
|
|
|
|
|
|
|
# For various lists, mostly deprecated but safe |
|
130
|
|
|
|
|
|
|
qw(type start value compact), |
|
131
|
|
|
|
|
|
|
# Tables |
|
132
|
|
|
|
|
|
|
qw(summary width border frame rules cellspacing |
|
133
|
|
|
|
|
|
|
cellpadding valign char charoff colgroup col |
|
134
|
|
|
|
|
|
|
span abbr axis headers scope rowspan colspan), |
|
135
|
|
|
|
|
|
|
qw(id class name style), # For CSS |
|
136
|
|
|
|
|
|
|
], |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
_toc => [], |
|
139
|
|
|
|
|
|
|
); |
|
140
|
|
|
|
|
|
|
|
|
141
|
|
|
|
|
|
|
%opts = |
|
142
|
|
|
|
|
|
|
( |
|
143
|
|
|
|
|
|
|
extended => 1, |
|
144
|
|
|
|
|
|
|
implicit_links => 0, |
|
145
|
|
|
|
|
|
|
absolute_links => 1, |
|
146
|
|
|
|
|
|
|
prefix => '', |
|
147
|
|
|
|
|
|
|
process_html => 1, |
|
148
|
|
|
|
|
|
|
charset => 'utf-8', |
|
149
|
|
|
|
|
|
|
); |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
# Make sure import's argument hash contains an `as' entry. `as' defaults to |
|
152
|
|
|
|
|
|
|
# `wikiformat' when none is given. |
|
153
|
|
|
|
|
|
|
sub _process_args |
|
154
|
|
|
|
|
|
|
{ |
|
155
|
15
|
|
|
15
|
|
25
|
shift; # Class |
|
156
|
15
|
100
|
|
|
|
53
|
return as => shift if @_ == 1; |
|
157
|
14
|
|
|
|
|
97
|
return as => 'wikiformat', @_; |
|
158
|
|
|
|
|
|
|
} |
|
159
|
|
|
|
|
|
|
|
|
160
|
|
|
|
|
|
|
# Delete the options (prefix, extended, implicit_links, ...) from a hash, |
|
161
|
|
|
|
|
|
|
# returning a new hash with the deleted options. |
|
162
|
|
|
|
|
|
|
sub _extract_opts |
|
163
|
|
|
|
|
|
|
{ |
|
164
|
15
|
|
|
15
|
|
23
|
my %newopts; |
|
165
|
|
|
|
|
|
|
|
|
166
|
15
|
|
|
|
|
40
|
for my $key (qw{prefix extended implicit_links absolute_links |
|
167
|
|
|
|
|
|
|
process_html debug}) |
|
168
|
|
|
|
|
|
|
{ |
|
169
|
90
|
100
|
|
|
|
260
|
if (defined (my $val = delete $_[0]->{$key})) |
|
170
|
|
|
|
|
|
|
{ |
|
171
|
19
|
|
|
|
|
39
|
$newopts{$key} = $val; |
|
172
|
|
|
|
|
|
|
} |
|
173
|
|
|
|
|
|
|
} |
|
174
|
|
|
|
|
|
|
|
|
175
|
15
|
|
|
|
|
58
|
return \%newopts; |
|
176
|
|
|
|
|
|
|
} |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
# Shamelessly ripped from Hash::Merge, which doesn't work in a threaded |
|
179
|
|
|
|
|
|
|
# environment with two threads trying to use different merge matrices. |
|
180
|
|
|
|
|
|
|
%merge_matrix = |
|
181
|
|
|
|
|
|
|
( |
|
182
|
|
|
|
|
|
|
SCALAR => |
|
183
|
|
|
|
|
|
|
{ |
|
184
|
|
|
|
|
|
|
SCALAR => sub {return $_[0]}, |
|
185
|
|
|
|
|
|
|
ARRAY => sub {# Need to be able to replace scalar with array |
|
186
|
|
|
|
|
|
|
# for extended_link_delimiters (could be array |
|
187
|
|
|
|
|
|
|
# or regex). |
|
188
|
|
|
|
|
|
|
return $_[0];}, |
|
189
|
|
|
|
|
|
|
HASH => sub {confess "Attempt to replace hash with scalar" |
|
190
|
|
|
|
|
|
|
if defined $_[0]; |
|
191
|
|
|
|
|
|
|
return _clone ($_[1]);} |
|
192
|
|
|
|
|
|
|
}, |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
ARRAY => |
|
195
|
|
|
|
|
|
|
{ |
|
196
|
|
|
|
|
|
|
SCALAR => sub {# Need to be able to replace array with scalar |
|
197
|
|
|
|
|
|
|
# for extended_link_delimiters (could be array |
|
198
|
|
|
|
|
|
|
# or regex). |
|
199
|
|
|
|
|
|
|
return _clone ($_[0]);}, |
|
200
|
|
|
|
|
|
|
ARRAY => sub {return _clone ($_[0]);}, |
|
201
|
|
|
|
|
|
|
HASH => sub {confess "Attempt to replace hash with array"} |
|
202
|
|
|
|
|
|
|
}, |
|
203
|
|
|
|
|
|
|
|
|
204
|
|
|
|
|
|
|
HASH => |
|
205
|
|
|
|
|
|
|
{ |
|
206
|
|
|
|
|
|
|
SCALAR => sub {confess "Attempt to replace scalar with hash"}, |
|
207
|
|
|
|
|
|
|
ARRAY => sub {confess "Attempt to replace array with hash"}, |
|
208
|
|
|
|
|
|
|
HASH => sub {_merge_hash_elements ($_[0], $_[1])} |
|
209
|
|
|
|
|
|
|
} |
|
210
|
|
|
|
|
|
|
); |
|
211
|
|
|
|
|
|
|
# Return arrays and a deep copy of hashes. |
|
212
|
|
|
|
|
|
|
sub _clone |
|
213
|
|
|
|
|
|
|
{ |
|
214
|
2890
|
|
|
2890
|
|
2298
|
my ($obj) = @_; |
|
215
|
2890
|
|
|
|
|
1807
|
my $type; |
|
216
|
2890
|
50
|
|
|
|
5772
|
if (!defined $obj) { # Perl 5.005 compatibility |
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
217
|
0
|
|
|
|
|
0
|
$type = 'SCALAR'; |
|
218
|
|
|
|
|
|
|
} elsif (ref $obj eq 'HASH') { |
|
219
|
231
|
|
|
|
|
205
|
$type = 'HASH'; |
|
220
|
|
|
|
|
|
|
} elsif (ref $obj eq 'ARRAY') { |
|
221
|
913
|
|
|
|
|
719
|
$type = 'ARRAY'; |
|
222
|
|
|
|
|
|
|
} else { |
|
223
|
1746
|
|
|
|
|
1398
|
$type = 'SCALAR'; |
|
224
|
|
|
|
|
|
|
} |
|
225
|
|
|
|
|
|
|
|
|
226
|
2890
|
100
|
|
|
|
5759
|
return $obj if $type eq 'SCALAR'; |
|
227
|
1144
|
100
|
|
|
|
2389
|
return $obj if $type eq 'ARRAY'; |
|
228
|
|
|
|
|
|
|
|
|
229
|
231
|
|
|
|
|
171
|
my %copy; |
|
230
|
231
|
|
|
|
|
582
|
foreach my $key (keys %$obj) |
|
231
|
|
|
|
|
|
|
{ |
|
232
|
800
|
|
|
|
|
1199
|
$copy{$key} = _clone ($obj->{$key}); |
|
233
|
|
|
|
|
|
|
} |
|
234
|
231
|
|
|
|
|
501
|
return \%copy; |
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
# This does a straight merge of hashes, delegating the merge-specific |
|
237
|
|
|
|
|
|
|
# work to '_merge_hashes'. |
|
238
|
|
|
|
|
|
|
sub _merge_hash_elements |
|
239
|
|
|
|
|
|
|
{ |
|
240
|
169
|
|
|
169
|
|
166
|
my ($left, $right) = @_; |
|
241
|
169
|
50
|
33
|
|
|
927
|
die "Arguments for _merge_hash_elements must be hash references" unless |
|
242
|
|
|
|
|
|
|
UNIVERSAL::isa ($left, 'HASH') && UNIVERSAL::isa ($right, 'HASH'); |
|
243
|
|
|
|
|
|
|
|
|
244
|
169
|
|
|
|
|
157
|
my %newhash; |
|
245
|
169
|
|
|
|
|
409
|
foreach my $leftkey (keys %$left) |
|
246
|
|
|
|
|
|
|
{ |
|
247
|
243
|
100
|
|
|
|
352
|
if (exists $right->{$leftkey}) |
|
248
|
|
|
|
|
|
|
{ |
|
249
|
235
|
|
|
|
|
394
|
$newhash{$leftkey} = |
|
250
|
|
|
|
|
|
|
_merge_hashes ($left->{$leftkey}, $right->{$leftkey}); |
|
251
|
|
|
|
|
|
|
} |
|
252
|
|
|
|
|
|
|
else |
|
253
|
|
|
|
|
|
|
{ |
|
254
|
8
|
|
|
|
|
14
|
$newhash{$leftkey} = _clone ($left->{$leftkey}); |
|
255
|
|
|
|
|
|
|
} |
|
256
|
|
|
|
|
|
|
} |
|
257
|
169
|
|
|
|
|
671
|
foreach my $rightkey (keys %$right) |
|
258
|
|
|
|
|
|
|
{ |
|
259
|
2266
|
100
|
|
|
|
4505
|
$newhash{$rightkey} = _clone ($right->{$rightkey}) |
|
260
|
|
|
|
|
|
|
if !exists $left->{$rightkey}; |
|
261
|
|
|
|
|
|
|
} |
|
262
|
169
|
|
|
|
|
480
|
return \%newhash; |
|
263
|
|
|
|
|
|
|
} |
|
264
|
|
|
|
|
|
|
sub _merge_hashes |
|
265
|
|
|
|
|
|
|
{ |
|
266
|
371
|
|
|
371
|
|
4481
|
my ($left, $right) = @_; |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
# if one argument or the other is undefined or empty, don't worry about |
|
269
|
|
|
|
|
|
|
# copying, just return the original. |
|
270
|
371
|
50
|
|
|
|
541
|
return $right unless defined $left; |
|
271
|
371
|
50
|
|
|
|
517
|
return $left unless defined $right; |
|
272
|
|
|
|
|
|
|
|
|
273
|
|
|
|
|
|
|
# For the general use of this function, we want to create duplicates |
|
274
|
|
|
|
|
|
|
# of all data that is merged. |
|
275
|
|
|
|
|
|
|
|
|
276
|
371
|
|
|
|
|
278
|
my ($lefttype, $righttype); |
|
277
|
371
|
100
|
|
|
|
720
|
if (ref $left eq 'HASH') { |
|
|
|
100
|
|
|
|
|
|
|
278
|
169
|
|
|
|
|
307
|
$lefttype = 'HASH'; |
|
279
|
|
|
|
|
|
|
} elsif (ref $left eq 'ARRAY') { |
|
280
|
51
|
|
|
|
|
54
|
$lefttype = 'ARRAY'; |
|
281
|
|
|
|
|
|
|
} else { |
|
282
|
151
|
|
|
|
|
151
|
$lefttype = 'SCALAR'; |
|
283
|
|
|
|
|
|
|
} |
|
284
|
|
|
|
|
|
|
|
|
285
|
371
|
100
|
|
|
|
599
|
if (ref $right eq 'HASH') { |
|
|
|
100
|
|
|
|
|
|
|
286
|
169
|
|
|
|
|
149
|
$righttype = 'HASH'; |
|
287
|
|
|
|
|
|
|
} elsif (ref $right eq 'ARRAY') { |
|
288
|
49
|
|
|
|
|
36
|
$righttype = 'ARRAY'; |
|
289
|
|
|
|
|
|
|
} else { |
|
290
|
153
|
|
|
|
|
153
|
$righttype = 'SCALAR'; |
|
291
|
|
|
|
|
|
|
} |
|
292
|
|
|
|
|
|
|
|
|
293
|
371
|
|
|
|
|
734
|
return $merge_matrix{$lefttype}->{$righttype} ($left, $right); |
|
294
|
|
|
|
|
|
|
} |
|
295
|
|
|
|
|
|
|
|
|
296
|
|
|
|
|
|
|
sub _require_html_packages |
|
297
|
|
|
|
|
|
|
{ |
|
298
|
1
|
50
|
|
1
|
|
3
|
croak "$missing_html_packages\n" |
|
299
|
|
|
|
|
|
|
. "HTML::Parser & HTML::Tagset is required for process_html\n" |
|
300
|
|
|
|
|
|
|
if $missing_html_packages; |
|
301
|
|
|
|
|
|
|
} |
|
302
|
|
|
|
|
|
|
|
|
303
|
|
|
|
|
|
|
sub import |
|
304
|
|
|
|
|
|
|
{ |
|
305
|
18
|
100
|
|
18
|
|
4362
|
return unless @_ > 1; |
|
306
|
|
|
|
|
|
|
|
|
307
|
15
|
|
|
|
|
28
|
my $class = shift; |
|
308
|
15
|
|
|
|
|
57
|
my %args = $class->_process_args (@_); |
|
309
|
15
|
|
|
|
|
42
|
my $name = delete $args{as}; |
|
310
|
|
|
|
|
|
|
|
|
311
|
15
|
|
|
|
|
40
|
my $caller = caller(); |
|
312
|
15
|
|
|
|
|
62
|
my $iopts = _merge_hashes _extract_opts (\%args), \%opts; |
|
313
|
15
|
|
|
|
|
54
|
my $itags = _merge_hashes \%args, \%tags; |
|
314
|
|
|
|
|
|
|
|
|
315
|
15
|
100
|
|
|
|
68
|
_require_html_packages |
|
316
|
|
|
|
|
|
|
if $iopts->{process_html}; |
|
317
|
|
|
|
|
|
|
|
|
318
|
|
|
|
|
|
|
# Could verify ITAGS here via _check_blocks, but what if a user |
|
319
|
|
|
|
|
|
|
# wants to add a block to block_order that they intend to override |
|
320
|
|
|
|
|
|
|
# the implementation of with every call to format()? |
|
321
|
|
|
|
|
|
|
|
|
322
|
14
|
|
|
14
|
|
102
|
no strict 'refs'; |
|
|
14
|
|
|
|
|
118
|
|
|
|
14
|
|
|
|
|
49162
|
|
|
323
|
15
|
|
|
|
|
17715
|
*{ $caller . "::" . $name } = sub |
|
324
|
|
|
|
|
|
|
{ |
|
325
|
45
|
|
|
45
|
|
33441
|
Text::MediawikiFormat::_format ($itags, $iopts, @_); |
|
326
|
|
|
|
|
|
|
} |
|
327
|
15
|
|
|
|
|
102
|
} |
|
328
|
|
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
|
|
330
|
|
|
|
|
|
|
|
|
331
|
|
|
|
|
|
|
=head1 FUNCTIONS |
|
332
|
|
|
|
|
|
|
|
|
333
|
|
|
|
|
|
|
=head2 format |
|
334
|
|
|
|
|
|
|
|
|
335
|
|
|
|
|
|
|
C takes one required argument, the text to convert, and returns the |
|
336
|
|
|
|
|
|
|
converted text. It allows two optional arguments. The first is a reference to |
|
337
|
|
|
|
|
|
|
a hash of tags used to override the function's default behavior. Anything |
|
338
|
|
|
|
|
|
|
passed in here will override the default tags. The second argument is a hash |
|
339
|
|
|
|
|
|
|
reference of options. The options are currently: |
|
340
|
|
|
|
|
|
|
|
|
341
|
|
|
|
|
|
|
=over 4 |
|
342
|
|
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
=item prefix |
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
The prefix of any links to wiki pages. In HTML mode, this is the path to the |
|
346
|
|
|
|
|
|
|
Wiki. The actual linked item itself will be appended to the prefix. This is |
|
347
|
|
|
|
|
|
|
useful to create full URIs: |
|
348
|
|
|
|
|
|
|
|
|
349
|
|
|
|
|
|
|
{prefix => 'http://example.com/wiki.pl?page='} |
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
=item extended |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
A boolean flag, true by default, to let square brackets mark links. |
|
354
|
|
|
|
|
|
|
An optional title may occur after the Wiki targets, preceded by an open pipe. |
|
355
|
|
|
|
|
|
|
URI titles are separated from their title with a space. These are valid |
|
356
|
|
|
|
|
|
|
extended links: |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
[[A wiki page|and the title to display]] |
|
359
|
|
|
|
|
|
|
[http://ximbiot.com URI title] |
|
360
|
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
Where the linking semantics of the destination format allow it, the result will |
|
362
|
|
|
|
|
|
|
display the title instead of the URI. In HTML terms, the title is the content |
|
363
|
|
|
|
|
|
|
of an C element (not the content of its C attribute). |
|
364
|
|
|
|
|
|
|
|
|
365
|
|
|
|
|
|
|
You can use delimiters other than single square brackets for marking extended |
|
366
|
|
|
|
|
|
|
links by passing a value for C in the C<%tags> hash |
|
367
|
|
|
|
|
|
|
when calling C. |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
Note that if you disable this flag, you should probably enable |
|
370
|
|
|
|
|
|
|
C or there will be no automated way to link to other pages in |
|
371
|
|
|
|
|
|
|
your wiki. |
|
372
|
|
|
|
|
|
|
|
|
373
|
|
|
|
|
|
|
=item implicit_links |
|
374
|
|
|
|
|
|
|
|
|
375
|
|
|
|
|
|
|
A boolean flag, false by default, to create links from StudlyCapsStrings. |
|
376
|
|
|
|
|
|
|
|
|
377
|
|
|
|
|
|
|
=item absolute_links |
|
378
|
|
|
|
|
|
|
|
|
379
|
|
|
|
|
|
|
A boolean flag, true by default, which treats any links that are absolute URIs |
|
380
|
|
|
|
|
|
|
(such as C) specially. Any prefix will not apply. |
|
381
|
|
|
|
|
|
|
This should maybe be called implicit_absolute_links since the C |
|
382
|
|
|
|
|
|
|
option enables absolute links inside square brackets by default. |
|
383
|
|
|
|
|
|
|
|
|
384
|
|
|
|
|
|
|
A link is any text that starts with a known schema followed by a colon and one |
|
385
|
|
|
|
|
|
|
or more non-whitespace characters. This is a distinct subset of what L |
|
386
|
|
|
|
|
|
|
recognizes as a URI, but is a good first-order approximation. If you need to |
|
387
|
|
|
|
|
|
|
recognize more complex URIs, use the standard wiki formatting explained |
|
388
|
|
|
|
|
|
|
earlier. |
|
389
|
|
|
|
|
|
|
|
|
390
|
|
|
|
|
|
|
The recognized schemas are those defined in the C value in the C<%tags> |
|
391
|
|
|
|
|
|
|
hash. C defaults to C, C, C, C, and |
|
392
|
|
|
|
|
|
|
C. |
|
393
|
|
|
|
|
|
|
|
|
394
|
|
|
|
|
|
|
=item process_html |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
This flag, true by default, causes the formatter to ignore block level wiki |
|
397
|
|
|
|
|
|
|
markup (code, ordered, unordered, etc...) when they occur on lines which also |
|
398
|
|
|
|
|
|
|
contain allowed block-level HTML tags (, , , etc...). |
|
399
|
|
|
|
|
|
|
Phrase level wiki markup (emphasis, strong, & links) is unaffected by this |
|
400
|
|
|
|
|
|
|
flag. |
|
401
|
|
|
|
|
|
|
|
|
402
|
|
|
|
|
|
|
=back |
|
403
|
|
|
|
|
|
|
|
|
404
|
|
|
|
|
|
|
=cut |
|
405
|
|
|
|
|
|
|
|
|
406
|
|
|
|
|
|
|
sub format |
|
407
|
|
|
|
|
|
|
{ |
|
408
|
7
|
|
|
7
|
1
|
9586
|
_format (\%tags, \%opts, @_); |
|
409
|
|
|
|
|
|
|
} |
|
410
|
|
|
|
|
|
|
|
|
411
|
|
|
|
|
|
|
# Turn the contents after a ; or : into a dictionary list. |
|
412
|
|
|
|
|
|
|
# Using : without ; just looks like an indent. |
|
413
|
|
|
|
|
|
|
sub _dl |
|
414
|
|
|
|
|
|
|
{ |
|
415
|
|
|
|
|
|
|
#my ($line, $indent, $lead) = @_; |
|
416
|
23
|
|
|
23
|
|
16
|
my ($term, $def); |
|
417
|
|
|
|
|
|
|
|
|
418
|
23
|
100
|
|
|
|
29
|
if ($_[2] eq ';') |
|
419
|
|
|
|
|
|
|
{ |
|
420
|
11
|
100
|
|
|
|
32
|
if ($_[0] =~ /^(.*?)\s+:\s+(.*)$/) |
|
421
|
|
|
|
|
|
|
{ |
|
422
|
6
|
|
|
|
|
10
|
$term = $1; |
|
423
|
6
|
|
|
|
|
8
|
$def = $2; |
|
424
|
|
|
|
|
|
|
} |
|
425
|
|
|
|
|
|
|
else |
|
426
|
|
|
|
|
|
|
{ |
|
427
|
5
|
|
|
|
|
8
|
$term = $_[0]; |
|
428
|
|
|
|
|
|
|
} |
|
429
|
|
|
|
|
|
|
} |
|
430
|
|
|
|
|
|
|
else |
|
431
|
|
|
|
|
|
|
{ |
|
432
|
12
|
|
|
|
|
11
|
$def = $_[0]; |
|
433
|
|
|
|
|
|
|
} |
|
434
|
|
|
|
|
|
|
|
|
435
|
23
|
|
|
|
|
18
|
my @retval; |
|
436
|
23
|
100
|
|
|
|
40
|
push @retval, "", $term, "\n" if defined $term; |
|
437
|
23
|
100
|
|
|
|
54
|
push @retval, "", $def, "\n" if defined $def; |
|
438
|
23
|
|
|
|
|
48
|
return @retval; |
|
439
|
|
|
|
|
|
|
} |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
# Makes a regex out of the allowed schema array. |
|
442
|
|
|
|
|
|
|
sub _make_schema_regex |
|
443
|
|
|
|
|
|
|
{ |
|
444
|
47
|
|
|
47
|
|
70
|
my $re = join "|", map {qr/\Q$_\E/} @_; |
|
|
231
|
|
|
|
|
1697
|
|
|
445
|
47
|
|
|
|
|
844
|
return qr/(?:$re)/; |
|
446
|
|
|
|
|
|
|
} |
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
$uric = $URI::uric; |
|
449
|
|
|
|
|
|
|
$uricCheat = $uric; |
|
450
|
|
|
|
|
|
|
|
|
451
|
|
|
|
|
|
|
# We need to avoid picking up 'HTTP::Request::Common' so we have a |
|
452
|
|
|
|
|
|
|
# subset of uric without a colon. |
|
453
|
|
|
|
|
|
|
$uricCheat =~ tr/://d; |
|
454
|
|
|
|
|
|
|
|
|
455
|
|
|
|
|
|
|
# Identifying characters often accidentally picked up trailing a URI. |
|
456
|
|
|
|
|
|
|
$uriCruft = q/]),.!'";}/; |
|
457
|
|
|
|
|
|
|
|
|
458
|
|
|
|
|
|
|
# escape a URI based on our charset. |
|
459
|
|
|
|
|
|
|
sub _escape_uri |
|
460
|
|
|
|
|
|
|
{ |
|
461
|
28
|
|
|
28
|
|
40
|
my ($opts, $uri) = @_; |
|
462
|
28
|
50
|
|
|
|
65
|
confess "charset not initialized" unless $opts->{charset}; |
|
463
|
28
|
50
|
|
|
|
192
|
return uri_escape_utf8 $uri if $opts->{charset} =~ /^utf-?8$/i; |
|
464
|
0
|
|
|
|
|
0
|
return uri_escape $uri; |
|
465
|
|
|
|
|
|
|
} |
|
466
|
|
|
|
|
|
|
|
|
467
|
|
|
|
|
|
|
# Turn [[Wiki Link|Title]], [URI Title], scheme:url, or StudlyCaps into links. |
|
468
|
|
|
|
|
|
|
sub _make_html_link |
|
469
|
|
|
|
|
|
|
{ |
|
470
|
32
|
|
|
32
|
|
118
|
my ($tag, $opts, $tags) = @_; |
|
471
|
|
|
|
|
|
|
|
|
472
|
32
|
|
|
|
|
53
|
my ($class, $trailing) = ('', ''); |
|
473
|
32
|
|
|
|
|
30
|
my ($href, $title); |
|
474
|
32
|
100
|
|
|
|
157
|
if ($tag =~ /^\[\[([^|#]*)(?:(#)([^|]*))?(?:(\|)(.*))?\]\]$/) |
|
|
|
100
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
{ |
|
476
|
|
|
|
|
|
|
# Wiki link |
|
477
|
7
|
50
|
|
|
|
35
|
$href = $opts->{prefix} . _escape_uri $opts, $1 if $1; |
|
478
|
7
|
50
|
|
|
|
170
|
$href .= $2 . _escape_uri $opts, $3 if $2; |
|
479
|
|
|
|
|
|
|
|
|
480
|
7
|
100
|
|
|
|
20
|
if ($4) |
|
481
|
|
|
|
|
|
|
{ |
|
482
|
|
|
|
|
|
|
# Title specified explicitly. |
|
483
|
3
|
50
|
|
|
|
7
|
if (length $5) |
|
484
|
|
|
|
|
|
|
{ |
|
485
|
3
|
|
|
|
|
6
|
$title = $5; |
|
486
|
|
|
|
|
|
|
} |
|
487
|
|
|
|
|
|
|
else |
|
488
|
|
|
|
|
|
|
{ |
|
489
|
|
|
|
|
|
|
# An empty title asks Mediawiki to strip any parens off the end |
|
490
|
|
|
|
|
|
|
# of the node name. |
|
491
|
0
|
|
|
|
|
0
|
$1 =~ /^([^(]*)(?:\s*\()?/; |
|
492
|
0
|
|
|
|
|
0
|
$title = $1; |
|
493
|
|
|
|
|
|
|
} |
|
494
|
|
|
|
|
|
|
} |
|
495
|
|
|
|
|
|
|
else |
|
496
|
|
|
|
|
|
|
{ |
|
497
|
|
|
|
|
|
|
# Title defaults to the node name. |
|
498
|
4
|
|
|
|
|
6
|
$title = $1; |
|
499
|
|
|
|
|
|
|
} |
|
500
|
|
|
|
|
|
|
} |
|
501
|
|
|
|
|
|
|
elsif ($tag =~ /^\[(\S*)(?:(\s+)(.*))?\]$/) |
|
502
|
|
|
|
|
|
|
{ |
|
503
|
|
|
|
|
|
|
# URI |
|
504
|
5
|
|
|
|
|
12
|
$href = $1; |
|
505
|
5
|
50
|
|
|
|
18
|
if ($2) |
|
506
|
|
|
|
|
|
|
{ |
|
507
|
5
|
|
|
|
|
10
|
$title = $3; |
|
508
|
|
|
|
|
|
|
} |
|
509
|
|
|
|
|
|
|
else |
|
510
|
|
|
|
|
|
|
{ |
|
511
|
0
|
|
|
|
|
0
|
$title = ++$opts->{_uri_refs}; |
|
512
|
|
|
|
|
|
|
} |
|
513
|
5
|
|
|
|
|
8
|
$href =~ s/'/%27/g; |
|
514
|
|
|
|
|
|
|
} |
|
515
|
|
|
|
|
|
|
else |
|
516
|
|
|
|
|
|
|
{ |
|
517
|
|
|
|
|
|
|
# Shouldn't be able to get here without either $opts->{absolute_links} |
|
518
|
|
|
|
|
|
|
# or $opts->{implicit_links}; |
|
519
|
20
|
|
33
|
|
|
63
|
$tags->{_schema_regex} ||= _make_schema_regex @{$tags->{schemas}}; |
|
|
0
|
|
|
|
|
0
|
|
|
520
|
20
|
|
|
|
|
29
|
my $s = $tags->{_schema_regex}; |
|
521
|
|
|
|
|
|
|
|
|
522
|
20
|
100
|
|
|
|
560
|
if ($tag =~ /^$s:[$uricCheat][$uric]*$/) |
|
523
|
|
|
|
|
|
|
{ |
|
524
|
|
|
|
|
|
|
# absolute link |
|
525
|
8
|
|
|
|
|
19
|
$href = $&; |
|
526
|
8
|
100
|
|
|
|
68
|
$trailing = $& if $href =~ s/[$uriCruft]$//; |
|
527
|
8
|
|
|
|
|
18
|
$title = $href; |
|
528
|
|
|
|
|
|
|
} |
|
529
|
|
|
|
|
|
|
else |
|
530
|
|
|
|
|
|
|
{ |
|
531
|
|
|
|
|
|
|
# StudlyCaps |
|
532
|
12
|
|
|
|
|
30
|
$href = $opts->{prefix} . _escape_uri $opts, $tag; |
|
533
|
12
|
|
|
|
|
273
|
$title = $tag; |
|
534
|
|
|
|
|
|
|
} |
|
535
|
|
|
|
|
|
|
} |
|
536
|
|
|
|
|
|
|
|
|
537
|
32
|
|
|
|
|
213
|
return "$title$trailing"; |
|
538
|
|
|
|
|
|
|
} |
|
539
|
|
|
|
|
|
|
|
|
540
|
|
|
|
|
|
|
# Store a TOC line for later. |
|
541
|
|
|
|
|
|
|
# |
|
542
|
|
|
|
|
|
|
# ASSUMPTIONS |
|
543
|
|
|
|
|
|
|
# $level >= 1 |
|
544
|
|
|
|
|
|
|
sub _store_toc_line |
|
545
|
|
|
|
|
|
|
{ |
|
546
|
12
|
|
|
12
|
|
20
|
my ($toc, $level, $title, $name) = @_; |
|
547
|
|
|
|
|
|
|
|
|
548
|
|
|
|
|
|
|
# TODO: Strip formatting from $title. |
|
549
|
|
|
|
|
|
|
|
|
550
|
12
|
100
|
100
|
|
|
53
|
if (@$toc && $level > $toc->[-1]->{level}) |
|
551
|
|
|
|
|
|
|
{ |
|
552
|
|
|
|
|
|
|
# Nest a sublevel. |
|
553
|
3
|
100
|
|
|
|
23
|
$toc->[-1]->{sublevel} = [] |
|
554
|
|
|
|
|
|
|
unless exists $toc->[-1]->{sublevel}; |
|
555
|
3
|
|
|
|
|
10
|
_store_toc_line ($toc->[-1]->{sublevel}, $level, $title, $name); |
|
556
|
|
|
|
|
|
|
} |
|
557
|
|
|
|
|
|
|
else |
|
558
|
|
|
|
|
|
|
{ |
|
559
|
9
|
|
|
|
|
40
|
push @$toc, {level => $level, title => $title, name => $name}; |
|
560
|
|
|
|
|
|
|
} |
|
561
|
|
|
|
|
|
|
|
|
562
|
12
|
|
|
|
|
17
|
return $level; |
|
563
|
|
|
|
|
|
|
} |
|
564
|
|
|
|
|
|
|
|
|
565
|
|
|
|
|
|
|
# Make header text, storing the line for the TOC. |
|
566
|
|
|
|
|
|
|
# |
|
567
|
|
|
|
|
|
|
# ASSUMPTIONS |
|
568
|
|
|
|
|
|
|
# $tags->{_toc} has been initialized to an array ref. |
|
569
|
|
|
|
|
|
|
sub _make_header |
|
570
|
|
|
|
|
|
|
{ |
|
571
|
9
|
|
|
9
|
|
13
|
my $level = length $_[2]; |
|
572
|
9
|
|
|
|
|
24
|
my $n = _escape_uri $_[-1], $_[3]; |
|
573
|
|
|
|
|
|
|
|
|
574
|
9
|
|
|
|
|
209
|
_store_toc_line ($_[-2]->{_toc}, $level, $_[3], $n); |
|
575
|
|
|
|
|
|
|
|
|
576
|
9
|
|
|
|
|
40
|
return "", |
|
577
|
|
|
|
|
|
|
Text::MediawikiFormat::format_line ($_[3], @_[-2, -1]), |
|
578
|
|
|
|
|
|
|
"\n"; |
|
579
|
|
|
|
|
|
|
} |
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
sub _format |
|
582
|
|
|
|
|
|
|
{ |
|
583
|
50
|
|
|
50
|
|
106
|
my ($itags, $iopts, $text, $tags, $opts) = @_; |
|
584
|
|
|
|
|
|
|
|
|
585
|
|
|
|
|
|
|
# Overwriting the caller's hashes locally after merging its contents |
|
586
|
|
|
|
|
|
|
# is okay. |
|
587
|
50
|
|
100
|
|
|
235
|
$tags = _merge_hashes ($tags || {}, $itags); |
|
588
|
50
|
|
100
|
|
|
210
|
$opts = _merge_hashes ($opts || {}, $iopts); |
|
589
|
|
|
|
|
|
|
|
|
590
|
50
|
50
|
|
|
|
152
|
_require_html_packages |
|
591
|
|
|
|
|
|
|
if $opts->{process_html}; |
|
592
|
|
|
|
|
|
|
|
|
593
|
|
|
|
|
|
|
# Always verify the blocks since the user may have slagged the |
|
594
|
|
|
|
|
|
|
# default hash on import. |
|
595
|
50
|
|
|
|
|
147
|
_check_blocks ($tags); |
|
596
|
|
|
|
|
|
|
|
|
597
|
50
|
|
|
|
|
156
|
my @blocks = _find_blocks ($text, $tags, $opts); |
|
598
|
50
|
|
|
|
|
128
|
@blocks = _nest_blocks (\@blocks); |
|
599
|
50
|
|
|
|
|
133
|
return _process_blocks (\@blocks, $tags, $opts); |
|
600
|
|
|
|
|
|
|
} |
|
601
|
|
|
|
|
|
|
|
|
602
|
|
|
|
|
|
|
sub _check_blocks |
|
603
|
|
|
|
|
|
|
{ |
|
604
|
52
|
|
|
52
|
|
1783
|
my $tags = shift; |
|
605
|
52
|
|
|
|
|
56
|
my %blocks = %{$tags->{blocks}}; |
|
|
52
|
|
|
|
|
277
|
|
|
606
|
52
|
|
|
|
|
83
|
delete @blocks{@{$tags->{blockorder}}}; |
|
|
52
|
|
|
|
|
190
|
|
|
607
|
|
|
|
|
|
|
|
|
608
|
52
|
100
|
|
|
|
578
|
carp |
|
609
|
|
|
|
|
|
|
"No order specified for blocks: " |
|
610
|
|
|
|
|
|
|
. join (', ', keys %blocks) |
|
611
|
|
|
|
|
|
|
. ".\n" |
|
612
|
|
|
|
|
|
|
if keys %blocks; |
|
613
|
|
|
|
|
|
|
} |
|
614
|
|
|
|
|
|
|
|
|
615
|
|
|
|
|
|
|
# This sub recognizes three states: |
|
616
|
|
|
|
|
|
|
# |
|
617
|
|
|
|
|
|
|
# 1. undef |
|
618
|
|
|
|
|
|
|
# Normal wiki processing will be done on this line. |
|
619
|
|
|
|
|
|
|
# |
|
620
|
|
|
|
|
|
|
# 2. html |
|
621
|
|
|
|
|
|
|
# Links and phrasal processing will be done, but formatting should be |
|
622
|
|
|
|
|
|
|
# ignored. |
|
623
|
|
|
|
|
|
|
# |
|
624
|
|
|
|
|
|
|
# 3. nowiki |
|
625
|
|
|
|
|
|
|
# No further wiki processing should be done. |
|
626
|
|
|
|
|
|
|
# |
|
627
|
|
|
|
|
|
|
# Each state may override the lower ones if already set on a given line. |
|
628
|
|
|
|
|
|
|
# |
|
629
|
|
|
|
|
|
|
sub _append_processed_line |
|
630
|
|
|
|
|
|
|
{ |
|
631
|
0
|
|
|
0
|
|
0
|
my ($parser, $text, $state) = @_; |
|
632
|
0
|
|
|
|
|
0
|
my $lines = $parser->{processed_lines}; |
|
633
|
|
|
|
|
|
|
|
|
634
|
0
|
|
0
|
|
|
0
|
$state ||= ''; |
|
635
|
|
|
|
|
|
|
|
|
636
|
0
|
|
|
|
|
0
|
my @newlines = split /(?<=\n)/, $text; |
|
637
|
0
|
0
|
0
|
|
|
0
|
if (@$lines && $lines->[-1]->[1] !~ /\n$/ |
|
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
638
|
|
|
|
|
|
|
&& # State not changing from or to 'nowiki' |
|
639
|
|
|
|
|
|
|
!($state ne $lines->[-1]->[0] |
|
640
|
|
|
|
|
|
|
&& grep /^nowiki$/, $state, $lines->[-1]->[0])) |
|
641
|
|
|
|
|
|
|
{ |
|
642
|
0
|
|
|
|
|
0
|
$lines->[-1]->[1] .= shift @newlines; |
|
643
|
0
|
0
|
|
|
|
0
|
$lines->[-1]->[0] = $state if $state eq 'html'; |
|
644
|
|
|
|
|
|
|
} |
|
645
|
|
|
|
|
|
|
|
|
646
|
0
|
|
|
|
|
0
|
foreach my $line (@newlines) |
|
647
|
|
|
|
|
|
|
{ |
|
648
|
0
|
0
|
|
|
|
0
|
$lines->[-1]->[2] = '1' if @$lines; |
|
649
|
0
|
|
|
|
|
0
|
push @$lines, [$state, $line]; |
|
650
|
|
|
|
|
|
|
} |
|
651
|
0
|
0
|
0
|
|
|
0
|
$lines->[-1]->[2] = '1' |
|
652
|
|
|
|
|
|
|
if @$lines && $lines->[-1]->[1] =~ /\n$/; |
|
653
|
|
|
|
|
|
|
} |
|
654
|
|
|
|
|
|
|
|
|
655
|
|
|
|
|
|
|
sub _html_tag |
|
656
|
|
|
|
|
|
|
{ |
|
657
|
0
|
|
|
0
|
|
0
|
my ($parser, $type, $tagname, $orig, $attr) = @_; |
|
658
|
0
|
|
|
|
|
0
|
my $tags = $parser->{tags}; |
|
659
|
|
|
|
|
|
|
|
|
660
|
|
|
|
|
|
|
# $tagname may have been generated by an empty tag. If so, HTML::Parser |
|
661
|
|
|
|
|
|
|
# will sometimes include the trailing / in the tag name. |
|
662
|
0
|
|
|
|
|
0
|
my $isEmptyTag = $orig =~ m#/>$#; |
|
663
|
0
|
0
|
|
|
|
0
|
$tagname =~ s#/$## if $isEmptyTag; |
|
664
|
|
|
|
|
|
|
|
|
665
|
0
|
0
|
|
|
|
0
|
unless (grep /^\Q$tagname\E$/, @{$tags->{allowed_tags}}) |
|
|
0
|
|
|
|
|
0
|
|
|
666
|
|
|
|
|
|
|
{ |
|
667
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, CGI::escapeHTML $orig; |
|
668
|
0
|
|
|
|
|
0
|
return; |
|
669
|
|
|
|
|
|
|
} |
|
670
|
|
|
|
|
|
|
# Any $tagname must now be in the allowed list, including . |
|
671
|
|
|
|
|
|
|
|
|
672
|
0
|
|
|
|
|
0
|
my $tagstack = $parser->{tag_stack}; |
|
673
|
0
|
0
|
|
|
|
0
|
my $stacktop = @$tagstack ? $tagstack->[-1] : ''; |
|
674
|
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
# First, process end tags, since they can change our state. |
|
676
|
0
|
0
|
0
|
|
|
0
|
if ($type eq 'E' && $stacktop eq $tagname) |
|
677
|
|
|
|
|
|
|
{ |
|
678
|
|
|
|
|
|
|
# The closing tag is at the top of the stack, like it should be. |
|
679
|
|
|
|
|
|
|
# Pop it and append the close tag to the output. |
|
680
|
0
|
|
|
|
|
0
|
pop @$tagstack; |
|
681
|
0
|
|
|
|
|
0
|
my $newtag; |
|
682
|
|
|
|
|
|
|
|
|
683
|
0
|
0
|
|
|
|
0
|
if ($tagname eq 'nowiki') |
|
684
|
|
|
|
|
|
|
{ |
|
685
|
|
|
|
|
|
|
# The browser doesn't need to see the tag. |
|
686
|
0
|
|
|
|
|
0
|
$newtag = ''; |
|
687
|
|
|
|
|
|
|
} |
|
688
|
|
|
|
|
|
|
else |
|
689
|
|
|
|
|
|
|
{ |
|
690
|
0
|
|
|
|
|
0
|
$newtag = "$tagname>"; |
|
691
|
|
|
|
|
|
|
} |
|
692
|
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
# Can't close a state into or |
|
694
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, $newtag, 'html'; |
|
695
|
0
|
|
|
|
|
0
|
return; |
|
696
|
|
|
|
|
|
|
} |
|
697
|
|
|
|
|
|
|
|
|
698
|
0
|
0
|
0
|
|
|
0
|
if (@$tagstack && grep /^\Q$stacktop\E$/, qw{nowiki pre}) |
|
699
|
|
|
|
|
|
|
{ |
|
700
|
|
|
|
|
|
|
# Ignore all markup within or tags. |
|
701
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, CGI::escapeHTML ($orig), 'nowiki'; |
|
702
|
0
|
|
|
|
|
0
|
return; |
|
703
|
|
|
|
|
|
|
} |
|
704
|
|
|
|
|
|
|
|
|
705
|
0
|
0
|
0
|
|
|
0
|
if ($type eq 'E' && $HTML::Tagset::isPhraseMarkup{$tagname}) |
|
706
|
|
|
|
|
|
|
# If we ask for artificial end element events for self-closed elements, |
|
707
|
|
|
|
|
|
|
# then we need to check $HTML::Tagset::emptyElement($tagname) here too. |
|
708
|
|
|
|
|
|
|
{ |
|
709
|
|
|
|
|
|
|
# We didn't record phrase markup on the stack, so it's okay to just |
|
710
|
|
|
|
|
|
|
# let it close. |
|
711
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, "$tagname>"; |
|
712
|
0
|
|
|
|
|
0
|
return; |
|
713
|
|
|
|
|
|
|
} |
|
714
|
|
|
|
|
|
|
|
|
715
|
0
|
0
|
|
|
|
0
|
if ($type eq 'E') |
|
716
|
|
|
|
|
|
|
{ |
|
717
|
|
|
|
|
|
|
# We got a non-phrase end tag that wasn't on the stack. Escape it. |
|
718
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, CGI::escapeHTML ($orig); |
|
719
|
0
|
|
|
|
|
0
|
return; |
|
720
|
|
|
|
|
|
|
} |
|
721
|
|
|
|
|
|
|
|
|
722
|
|
|
|
|
|
|
|
|
723
|
|
|
|
|
|
|
### |
|
724
|
|
|
|
|
|
|
### $type must now eq 'S'. |
|
725
|
|
|
|
|
|
|
### |
|
726
|
|
|
|
|
|
|
|
|
727
|
|
|
|
|
|
|
# The browser doesn't need to see the tag. |
|
728
|
0
|
0
|
|
|
|
0
|
if ($tagname eq 'nowiki') |
|
729
|
|
|
|
|
|
|
{ |
|
730
|
0
|
0
|
|
|
|
0
|
push @$tagstack, $tagname |
|
731
|
|
|
|
|
|
|
unless $isEmptyTag; |
|
732
|
0
|
|
|
|
|
0
|
return; |
|
733
|
|
|
|
|
|
|
} |
|
734
|
|
|
|
|
|
|
|
|
735
|
|
|
|
|
|
|
# Strip disallowed attributes. |
|
736
|
0
|
|
|
|
|
0
|
my $newtag = "<$tagname"; |
|
737
|
0
|
|
|
|
|
0
|
foreach (@{$tags->{allowed_attrs}}) |
|
|
0
|
|
|
|
|
0
|
|
|
738
|
|
|
|
|
|
|
{ |
|
739
|
0
|
0
|
|
|
|
0
|
if (defined $attr->{$_}) |
|
740
|
|
|
|
|
|
|
{ |
|
741
|
0
|
|
|
|
|
0
|
$newtag .= " $_"; |
|
742
|
0
|
0
|
|
|
|
0
|
unless ($attr->{$_} |
|
743
|
|
|
|
|
|
|
eq '__TEXT_MEDIAWIKIFORMAT_BOOL__') |
|
744
|
|
|
|
|
|
|
{ |
|
745
|
|
|
|
|
|
|
# CGI::escapeHTML escapes single quotes. |
|
746
|
0
|
|
|
|
|
0
|
$attr->{$_} = CGI::escapeHTML $attr->{$_}; |
|
747
|
0
|
|
|
|
|
0
|
$newtag .= "='" . $attr->{$_} . "'"; |
|
748
|
|
|
|
|
|
|
} |
|
749
|
|
|
|
|
|
|
} |
|
750
|
|
|
|
|
|
|
} |
|
751
|
0
|
0
|
0
|
|
|
0
|
$newtag .= " /" if $HTML::Tagset::emptyElement{$tagname} || $isEmptyTag; |
|
752
|
0
|
|
|
|
|
0
|
$newtag .= ">"; |
|
753
|
|
|
|
|
|
|
|
|
754
|
|
|
|
|
|
|
# If this isn't a block level element, there's no need to track nesting. |
|
755
|
0
|
0
|
0
|
|
|
0
|
if ($HTML::Tagset::isPhraseMarkup{$tagname} |
|
756
|
|
|
|
|
|
|
|| $HTML::Tagset::emptyElement{$tagname}) |
|
757
|
|
|
|
|
|
|
{ |
|
758
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, $newtag; |
|
759
|
0
|
|
|
|
|
0
|
return; |
|
760
|
|
|
|
|
|
|
} |
|
761
|
|
|
|
|
|
|
|
|
762
|
|
|
|
|
|
|
# Some elements can close implicitly |
|
763
|
0
|
0
|
|
|
|
0
|
if (@$tagstack) |
|
764
|
|
|
|
|
|
|
{ |
|
765
|
0
|
0
|
0
|
|
|
0
|
if ($tagname eq $stacktop |
|
|
|
0
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
&& $HTML::Tagset::optionalEndTag{$tagname}) |
|
767
|
|
|
|
|
|
|
{ |
|
768
|
0
|
|
|
|
|
0
|
pop @$tagstack; |
|
769
|
|
|
|
|
|
|
} |
|
770
|
|
|
|
|
|
|
elsif (!$HTML::Tagset::is_Possible_Strict_P_Content{$tagname}) |
|
771
|
|
|
|
|
|
|
{ |
|
772
|
|
|
|
|
|
|
# Need to check more than the last item for paragraphs. |
|
773
|
0
|
|
|
|
|
0
|
for (my $i = $#{$tagstack}; $i >= 0; $i--) |
|
|
0
|
|
|
|
|
0
|
|
|
774
|
|
|
|
|
|
|
{ |
|
775
|
0
|
|
|
|
|
0
|
my $checking = $tagstack->[$i]; |
|
776
|
0
|
0
|
|
|
|
0
|
last if grep /^\Q$checking\E$/, |
|
777
|
|
|
|
|
|
|
@HTML::Tagset::p_closure_barriers; |
|
778
|
|
|
|
|
|
|
|
|
779
|
0
|
0
|
|
|
|
0
|
if ($checking eq 'p') |
|
780
|
|
|
|
|
|
|
{ |
|
781
|
|
|
|
|
|
|
# pop 'em all. |
|
782
|
0
|
|
|
|
|
0
|
splice @$tagstack, $i; |
|
783
|
0
|
|
|
|
|
0
|
last; |
|
784
|
|
|
|
|
|
|
} |
|
785
|
|
|
|
|
|
|
} |
|
786
|
|
|
|
|
|
|
} |
|
787
|
|
|
|
|
|
|
} |
|
788
|
|
|
|
|
|
|
|
|
789
|
|
|
|
|
|
|
# Could verify here that and sub-elements only appear where
|
790
|
|
|
|
|
|
|
# they belong. |
|
791
|
|
|
|
|
|
|
|
|
792
|
|
|
|
|
|
|
# Push the new tag onto the stack. |
|
793
|
0
|
0
|
|
|
|
0
|
push @$tagstack, $tagname |
|
794
|
|
|
|
|
|
|
unless $isEmptyTag; |
|
795
|
|
|
|
|
|
|
|
|
796
|
0
|
0
|
|
|
|
0
|
_append_processed_line $parser, $newtag, |
|
797
|
|
|
|
|
|
|
$tagname eq 'pre' ? 'nowiki' : 'html'; |
|
798
|
0
|
|
|
|
|
0
|
return; |
|
799
|
|
|
|
|
|
|
} |
|
800
|
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
sub _html_comment |
|
802
|
|
|
|
|
|
|
{ |
|
803
|
0
|
|
|
0
|
|
0
|
my ($parser, $text) = @_; |
|
804
|
|
|
|
|
|
|
|
|
805
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, $text, 'nowiki'; |
|
806
|
|
|
|
|
|
|
} |
|
807
|
|
|
|
|
|
|
|
|
808
|
|
|
|
|
|
|
sub _html_text |
|
809
|
|
|
|
|
|
|
{ |
|
810
|
0
|
|
|
0
|
|
0
|
my ($parser, $dtext, $skipped_text, $is_cdata) = @_; |
|
811
|
0
|
|
|
|
|
0
|
my $tagstack = $parser->{tag_stack}; |
|
812
|
0
|
|
|
|
|
0
|
my ($newtext, $newstate); |
|
813
|
|
|
|
|
|
|
|
|
814
|
0
|
0
|
|
|
|
0
|
warnings::warnif ("Got skipped_text: `$skipped_text'") |
|
815
|
|
|
|
|
|
|
if $skipped_text; |
|
816
|
|
|
|
|
|
|
|
|
817
|
0
|
0
|
|
|
|
0
|
if (@$tagstack) |
|
818
|
|
|
|
|
|
|
{ |
|
819
|
0
|
0
|
0
|
|
|
0
|
if (grep /\Q$tagstack->[-1]\E/, qw{nowiki pre}) |
|
|
|
0
|
|
|
|
|
|
|
820
|
|
|
|
|
|
|
{ |
|
821
|
0
|
|
|
|
|
0
|
$newstate = 'nowiki' |
|
822
|
|
|
|
|
|
|
} |
|
823
|
|
|
|
|
|
|
elsif ($is_cdata && $HTML::Tagset::isCDATA_Parent{$tagstack->[-1]}) |
|
824
|
|
|
|
|
|
|
{ |
|
825
|
|
|
|
|
|
|
# If the user hadn't specifically allowed a tag which contains |
|
826
|
|
|
|
|
|
|
# CDATA, then it won't be on the tag stack. |
|
827
|
0
|
|
|
|
|
0
|
$newtext = $dtext; |
|
828
|
|
|
|
|
|
|
} |
|
829
|
|
|
|
|
|
|
} |
|
830
|
|
|
|
|
|
|
|
|
831
|
0
|
0
|
|
|
|
0
|
unless (defined $newtext) |
|
832
|
|
|
|
|
|
|
{ |
|
833
|
0
|
0
|
|
|
|
0
|
$newtext = CGI::escapeHTML $dtext unless defined $newtext; |
|
834
|
|
|
|
|
|
|
# CGI::escapeHTML escapes single quotes so the text may be included |
|
835
|
|
|
|
|
|
|
# in attribute values, but we know we aren't processing an attribute |
|
836
|
|
|
|
|
|
|
# value here. |
|
837
|
0
|
|
|
|
|
0
|
$newtext =~ s/'/'/g; |
|
838
|
|
|
|
|
|
|
} |
|
839
|
|
|
|
|
|
|
|
|
840
|
0
|
|
|
|
|
0
|
_append_processed_line $parser, $newtext, $newstate; |
|
841
|
|
|
|
|
|
|
} |
|
842
|
|
|
|
|
|
|
|
|
843
|
|
|
|
|
|
|
sub _find_blocks_in_html |
|
844
|
|
|
|
|
|
|
{ |
|
845
|
0
|
|
|
0
|
|
0
|
my ($text, $tags, $opts) = @_; |
|
846
|
|
|
|
|
|
|
|
|
847
|
0
|
|
|
|
|
0
|
my $parser = HTML::Parser->new |
|
848
|
|
|
|
|
|
|
(start_h => [\&_html_tag, 'self, "S", tagname, text, attr'], |
|
849
|
|
|
|
|
|
|
end_h => [\&_html_tag, 'self, "E", tagname, text'], |
|
850
|
|
|
|
|
|
|
comment_h => [\&_html_comment, 'self, text'], |
|
851
|
|
|
|
|
|
|
text_h => [\&_html_text, 'self, dtext, skipped_text, is_cdata'], |
|
852
|
|
|
|
|
|
|
marked_sections => 1, |
|
853
|
|
|
|
|
|
|
boolean_attribute_value => '__TEXT_MEDIAWIKIFORMAT_BOOL__', |
|
854
|
|
|
|
|
|
|
); |
|
855
|
0
|
|
|
|
|
0
|
$parser->{opts} = $opts; |
|
856
|
0
|
|
|
|
|
0
|
$parser->{tags} = $tags; |
|
857
|
0
|
|
|
|
|
0
|
$parser->{processed_lines} = []; |
|
858
|
0
|
|
|
|
|
0
|
$parser->{tag_stack} = []; |
|
859
|
|
|
|
|
|
|
|
|
860
|
0
|
|
|
|
|
0
|
my @blocks; |
|
861
|
0
|
|
|
|
|
0
|
my @lines = split /\r?\n/, $text; |
|
862
|
0
|
|
|
|
|
0
|
for (my $i = 0; $i < @lines; $i++) |
|
863
|
|
|
|
|
|
|
{ |
|
864
|
0
|
|
|
|
|
0
|
$parser->parse ($lines[$i]); |
|
865
|
0
|
|
|
|
|
0
|
$parser->parse ("\n"); |
|
866
|
0
|
0
|
|
|
|
0
|
$parser->eof if $i == $#lines; |
|
867
|
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
# @{$parser->{processed_lines}} may be empty when tags are |
|
869
|
|
|
|
|
|
|
# still open. |
|
870
|
0
|
|
0
|
|
|
0
|
while (@{$parser->{processed_lines}} |
|
|
0
|
|
|
|
|
0
|
|
|
871
|
|
|
|
|
|
|
&& $parser->{processed_lines}->[0]->[2]) |
|
872
|
|
|
|
|
|
|
{ |
|
873
|
0
|
|
|
|
|
0
|
my ($type, $dtext) |
|
874
|
0
|
|
|
|
|
0
|
= @{shift @{$parser->{processed_lines}}}; |
|
|
0
|
|
|
|
|
0
|
|
|
875
|
|
|
|
|
|
|
|
|
876
|
0
|
|
|
|
|
0
|
my $block; |
|
877
|
0
|
0
|
|
|
|
0
|
if ($type) |
|
878
|
|
|
|
|
|
|
{ |
|
879
|
0
|
|
|
|
|
0
|
$block = _start_block ($dtext, $tags, $opts, $type); |
|
880
|
|
|
|
|
|
|
} |
|
881
|
|
|
|
|
|
|
else |
|
882
|
|
|
|
|
|
|
{ |
|
883
|
0
|
|
|
|
|
0
|
chomp $dtext; |
|
884
|
0
|
|
|
|
|
0
|
$block = _start_block ($dtext, $tags, $opts); |
|
885
|
|
|
|
|
|
|
} |
|
886
|
0
|
0
|
|
|
|
0
|
push @blocks, $block if $block; |
|
887
|
|
|
|
|
|
|
} |
|
888
|
|
|
|
|
|
|
} |
|
889
|
|
|
|
|
|
|
|
|
890
|
0
|
|
|
|
|
0
|
return @blocks; |
|
891
|
|
|
|
|
|
|
} |
|
892
|
|
|
|
|
|
|
|
|
893
|
|
|
|
|
|
|
sub _find_blocks |
|
894
|
|
|
|
|
|
|
{ |
|
895
|
50
|
|
|
50
|
|
67
|
my ($text, $tags, $opts) = @_; |
|
896
|
50
|
|
|
|
|
61
|
my @blocks; |
|
897
|
|
|
|
|
|
|
|
|
898
|
50
|
50
|
|
|
|
110
|
if ($opts->{process_html}) |
|
899
|
|
|
|
|
|
|
{ |
|
900
|
0
|
|
|
|
|
0
|
@blocks = _find_blocks_in_html $text, $tags, $opts; |
|
901
|
|
|
|
|
|
|
} |
|
902
|
|
|
|
|
|
|
else |
|
903
|
|
|
|
|
|
|
{ |
|
904
|
|
|
|
|
|
|
# The original behavior. |
|
905
|
50
|
|
|
|
|
521
|
for my $line (split /\r?\n/, $text) |
|
906
|
|
|
|
|
|
|
{ |
|
907
|
300
|
|
|
|
|
438
|
my $block = _start_block ($line, $tags, $opts); |
|
908
|
300
|
100
|
|
|
|
886
|
push @blocks, $block if $block; |
|
909
|
|
|
|
|
|
|
} |
|
910
|
|
|
|
|
|
|
} |
|
911
|
|
|
|
|
|
|
|
|
912
|
50
|
|
|
|
|
189
|
return @blocks; |
|
913
|
|
|
|
|
|
|
} |
|
914
|
|
|
|
|
|
|
|
|
915
|
|
|
|
|
|
|
sub _start_block |
|
916
|
|
|
|
|
|
|
{ |
|
917
|
305
|
|
|
305
|
|
1458
|
my ($text, $tags, $opts, $type) = @_; |
|
918
|
|
|
|
|
|
|
|
|
919
|
305
|
100
|
|
|
|
596
|
return new_block ('end', level => 0) unless $text; |
|
920
|
218
|
50
|
|
|
|
317
|
return new_block ($type, |
|
921
|
|
|
|
|
|
|
level => 0, |
|
922
|
|
|
|
|
|
|
opts => $opts, |
|
923
|
|
|
|
|
|
|
text => $text, |
|
924
|
|
|
|
|
|
|
tags => $tags,) |
|
925
|
|
|
|
|
|
|
if $type; |
|
926
|
|
|
|
|
|
|
|
|
927
|
218
|
|
|
|
|
178
|
for my $block (@{$tags->{blockorder}}) |
|
|
218
|
|
|
|
|
365
|
|
|
928
|
|
|
|
|
|
|
{ |
|
929
|
1216
|
|
|
|
|
1206
|
my ($line, $level, $indentation) = ($text, 0, ''); |
|
930
|
|
|
|
|
|
|
|
|
931
|
1216
|
100
|
|
|
|
2287
|
($level, $line, $indentation) = _get_indentation ($tags, $line) |
|
932
|
|
|
|
|
|
|
if $tags->{indented}{$block}; |
|
933
|
|
|
|
|
|
|
|
|
934
|
1216
|
|
|
|
|
3416
|
my $marker_removed = length ($line =~ s/$tags->{blocks}{$block}//); |
|
935
|
|
|
|
|
|
|
|
|
936
|
1216
|
100
|
|
|
|
2010
|
next unless $marker_removed; |
|
937
|
|
|
|
|
|
|
|
|
938
|
1944
|
|
|
|
|
3076
|
return new_block ($block, |
|
939
|
216
|
|
100
|
|
|
337
|
args => [grep {defined} $1, $2, $3, $4, $5, $6, $7, |
|
940
|
|
|
|
|
|
|
$8, $9], |
|
941
|
|
|
|
|
|
|
level => $level || 0, |
|
942
|
|
|
|
|
|
|
opts => $opts, |
|
943
|
|
|
|
|
|
|
text => $line, |
|
944
|
|
|
|
|
|
|
tags => $tags, |
|
945
|
|
|
|
|
|
|
); |
|
946
|
|
|
|
|
|
|
} |
|
947
|
|
|
|
|
|
|
} |
|
948
|
|
|
|
|
|
|
|
|
949
|
|
|
|
|
|
|
sub _nest_blocks |
|
950
|
|
|
|
|
|
|
{ |
|
951
|
54
|
|
|
54
|
|
1442
|
my $blocks = shift; |
|
952
|
54
|
100
|
|
|
|
128
|
return unless @$blocks; |
|
953
|
|
|
|
|
|
|
|
|
954
|
53
|
|
|
|
|
101
|
my @processed = shift @$blocks; |
|
955
|
|
|
|
|
|
|
|
|
956
|
53
|
|
|
|
|
88
|
for my $block (@$blocks) |
|
957
|
|
|
|
|
|
|
{ |
|
958
|
251
|
|
|
|
|
618
|
push @processed, $processed[-1]->nest( $block ); |
|
959
|
|
|
|
|
|
|
} |
|
960
|
|
|
|
|
|
|
|
|
961
|
53
|
|
|
|
|
353
|
return @processed; |
|
962
|
|
|
|
|
|
|
} |
|
963
|
|
|
|
|
|
|
|
|
964
|
|
|
|
|
|
|
sub _process_blocks |
|
965
|
|
|
|
|
|
|
{ |
|
966
|
51
|
|
|
51
|
|
160
|
my ($blocks, $tags, $opts) = @_; |
|
967
|
|
|
|
|
|
|
|
|
968
|
51
|
|
|
|
|
48
|
my @open; |
|
969
|
51
|
|
|
|
|
78
|
for my $block (@$blocks) |
|
970
|
|
|
|
|
|
|
{ |
|
971
|
205
|
100
|
|
|
|
415
|
push @open, _process_block ($block, $tags, $opts) |
|
972
|
|
|
|
|
|
|
unless $block->type() eq 'end'; |
|
973
|
|
|
|
|
|
|
} |
|
974
|
|
|
|
|
|
|
|
|
975
|
51
|
|
|
|
|
229
|
return join '', @open ; |
|
976
|
|
|
|
|
|
|
} |
|
977
|
|
|
|
|
|
|
|
|
978
|
|
|
|
|
|
|
sub _process_block |
|
979
|
|
|
|
|
|
|
{ |
|
980
|
135
|
|
|
135
|
|
148
|
my ($block, $tags, $opts) = @_; |
|
981
|
135
|
|
|
|
|
226
|
my $type = $block->type(); |
|
982
|
|
|
|
|
|
|
|
|
983
|
135
|
|
|
|
|
131
|
my ($start, $end, $start_line, $end_line, $between); |
|
984
|
135
|
50
|
|
|
|
255
|
if ($tags->{$type}) |
|
985
|
|
|
|
|
|
|
{ |
|
986
|
135
|
|
|
|
|
108
|
($start, $end, $start_line, $end_line, $between) = @{$tags->{$type}}; |
|
|
135
|
|
|
|
|
349
|
|
|
987
|
|
|
|
|
|
|
} |
|
988
|
|
|
|
|
|
|
else |
|
989
|
|
|
|
|
|
|
{ |
|
990
|
0
|
|
|
|
|
0
|
($start, $end, $start_line, $end_line) = ('', '', '', ''); |
|
991
|
|
|
|
|
|
|
} |
|
992
|
|
|
|
|
|
|
|
|
993
|
135
|
|
|
|
|
167
|
my @text = (); |
|
994
|
135
|
100
|
|
|
|
117
|
for my $line (grep (/^\Q$type\E$/, @{$tags->{unformatted_blocks}}) |
|
|
135
|
|
|
|
|
1952
|
|
|
995
|
|
|
|
|
|
|
? $block->text() |
|
996
|
|
|
|
|
|
|
: $block->formatted_text()) |
|
997
|
|
|
|
|
|
|
{ |
|
998
|
240
|
100
|
|
|
|
479
|
if (blessed $line) |
|
999
|
|
|
|
|
|
|
{ |
|
1000
|
18
|
|
33
|
|
|
45
|
my $prev_end = pop @text || (); |
|
1001
|
18
|
|
|
|
|
48
|
push @text, _process_block ($line, $tags, $opts), $prev_end; |
|
1002
|
18
|
|
|
|
|
30
|
next; |
|
1003
|
|
|
|
|
|
|
} |
|
1004
|
|
|
|
|
|
|
|
|
1005
|
222
|
|
|
|
|
170
|
my @triplets; |
|
1006
|
222
|
100
|
100
|
|
|
680
|
if ((ref ($start_line) || '') eq 'CODE') |
|
1007
|
|
|
|
|
|
|
{ |
|
1008
|
32
|
|
|
|
|
68
|
@triplets = $start_line->($line, $block->level(), |
|
1009
|
|
|
|
|
|
|
$block->shift_args(), $tags, $opts); |
|
1010
|
|
|
|
|
|
|
} |
|
1011
|
|
|
|
|
|
|
else |
|
1012
|
|
|
|
|
|
|
{ |
|
1013
|
190
|
|
|
|
|
274
|
@triplets = ($start_line, $line, $end_line); |
|
1014
|
|
|
|
|
|
|
} |
|
1015
|
222
|
|
|
|
|
423
|
push @text, @triplets; |
|
1016
|
|
|
|
|
|
|
} |
|
1017
|
|
|
|
|
|
|
|
|
1018
|
135
|
100
|
|
|
|
250
|
pop @text if $between; |
|
1019
|
135
|
|
|
|
|
438
|
return join '', $start, @text, $end; |
|
1020
|
|
|
|
|
|
|
} |
|
1021
|
|
|
|
|
|
|
|
|
1022
|
|
|
|
|
|
|
sub _get_indentation |
|
1023
|
|
|
|
|
|
|
{ |
|
1024
|
443
|
|
|
443
|
|
414
|
my ($tags, $text) = @_; |
|
1025
|
|
|
|
|
|
|
|
|
1026
|
443
|
100
|
|
|
|
2657
|
return 1, $text unless $text =~ s/($tags->{indent})//; |
|
1027
|
238
|
|
|
|
|
778
|
return length ($1) + 1, $text, $1; |
|
1028
|
|
|
|
|
|
|
} |
|
1029
|
|
|
|
|
|
|
|
|
1030
|
|
|
|
|
|
|
=head2 format_line |
|
1031
|
|
|
|
|
|
|
|
|
1032
|
|
|
|
|
|
|
$formatted = format_line ($raw, $tags, $opts); |
|
1033
|
|
|
|
|
|
|
|
|
1034
|
|
|
|
|
|
|
This function is never exported. It formats the phrase elements of a single |
|
1035
|
|
|
|
|
|
|
line of text (emphasised, strong, and links). |
|
1036
|
|
|
|
|
|
|
|
|
1037
|
|
|
|
|
|
|
This is only meant to be called from L and so |
|
1038
|
|
|
|
|
|
|
requires $tags and $opts to have all elements filled in. If you find a use for |
|
1039
|
|
|
|
|
|
|
it, please let me know and maybe I will have it default the missing elements as |
|
1040
|
|
|
|
|
|
|
C does. |
|
1041
|
|
|
|
|
|
|
|
|
1042
|
|
|
|
|
|
|
=cut |
|
1043
|
|
|
|
|
|
|
|
|
1044
|
|
|
|
|
|
|
sub format_line |
|
1045
|
|
|
|
|
|
|
{ |
|
1046
|
227
|
|
|
227
|
1
|
4045
|
my ($text, $tags, $opts) = @_; |
|
1047
|
|
|
|
|
|
|
|
|
1048
|
227
|
|
|
|
|
635
|
$text =~ s!$tags->{strong_tag}!$tags->{strong}->($1, $opts)!eg; |
|
|
5
|
|
|
|
|
12
|
|
|
1049
|
227
|
|
|
|
|
436
|
$text =~ s!$tags->{emphasized_tag}!$tags->{emphasized}->($1, $opts)!eg; |
|
|
6
|
|
|
|
|
18
|
|
|
1050
|
|
|
|
|
|
|
|
|
1051
|
227
|
50
|
66
|
|
|
646
|
$text = _find_links ($text, $tags, $opts) |
|
|
|
|
33
|
|
|
|
|
|
1052
|
|
|
|
|
|
|
if $opts->{extended} |
|
1053
|
|
|
|
|
|
|
|| $opts->{absolute_links} |
|
1054
|
|
|
|
|
|
|
|| $opts->{implicit_links}; |
|
1055
|
|
|
|
|
|
|
|
|
1056
|
227
|
|
|
|
|
621
|
return $text; |
|
1057
|
|
|
|
|
|
|
} |
|
1058
|
|
|
|
|
|
|
|
|
1059
|
|
|
|
|
|
|
sub _find_innermost_balanced_pair |
|
1060
|
|
|
|
|
|
|
{ |
|
1061
|
11
|
|
|
11
|
|
13
|
my ($text, $open, $close) = @_; |
|
1062
|
|
|
|
|
|
|
|
|
1063
|
11
|
|
|
|
|
16
|
my $start_pos = rindex $text, $open; |
|
1064
|
11
|
100
|
|
|
|
24
|
return if $start_pos == -1; |
|
1065
|
|
|
|
|
|
|
|
|
1066
|
7
|
|
|
|
|
9
|
my $end_pos = index $text, $close, $start_pos; |
|
1067
|
7
|
50
|
|
|
|
24
|
return if $end_pos == -1; |
|
1068
|
|
|
|
|
|
|
|
|
1069
|
7
|
|
|
|
|
7
|
my $open_length = length $open; |
|
1070
|
7
|
|
|
|
|
6
|
my $close_length = length $close; |
|
1071
|
7
|
|
|
|
|
8
|
my $close_pos = $end_pos + $close_length; |
|
1072
|
7
|
|
|
|
|
8
|
my $enclosed_length = $close_pos - $start_pos; |
|
1073
|
|
|
|
|
|
|
|
|
1074
|
7
|
|
|
|
|
11
|
my $enclosed_atom = substr $text, $start_pos, $enclosed_length; |
|
1075
|
7
|
|
|
|
|
27
|
return substr ($enclosed_atom, $open_length, 0 - $close_length), |
|
1076
|
|
|
|
|
|
|
substr ($text, 0, $start_pos), |
|
1077
|
|
|
|
|
|
|
substr ($text, $close_pos); |
|
1078
|
|
|
|
|
|
|
} |
|
1079
|
|
|
|
|
|
|
|
|
1080
|
|
|
|
|
|
|
sub _find_links |
|
1081
|
|
|
|
|
|
|
{ |
|
1082
|
227
|
|
|
227
|
|
236
|
my ($text, $tags, $opts) = @_; |
|
1083
|
|
|
|
|
|
|
|
|
1084
|
|
|
|
|
|
|
# Build Regexp |
|
1085
|
227
|
|
|
|
|
179
|
my @res; |
|
1086
|
|
|
|
|
|
|
|
|
1087
|
227
|
100
|
|
|
|
373
|
if ($opts->{absolute_links}) |
|
1088
|
|
|
|
|
|
|
{ |
|
1089
|
|
|
|
|
|
|
# URI |
|
1090
|
225
|
|
|
|
|
176
|
my $s; |
|
1091
|
225
|
|
66
|
|
|
451
|
$tags->{_schema_regex} ||= _make_schema_regex @{$tags->{schemas}}; |
|
|
47
|
|
|
|
|
135
|
|
|
1092
|
225
|
|
|
|
|
262
|
$s = $tags->{_schema_regex}; |
|
1093
|
225
|
|
|
|
|
1668
|
push @res, qr/\b$s:[$uricCheat][$uric]*/ |
|
1094
|
|
|
|
|
|
|
} |
|
1095
|
|
|
|
|
|
|
|
|
1096
|
227
|
100
|
|
|
|
442
|
if ($opts->{implicit_links}) |
|
1097
|
|
|
|
|
|
|
{ |
|
1098
|
|
|
|
|
|
|
# StudlyCaps |
|
1099
|
17
|
50
|
|
|
|
31
|
if ($tags->{implicit_link_delimiters}) |
|
1100
|
|
|
|
|
|
|
{ |
|
1101
|
17
|
|
|
|
|
49
|
push @res, qr/$tags->{implicit_link_delimiters}/; |
|
1102
|
|
|
|
|
|
|
} |
|
1103
|
|
|
|
|
|
|
else |
|
1104
|
|
|
|
|
|
|
{ |
|
1105
|
0
|
|
|
|
|
0
|
warnings::warnif ("Ignoring implicit_links option since implicit_link_delimiters is empty"); |
|
1106
|
|
|
|
|
|
|
} |
|
1107
|
|
|
|
|
|
|
} |
|
1108
|
|
|
|
|
|
|
|
|
1109
|
227
|
100
|
|
|
|
361
|
if ($opts->{extended}) |
|
1110
|
|
|
|
|
|
|
{ |
|
1111
|
|
|
|
|
|
|
# [[Wiki Page]] |
|
1112
|
226
|
100
|
|
|
|
588
|
if (!$tags->{extended_link_delimiters}) |
|
|
|
100
|
|
|
|
|
|
|
1113
|
|
|
|
|
|
|
{ |
|
1114
|
3
|
|
|
|
|
82
|
warnings::warnif ("Ignoring extended option since extended_link_delimiters is empty"); |
|
1115
|
|
|
|
|
|
|
} |
|
1116
|
|
|
|
|
|
|
elsif (ref $tags->{extended_link_delimiters} eq "ARRAY") |
|
1117
|
|
|
|
|
|
|
{ |
|
1118
|
|
|
|
|
|
|
# Backwards compatibility for extended links. |
|
1119
|
|
|
|
|
|
|
# Bypasses the regex substitution used by absolute and implicit |
|
1120
|
|
|
|
|
|
|
# links. |
|
1121
|
4
|
|
|
|
|
4
|
my ($start, $end) = @{$tags->{extended_link_delimiters}}; |
|
|
4
|
|
|
|
|
10
|
|
|
1122
|
4
|
|
|
|
|
9
|
while (my @pieces = _find_innermost_balanced_pair ($text, $start, |
|
1123
|
|
|
|
|
|
|
$end)) |
|
1124
|
|
|
|
|
|
|
{ |
|
1125
|
7
|
50
|
|
|
|
9
|
my ($tag, $before, $after) = map { defined $_ ? $_ : '' } |
|
|
21
|
|
|
|
|
33
|
|
|
1126
|
|
|
|
|
|
|
@pieces; |
|
1127
|
7
|
|
50
|
|
|
14
|
my $extended = $tags->{link}->($tag, $opts, $tags) || ''; |
|
1128
|
7
|
|
|
|
|
44
|
$text = $before . $extended . $after; |
|
1129
|
|
|
|
|
|
|
} |
|
1130
|
|
|
|
|
|
|
} |
|
1131
|
|
|
|
|
|
|
else |
|
1132
|
|
|
|
|
|
|
{ |
|
1133
|
219
|
|
|
|
|
599
|
push @res, qr/$tags->{extended_link_delimiters}/; |
|
1134
|
|
|
|
|
|
|
} |
|
1135
|
|
|
|
|
|
|
} |
|
1136
|
|
|
|
|
|
|
|
|
1137
|
227
|
50
|
|
|
|
608
|
if (@res) |
|
1138
|
|
|
|
|
|
|
{ |
|
1139
|
227
|
|
|
|
|
370
|
my $re = join "|", @res; |
|
1140
|
227
|
|
|
|
|
2643
|
$text =~ s/$re/$tags->{link}->($&, $opts, $tags)/ge; |
|
|
30
|
|
|
|
|
107
|
|
|
1141
|
|
|
|
|
|
|
} |
|
1142
|
|
|
|
|
|
|
|
|
1143
|
227
|
|
|
|
|
547
|
return $text; |
|
1144
|
|
|
|
|
|
|
} |
|
1145
|
|
|
|
|
|
|
|
|
1146
|
|
|
|
|
|
|
=head1 Wiki Format |
|
1147
|
|
|
|
|
|
|
|
|
1148
|
|
|
|
|
|
|
Refer to L for |
|
1149
|
|
|
|
|
|
|
description of the default wiki format, as interpreted by this module. Any |
|
1150
|
|
|
|
|
|
|
discrepencies will be considered bugs in this module, with a few exceptions. |
|
1151
|
|
|
|
|
|
|
|
|
1152
|
|
|
|
|
|
|
=head2 Unimplemented Wiki Markup |
|
1153
|
|
|
|
|
|
|
|
|
1154
|
|
|
|
|
|
|
=over 4 |
|
1155
|
|
|
|
|
|
|
|
|
1156
|
|
|
|
|
|
|
=item Templates, Magic Words, and Wanted Links |
|
1157
|
|
|
|
|
|
|
|
|
1158
|
|
|
|
|
|
|
Templates, magic words, and the colorization of wanted links all require a back |
|
1159
|
|
|
|
|
|
|
end data store that can be consulted on the existance and content of named |
|
1160
|
|
|
|
|
|
|
pages. C has deliberately been constructed such that it |
|
1161
|
|
|
|
|
|
|
operates independantly from such a back end. For an interface to |
|
1162
|
|
|
|
|
|
|
C which implements these features, see |
|
1163
|
|
|
|
|
|
|
L. |
|
1164
|
|
|
|
|
|
|
|
|
1165
|
|
|
|
|
|
|
=item Tables |
|
1166
|
|
|
|
|
|
|
|
|
1167
|
|
|
|
|
|
|
This is on the TODO list. |
|
1168
|
|
|
|
|
|
|
|
|
1169
|
|
|
|
|
|
|
=back |
|
1170
|
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
=head1 EXPORT |
|
1172
|
|
|
|
|
|
|
|
|
1173
|
|
|
|
|
|
|
If you'd like to make your life more convenient, you can optionally import a |
|
1174
|
|
|
|
|
|
|
subroutine that already has default tags and options set up. This is |
|
1175
|
|
|
|
|
|
|
especially handy if you use a prefix: |
|
1176
|
|
|
|
|
|
|
|
|
1177
|
|
|
|
|
|
|
use Text::MediawikiFormat prefix => 'http://www.example.com/'; |
|
1178
|
|
|
|
|
|
|
wikiformat ('some text'); |
|
1179
|
|
|
|
|
|
|
|
|
1180
|
|
|
|
|
|
|
Tags are interpreted as default members of the $tags hash normally passed to |
|
1181
|
|
|
|
|
|
|
C, except for the five options (see above) and the C key, who's |
|
1182
|
|
|
|
|
|
|
value is interpreted as an alternate name for the imported function. |
|
1183
|
|
|
|
|
|
|
|
|
1184
|
|
|
|
|
|
|
To use the C flag to control the name by which your code calls the imported |
|
1185
|
|
|
|
|
|
|
function, for example, |
|
1186
|
|
|
|
|
|
|
|
|
1187
|
|
|
|
|
|
|
use Text::MediawikiFormat as => 'formatTextWithWikiStyle'; |
|
1188
|
|
|
|
|
|
|
formatTextWithWikiStyle ('some text'); |
|
1189
|
|
|
|
|
|
|
|
|
1190
|
|
|
|
|
|
|
You might choose a better name, though. |
|
1191
|
|
|
|
|
|
|
|
|
1192
|
|
|
|
|
|
|
The calling semantics are effectively the same as those of the C |
|
1193
|
|
|
|
|
|
|
function. Any additional tags or options to the imported function will |
|
1194
|
|
|
|
|
|
|
override the defaults. This code: |
|
1195
|
|
|
|
|
|
|
|
|
1196
|
|
|
|
|
|
|
use Text::MediawikiFormat as => 'wf', extended => 0; |
|
1197
|
|
|
|
|
|
|
wf ('some text', {}, {extended => 1}); |
|
1198
|
|
|
|
|
|
|
|
|
1199
|
|
|
|
|
|
|
enables extended links, after specifying that the default behavior should be |
|
1200
|
|
|
|
|
|
|
to disable them. |
|
1201
|
|
|
|
|
|
|
|
|
1202
|
|
|
|
|
|
|
=head1 GORY DETAILS |
|
1203
|
|
|
|
|
|
|
|
|
1204
|
|
|
|
|
|
|
=head2 Tags |
|
1205
|
|
|
|
|
|
|
|
|
1206
|
|
|
|
|
|
|
There are two types of Wiki markup: phrase markup and blocks. Blocks include |
|
1207
|
|
|
|
|
|
|
lists, which are made up of lines and can also contain other lists. |
|
1208
|
|
|
|
|
|
|
|
|
1209
|
|
|
|
|
|
|
=head3 Phrase Markup |
|
1210
|
|
|
|
|
|
|
|
|
1211
|
|
|
|
|
|
|
The are currently three types of wiki phrase markup. These are the |
|
1212
|
|
|
|
|
|
|
strong and emphasized markup and links. Links may additionally be of three |
|
1213
|
|
|
|
|
|
|
subtypes, extended, implicit, or absolute. |
|
1214
|
|
|
|
|
|
|
|
|
1215
|
|
|
|
|
|
|
You can change the regular expressions used to find strong and emphasized tags: |
|
1216
|
|
|
|
|
|
|
|
|
1217
|
|
|
|
|
|
|
%tags = ( |
|
1218
|
|
|
|
|
|
|
strong_tag => qr/\*([^*]+?)\*/, |
|
1219
|
|
|
|
|
|
|
emphasized_tag => qr|/([^/]+?)/|, |
|
1220
|
|
|
|
|
|
|
); |
|
1221
|
|
|
|
|
|
|
|
|
1222
|
|
|
|
|
|
|
$wikitext = 'this is *strong*, /emphasized/, and */em+strong/*'; |
|
1223
|
|
|
|
|
|
|
$htmltext = wikiformat ($wikitext, \%tags, {}); |
|
1224
|
|
|
|
|
|
|
|
|
1225
|
|
|
|
|
|
|
You can also change the regular expressions used to find links. The following |
|
1226
|
|
|
|
|
|
|
just sets them to their default states (but enables parsing of implicit links, |
|
1227
|
|
|
|
|
|
|
which is I the default): |
|
1228
|
|
|
|
|
|
|
|
|
1229
|
|
|
|
|
|
|
my $html = wikiformat |
|
1230
|
|
|
|
|
|
|
( |
|
1231
|
|
|
|
|
|
|
$raw, |
|
1232
|
|
|
|
|
|
|
{implicit_link_delimiters => qr!\b(?:[A-Z][a-z0-9]\w*){2,}!, |
|
1233
|
|
|
|
|
|
|
extended_link_delimiters => qr!\[(?:\[[^][]*\]|[^][]*)\]!, |
|
1234
|
|
|
|
|
|
|
}, |
|
1235
|
|
|
|
|
|
|
{implicit_links => 1} |
|
1236
|
|
|
|
|
|
|
); |
|
1237
|
|
|
|
|
|
|
|
|
1238
|
|
|
|
|
|
|
In addition, you may set the function references that format strong and |
|
1239
|
|
|
|
|
|
|
emphasized text and links. The strong and emphasized functions receive only |
|
1240
|
|
|
|
|
|
|
the text to be formatted as an argument and are expected to return the |
|
1241
|
|
|
|
|
|
|
formatted text. The link formatter also recieves references to the C<$tags> |
|
1242
|
|
|
|
|
|
|
and C<$opts> arrays. For example, the following sets the strong and |
|
1243
|
|
|
|
|
|
|
emphasized formatters to their default state while replacing the link formatter |
|
1244
|
|
|
|
|
|
|
with one which strips href information and returns only the title text: |
|
1245
|
|
|
|
|
|
|
|
|
1246
|
|
|
|
|
|
|
my $html = wikiformat |
|
1247
|
|
|
|
|
|
|
( |
|
1248
|
|
|
|
|
|
|
$raw, |
|
1249
|
|
|
|
|
|
|
{strong => sub {"$_[0]"}, |
|
1250
|
|
|
|
|
|
|
emphasized => sub {"$_[0]"}, |
|
1251
|
|
|
|
|
|
|
link => sub |
|
1252
|
|
|
|
|
|
|
{ |
|
1253
|
|
|
|
|
|
|
my ($tag, $opts, $tags) = @_; |
|
1254
|
|
|
|
|
|
|
if ($tag =~ s/^\[\[([^][]+)\]\]$/$1/) |
|
1255
|
|
|
|
|
|
|
{ |
|
1256
|
|
|
|
|
|
|
my ($page, $title) = split qr/\|/, $tag, 2; |
|
1257
|
|
|
|
|
|
|
return $title if $title; |
|
1258
|
|
|
|
|
|
|
return $page; |
|
1259
|
|
|
|
|
|
|
} |
|
1260
|
|
|
|
|
|
|
elsif ($tag =~ s/^\[([^][]+)\]$/$1/) |
|
1261
|
|
|
|
|
|
|
{ |
|
1262
|
|
|
|
|
|
|
my ($href, $title) = split qr/ /, $tag, 2; |
|
1263
|
|
|
|
|
|
|
return $title if $title; |
|
1264
|
|
|
|
|
|
|
return $href; |
|
1265
|
|
|
|
|
|
|
} |
|
1266
|
|
|
|
|
|
|
else |
|
1267
|
|
|
|
|
|
|
{ |
|
1268
|
|
|
|
|
|
|
return $tag; |
|
1269
|
|
|
|
|
|
|
} |
|
1270
|
|
|
|
|
|
|
}, |
|
1271
|
|
|
|
|
|
|
}, |
|
1272
|
|
|
|
|
|
|
); |
|
1273
|
|
|
|
|
|
|
|
|
1274
|
|
|
|
|
|
|
=head3 Blocks |
|
1275
|
|
|
|
|
|
|
|
|
1276
|
|
|
|
|
|
|
The default block types are C, C, C, C, |
|
1277
|
|
|
|
|
|
|
C, C, C, and C |
|
1278
|
|
|
|
|
|
|
|
|
1279
|
|
|
|
|
|
|
Block entries in the tag hashes must contain array references. The first two |
|
1280
|
|
|
|
|
|
|
items are the tags used at the start and end of the block. The third and |
|
1281
|
|
|
|
|
|
|
fourth contain the tags used at the start and end of each line. Where there |
|
1282
|
|
|
|
|
|
|
needs to be more processing of individual lines, use a subref as the third |
|
1283
|
|
|
|
|
|
|
item. This is how the module processes ordered lines in HTML lists and |
|
1284
|
|
|
|
|
|
|
headers: |
|
1285
|
|
|
|
|
|
|
|
|
1286
|
|
|
|
|
|
|
my $html = wikiformat |
|
1287
|
|
|
|
|
|
|
( |
|
1288
|
|
|
|
|
|
|
$raw, |
|
1289
|
|
|
|
|
|
|
{ordered => ['', " \n", '', "\n"], |
|
1290
|
|
|
|
|
|
|
header => ['', "\n", \&_make_header], |
|
1291
|
|
|
|
|
|
|
}, |
|
1292
|
|
|
|
|
|
|
); |
|
1293
|
|
|
|
|
|
|
|
|
1294
|
|
|
|
|
|
|
The first argument to these subrefs is the post-processed text of the line |
|
1295
|
|
|
|
|
|
|
itself. (Processing removes the indentation and tokens used to mark this as a |
|
1296
|
|
|
|
|
|
|
list and checks the rest of the line for other line formattings.) The second |
|
1297
|
|
|
|
|
|
|
argument is the indentation level (see below). The subsequent arguments are |
|
1298
|
|
|
|
|
|
|
captured variables in the regular expression used to find this list type. The |
|
1299
|
|
|
|
|
|
|
regexp for headers is: |
|
1300
|
|
|
|
|
|
|
|
|
1301
|
|
|
|
|
|
|
$html = wikiformat |
|
1302
|
|
|
|
|
|
|
( |
|
1303
|
|
|
|
|
|
|
$raw, |
|
1304
|
|
|
|
|
|
|
{blocks => {header => qr/^(=+)\s*(.+?)\s*\1$/}} |
|
1305
|
|
|
|
|
|
|
); |
|
1306
|
|
|
|
|
|
|
|
|
1307
|
|
|
|
|
|
|
The module processes indentation first, if applicable, and stores the |
|
1308
|
|
|
|
|
|
|
indentation level (the length of the indentation removed). |
|
1309
|
|
|
|
|
|
|
|
|
1310
|
|
|
|
|
|
|
Lists automatically start and end as necessary. |
|
1311
|
|
|
|
|
|
|
|
|
1312
|
|
|
|
|
|
|
Because regular expressions could conceivably match more than one line, block |
|
1313
|
|
|
|
|
|
|
level markup is processed in a specific order. The C tag governs |
|
1314
|
|
|
|
|
|
|
this order. It contains a reference to an array of the names of the |
|
1315
|
|
|
|
|
|
|
appropriate blocks to process. If you add a block type, be sure to add an |
|
1316
|
|
|
|
|
|
|
entry for it in C: |
|
1317
|
|
|
|
|
|
|
|
|
1318
|
|
|
|
|
|
|
my $html = wikiformat |
|
1319
|
|
|
|
|
|
|
( |
|
1320
|
|
|
|
|
|
|
$raw, |
|
1321
|
|
|
|
|
|
|
{invisible => ['', '', '', ''], |
|
1322
|
|
|
|
|
|
|
blocks => {invisible => qr!^--(.*?)--$!}, |
|
1323
|
|
|
|
|
|
|
blockorder => [qw(code header line ordered |
|
1324
|
|
|
|
|
|
|
unordered definition invisible |
|
1325
|
|
|
|
|
|
|
paragraph_break paragraph)] |
|
1326
|
|
|
|
|
|
|
}, |
|
1327
|
|
|
|
|
|
|
}, |
|
1328
|
|
|
|
|
|
|
); |
|
1329
|
|
|
|
|
|
|
|
|
1330
|
|
|
|
|
|
|
=head3 Finding blocks |
|
1331
|
|
|
|
|
|
|
|
|
1332
|
|
|
|
|
|
|
As has already been mentioned in passing, C uses regular |
|
1333
|
|
|
|
|
|
|
expressions to find blocks. These are in the C<%tags> hash under the C |
|
1334
|
|
|
|
|
|
|
key. For example, to change the regular expression to find code block items, |
|
1335
|
|
|
|
|
|
|
use: |
|
1336
|
|
|
|
|
|
|
|
|
1337
|
|
|
|
|
|
|
my $html = wikiformat ($raw, {blocks => {code => qr/^:\s+/}}); |
|
1338
|
|
|
|
|
|
|
|
|
1339
|
|
|
|
|
|
|
This will require a leading colon to mark code lines (note that as writted |
|
1340
|
|
|
|
|
|
|
here, this would interfere with the default processing of definition lists). |
|
1341
|
|
|
|
|
|
|
|
|
1342
|
|
|
|
|
|
|
=head3 Finding Blocks in the Correct Order |
|
1343
|
|
|
|
|
|
|
|
|
1344
|
|
|
|
|
|
|
As intrepid bug reporter Tom Hukins pointed out in CPAN RT bug #671, the order |
|
1345
|
|
|
|
|
|
|
in which C searches for blocks varies by platform and |
|
1346
|
|
|
|
|
|
|
version of Perl. Because some block-finding regular expressions are more |
|
1347
|
|
|
|
|
|
|
specific than others, what you intend to be one type of block may turn into a |
|
1348
|
|
|
|
|
|
|
different list type. |
|
1349
|
|
|
|
|
|
|
|
|
1350
|
|
|
|
|
|
|
If you're adding new block types, be aware of this. The C entry in |
|
1351
|
|
|
|
|
|
|
C<%tags> exists to force C to apply its regexes from |
|
1352
|
|
|
|
|
|
|
most specific to least specific. It contains an array reference. By default, |
|
1353
|
|
|
|
|
|
|
it looks for ordered lists first, unordered lists second, and code references |
|
1354
|
|
|
|
|
|
|
at the end. |
|
1355
|
|
|
|
|
|
|
|
|
1356
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
1357
|
|
|
|
|
|
|
|
|
1358
|
|
|
|
|
|
|
L |
|
1359
|
|
|
|
|
|
|
|
|
1360
|
|
|
|
|
|
|
=head1 SUPPORT |
|
1361
|
|
|
|
|
|
|
|
|
1362
|
|
|
|
|
|
|
You can find documentation for this module with the perldoc command. |
|
1363
|
|
|
|
|
|
|
|
|
1364
|
|
|
|
|
|
|
perldoc Text::MediawikiFormat |
|
1365
|
|
|
|
|
|
|
|
|
1366
|
|
|
|
|
|
|
You can also look for information at: |
|
1367
|
|
|
|
|
|
|
|
|
1368
|
|
|
|
|
|
|
=over 4 |
|
1369
|
|
|
|
|
|
|
|
|
1370
|
|
|
|
|
|
|
=item * AnnoCPAN: Annotated CPAN documentation |
|
1371
|
|
|
|
|
|
|
|
|
1372
|
|
|
|
|
|
|
L |
|
1373
|
|
|
|
|
|
|
|
|
1374
|
|
|
|
|
|
|
=item * CPAN Ratings |
|
1375
|
|
|
|
|
|
|
|
|
1376
|
|
|
|
|
|
|
L |
|
1377
|
|
|
|
|
|
|
|
|
1378
|
|
|
|
|
|
|
=item * RT: CPAN's request tracker |
|
1379
|
|
|
|
|
|
|
|
|
1380
|
|
|
|
|
|
|
L |
|
1381
|
|
|
|
|
|
|
|
|
1382
|
|
|
|
|
|
|
=item * Search CPAN |
|
1383
|
|
|
|
|
|
|
|
|
1384
|
|
|
|
|
|
|
L |
|
1385
|
|
|
|
|
|
|
|
|
1386
|
|
|
|
|
|
|
=back |
|
1387
|
|
|
|
|
|
|
|
|
1388
|
|
|
|
|
|
|
=head1 AUTHOR |
|
1389
|
|
|
|
|
|
|
|
|
1390
|
|
|
|
|
|
|
Derek Price C is the author. |
|
1391
|
|
|
|
|
|
|
|
|
1392
|
|
|
|
|
|
|
=head1 ACKNOWLEDGEMENTS |
|
1393
|
|
|
|
|
|
|
|
|
1394
|
|
|
|
|
|
|
This module is derived from L, written by chromatic. |
|
1395
|
|
|
|
|
|
|
chromatic's original credits are below: |
|
1396
|
|
|
|
|
|
|
|
|
1397
|
|
|
|
|
|
|
chromatic, C, with much input from the Jellybean team |
|
1398
|
|
|
|
|
|
|
(including Jonathan Paulett). Kate L Pugh has also provided several patches, |
|
1399
|
|
|
|
|
|
|
many failing tests, and is usually the driving force behind new features and |
|
1400
|
|
|
|
|
|
|
releases. If you think this module is worth buying me a beer, she deserves at |
|
1401
|
|
|
|
|
|
|
least half of it. |
|
1402
|
|
|
|
|
|
|
|
|
1403
|
|
|
|
|
|
|
Alex Vandiver added a nice patch and tests for extended links. |
|
1404
|
|
|
|
|
|
|
|
|
1405
|
|
|
|
|
|
|
Tony Bowden, Tom Hukins, and Andy H. all suggested useful features that are now |
|
1406
|
|
|
|
|
|
|
implemented. |
|
1407
|
|
|
|
|
|
|
|
|
1408
|
|
|
|
|
|
|
Sam Vilain, Chris Winters, Paul Schmidt, and Art Henry have all found and |
|
1409
|
|
|
|
|
|
|
reported silly bugs. |
|
1410
|
|
|
|
|
|
|
|
|
1411
|
|
|
|
|
|
|
Blame me for the implementation. |
|
1412
|
|
|
|
|
|
|
|
|
1413
|
|
|
|
|
|
|
=head1 BUGS |
|
1414
|
|
|
|
|
|
|
|
|
1415
|
|
|
|
|
|
|
The link checker in C may fail to detect existing links that do |
|
1416
|
|
|
|
|
|
|
not follow HTML, XML, or SGML style. They may die with some SGML styles too. |
|
1417
|
|
|
|
|
|
|
I. |
|
1418
|
|
|
|
|
|
|
|
|
1419
|
|
|
|
|
|
|
=head1 TODO |
|
1420
|
|
|
|
|
|
|
|
|
1421
|
|
|
|
|
|
|
=over 4 |
|
1422
|
|
|
|
|
|
|
|
|
1423
|
|
|
|
|
|
|
=item * Optimize C to work on a list of lines |
|
1424
|
|
|
|
|
|
|
|
|
1425
|
|
|
|
|
|
|
=back |
|
1426
|
|
|
|
|
|
|
|
|
1427
|
|
|
|
|
|
|
=head1 COPYRIGHT & LICENSE |
|
1428
|
|
|
|
|
|
|
|
|
1429
|
|
|
|
|
|
|
Copyright (c) 2006-2008 Derek R. Price, all rights reserved. |
|
1430
|
|
|
|
|
|
|
Copyright (c) 2002 - 2006, chromatic, all rights reserved. |
|
1431
|
|
|
|
|
|
|
|
|
1432
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
|
1433
|
|
|
|
|
|
|
under the same terms as Perl itself. |
|
1434
|
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
=cut |
|
1436
|
|
|
|
|
|
|
|
|
1437
|
|
|
|
|
|
|
1; # End of Text::MediaiwkiFormat |
|