File Coverage

blib/lib/MojoMojo/Formatter/Wiki.pm

Criterion	Covered	Total	%
statement	122	126	96.8
branch	32	38	84.2
condition	25	34	73.5
subroutine	18	18	100.0
pod	7	7	100.0
total	204	223	91.4

line	stmt	bran	cond	sub	pod	time	code
1							package MojoMojo::Formatter::Wiki;
2
3	37			37		52583	use parent qw/MojoMojo::Formatter/;
	37					314
	37					338
4
5	37			37		2663	use URI;
	37					7320
	37					809
6	37			37		186	use Scalar::Util qw/blessed/;
	37					72
	37					1831
7	37			37		11587	use MojoMojo::Formatter::TOC;
	37					119
	37					55267
8
9							=head1 NAME
10
11							MojoMojo::Formatter::Wiki - Handle interpage linking.
12
13							=head1 DESCRIPTION
14
15							This formatter handles intra-Wiki links specified between double square brackets
16							or parentheses: [[wiki link]] or ((another wiki link)). It will also indicate
17							missing links with a question mark and a link to the edit page. Links can be
18							implicit (like the two above), where the path is derived from the link text
19							by replacing spaces with underscores (<a href="wiki_link">wiki link</a>), or
20							explicit, where the path is specified before a '\|' sign:
21
22							[[/explicit/path\|Link text goes here]]
23
24							Note that external links have a different syntax: [Link text](http://foo.com).
25
26							=head1 METHODS
27
28							=head2 format_content_order
29
30							Format order can be 1-99. The Wiki formatter runs on 10.
31
32							=cut
33
34	620			620	1	1140	sub format_content_order { 10 }
35
36							## list of start-end delimiter pairs
37							my @explicit_delims = (qw{ \[\[ \]\] $\( $\) });
38							my $explicit_separator = '\\|';
39
40							my $wikiword_escape = qr{\\};
41
42							sub _explicit_start_delims {
43	74			74		272	my %delims = @explicit_delims;
44	74					372	return keys %delims;
45							}
46
47							sub _explicit_end_delims {
48	111			111		312	my %delims = @explicit_delims;
49	111					438	return values %delims;
50							}
51
52							sub _generate_explicit_start {
53	37			37		162	my $delims = join '\|', _explicit_start_delims();
54	37					806	return qr{(?: $delims )}x; # non-capturing match
55							}
56
57							sub _generate_explicit_end {
58	37			37		117	my $delims = join '\|', _explicit_end_delims();
59	37					530	return qr{(?: $delims )}x; # non-capturing match
60							}
61
62							sub _generate_explicit_path {
63							# non-greedily match characters that don't match the start-end and text delimiters
64	37			37		177	my $not_an_end_delimiter_or_separator = '(?:(?!' . (join '\|', _explicit_end_delims(), $explicit_separator) . ').)'; # produces (?: (?! ]] \| \)\) \| \\| ) .) # a character in a place where neither a ]], nor a )), nor a \| is
65	37					716	return qr{$not_an_end_delimiter_or_separator+?};
66							}
67
68							sub _generate_explicit_text {
69							# non-greedily match characters that don't match the start-end delimiters
70	37			37		107	my $not_an_end_delimiter = '(?:(?!' . ( join '\|', _explicit_end_delims() ) . ').)'; # produces (?: (?! ]] \| \)\) ) .) # a character in a place where neither a ]] nor a )) starts
71	37					506	return qr{$not_an_end_delimiter+?};
72							}
73
74							my $explicit_start = _generate_explicit_start();
75							my $explicit_end = _generate_explicit_end();
76							my $explicit_path = _generate_explicit_path();
77							my $explicit_text = _generate_explicit_text();
78
79
80							sub _generate_non_wikiword_check {
81							# FIXME: this evaluates incorrectly to a regexp that's clearly mistaken: (?x-ism:( ?<! [\[\[$\((?-xism:\$\/\?] ))
82							# we include '\/' to avoid wikiwords that are parts of urls
83							# but why the question mark ('\?') at the end?
84	37			37		110	my $non_wikiword_chars =
85							( join '', _explicit_start_delims() ) . $wikiword_escape . '\/' . '\?';
86	37					637	return qr{(?<! [$non_wikiword_chars])}x;
87							}
88
89							my $non_wikiword_check = _generate_non_wikiword_check();
90
91							=head2 strip_pre
92
93							Replace <pre ... with a placeholder
94
95							=cut
96
97							sub strip_pre {
98	159			159	1	383	my $content = shift;
99	159					367	my ( @parts, $res );
100	159					408	$res = '';
101	159					1082	while (
102							my ($part) =
103							$$content =~ m{
104							^(.+?)
105							<\spre\b[^>]>}sx
106							)
107							{
108							# $$content =~ s{^.+?<\spre\b[^>]>}{}sx;
109	17					125	$$content =~ s{^.+?<\spre(?:\s+lang=['"](.?)['"]")?>}{}sx;
110	17		100			95	my $lang = $1 \|\| '';
111	17					70	my ($inner) = $$content =~ m{^(.+?)<\s/pre\s>}sx;
112	17	50				56	unless ($inner) {
113	0					0	$res .= $part;
114	0					0	last;
115							}
116	17					40	push @parts, $inner;
117	17					57	$res .= $part . "<!--pre_placeholder::$lang-->";
118	17					154	$$content =~ s{^.+?<\s/pre\s>}{}sx;
119							}
120	159					520	$res .= $$content;
121	159					593	return $res, @parts;
122							}
123
124							=head2 reinsert_pre
125
126							Put pre and lang back into place.
127
128							=cut
129
130							sub reinsert_pre {
131	159			159	1	498	my ( $content, @parts ) = @_;
132	159					442	foreach my $part (@parts) {
133	17					159	$$content =~ s{<!--pre_placeholder::(.*?)-->}{<pre lang="$1">$part</pre>}sx;
134							}
135	159					763	return $$content;
136							}
137
138							=head2 format_content
139
140							Calls the formatter. Takes a ref to the content as well as the
141							context object.
142
143							=cut
144
145							# FIXME: should ACCEPT_CONTEXT?
146
147							sub format_content {
148	149			149	1	17013	my ( $class, $content, $c, $self ) = @_;
149
150							# Extract wikiwords, avoiding escaped and part of urls
151	149					354	my @parts;
152	149					583	( $$content, @parts ) = strip_pre($content);
153
154							# Do explicit links, e.g. [[ /path/to/page \| link text ]]
155	149					5083	$$content =~ s{
156							$non_wikiword_check
157							$explicit_start
158							\s*
159							($explicit_path)
160							\s*
161							(?:
162							$explicit_separator
163							\s*
164							($explicit_text)
165							\s*
166							)?
167							$explicit_end
168	109					19610	}{ $class->format_link($c, $1, $c->req->base, $2) }gex;
169	149					10885	$$content =~ s{
170							$non_wikiword_check
171							(
172							$explicit_start
173							\s*
174							$explicit_path
175							\s*
176							(?:
177							$explicit_separator
178							\s*
179							$explicit_text
180							\s*
181							)?
182							$explicit_end
183							)
184							}{ $1 }gx;
185
186							# Remove escapes on escaped wikiwords. The escape means
187							# that this wikiword is NOT a link to a wiki page.
188	149					1284	$$content =~ s{$wikiword_escape($explicit_start)}{$1}g;
189
190	149					596	$$content = reinsert_pre( $content, @parts );
191							}
192
193							=head2 format_link <c> <wikilink> <base> [<link_text>]
194
195							Format a wikilink as an HTML hyperlink with the given link_text. If the wikilink
196							doesn't exist, it will be rendered as a hyperlink to an .edit page ready to be
197							created.
198
199							Since there is no difference in syntax between new and existing links, some
200							abiguities my occur when it comes to characters that are invalid in URLs. For
201							example,
202
203							* [[say "NO" to #8]] should be rendered as C<< <a href="say_%22NO%22_to_%238">say "NO" to #8</a> >>
204							* [[100% match]] should be rendered as C<< <a href="100%25_match>100% match</a> >>, URL-escaping the '%'
205							* but what about a user pasting an existing link, C<[[say_%22NO%22_to_%238]]>? We shouldn't URL-escape the '%' or '#' here.
206							* for links with explicit link text, we should definitiely not URL-escape the link: C<[[say_%22NO%22_to_%238\|say "NO" to #8]]>
207
208							This is complicated by the fact that '#' can delimit the start of the anchor portion of a link.
209
210							* C<[[Mambo #5]]> - URL-escape '#' => Mambo_%235
211							* C<[[Mambo#origins]]> - do not URL-escape
212							* C<[[existing/link#Introduction\|See the Introduction]]> - definitely do not URL-escape
213
214							Since escaping is somewhat magic and therefore potentially counter-intuitive,
215							we will:
216							* only URL-escape '#' if it follows a whitespace directly
217							* always URL-escape '%' unless it is followed by two uppercase hex digits
218							* always escape other characters that are invalid in URLs
219
220							=cut
221
222							sub format_link {
223
224							#FIXME: why both base and $c?
225	171			171	1	13766	my ( $class, $c, $wikilink, $base, $link_text, $action) = @_;
226	171		33			1249	$base \|\|= $c->req->base;
227
228							# The following control structures are used to build the wikilink
229							# from the stashed path and $wikilink passed to this function.
230
231							# May as well smoke the page stash from MojoMojo.pm since we got it eh?
232	171					2247	my $stashed_path = $c->stash->{path};
233
234							# If the wikilink starts with a slash the pass it on through
235	171					11672	my $pass_wikilink_through;
236	171	100				808	if ( $wikilink =~ m{^/} ) {
		100
237	85					237	$pass_wikilink_through = 1;
238							}
239
240							# Make sure the $stashed_path starts with a bang, uh I mean slash.
241							elsif ( $stashed_path ) {
242	69	50				339	$stashed_path = '/' . $stashed_path if $stashed_path !~ m{^/};
243							}
244	17					27	else { $stashed_path = '/'; }
245
246							# Handle sibling case by making look it like the rest.
247	171	100				920	if ( my ($sibling) = $wikilink =~ m'^\.\./(.*)$' ) {
		100
248	3					13	my ($parent) = $stashed_path =~ m'(.)/.$';
249	3					9	$wikilink = $parent . '/' . $sibling;
250							}
251							elsif ( !$pass_wikilink_through ) {
252	83					272	$wikilink = $stashed_path . '/' . $wikilink;
253
254							# Old School Method:
255							# $wikilink = ( blessed $c->stash->{page} ? $c->stash->{page}->path : $c->stash->{page}->{path} ). '/' . $wikilink
256							# unless $wikilink =~ m'^(\.\.)?/';
257							}
258	171	50				650	$c = MojoMojo->context unless ref $c;
259
260							# keep the original wikilink for display, stripping leading slashes
261	171					370	my $orig_wikilink = $wikilink;
262	171	100				852	if ( $orig_wikilink =~ m\|^ \s* /+ \s* $\|x ) {
263	14					282	$orig_wikilink = '/';
264							}
265							else {
266	157					720	$orig_wikilink =~ s/.*\///;
267							}
268	171					419	my $fragment = '';
269	171					453	for ($wikilink) {
270	171					409	s/(?<!\s)#(.*)/$fragment = $1, ''/e; # trim the anchor (fragment) portion away, in preparation for the page search below, and save it in $fragment
	3					8
271	171					582	s/\s/_/g;
272
273							# MojoMojo doesn't support periods in wikilinks because they conflict with actions ('.edit', '.info' etc.);
274							# actions are a finite set apparently, but it's possible to add new actions from formatter plugins (e.g. Comment).
275							# At the same time, parent links (../sibling) or (../../nephew) should be left alone, but any other '.' should be replaced by '_'
276	171					411	s'^(\.\./)+'MOJOMOJO_RESERVED_TREE_CROSSING_LINK'g;
277	171					380	s/\./_/g;
278	171					373	s'MOJOMOJO_RESERVED_TREE_CROSSING_LINK'../'g;
279							# if there's no link text, URL-escape characters in the wikilink that are not valid in URLs
280	171	100	66			943	if (!defined $link_text or $link_text eq '') {
281	111					723	s/%(?![0-9A-F]{2}) # escape '%' unless it's followed by two uppercase hex digits
282							\| (?<=_)\# # escape '#' only if it directly follows a whitespace (which had been replaced by a '_')
283							\| [":<=>?{\|}] # escape all other characters that are invalid in URLs
284	6					35	/sprintf('%%%02X', ord($&))/egx; # all other characters in the 0x21..0x7E range are OK in URLs; see the conflicting guidelines at http://www.ietf.org/rfc/rfc1738.txt and http://labs.apache.org/webarch/uri/rfc/rfc3986.html#reserved
285							}
286							}
287							# if the fragment was not properly formatted as a fragment (per the rules explained in MojoMojo::Formatter::TOC::assembleAnchorName, i.e. i has an invalid character), convert it, unless it contains escaped characters already (.[0-9A-F]{2})
288	171	50				971	if(MojoMojo::Formatter::TOC->module_loaded){
289	171	100	100			646	$fragment = MojoMojo::Formatter::TOC::assembleAnchorName(undef, undef, undef, undef, $fragment)
			66
290							if $fragment ne '' and ($fragment =~ /[^A-Za-z0-9_:.-]/ or $fragment !~ /\.[0-9A-F]{2}/);
291							}
292	171		66			770	my $formatted = $link_text \|\| $class->expand_wikilink($orig_wikilink);
293
294							# convert relative paths to absolute paths
295	171	50	66			591	if (
		50	66
			66
296							$c->stash->{page}
297							&&
298
299							# drop spaces
300							ref $c->stash->{page} eq 'MojoMojo::Model::DBIC::Page' && $wikilink !~ m\|^/\|
301							)
302							{
303	0					0	$wikilink = URI->new_abs( $wikilink, $c->stash->{page}->path . "/" );
304							}
305							elsif ( $c->stash->{page_path} && $wikilink !~ m\|^/\| ) {
306	0					0	$wikilink = URI->new_abs( $wikilink, $c->stash->{page_path} . "/" );
307							}
308
309							# make sure that base URL has no trailing slash, since the page path will have a leading slash
310	171					29067	my $url = $base;
311	171					668	$url =~ s/[\/]+$//;
312
313							# remove http://host/ from url
314	171					1475	$url =~ s!^https?://[^/]+!!;
315
316							# use the normalized path string returned by path_pages:
317	171					2429	my ( $path_pages, $proto_pages ) = $c->model('DBIC::Page')->path_pages($wikilink);
318	171	100	100			1613	if ( defined $proto_pages && @$proto_pages ) {
319	63					1208	my $proto_page = pop @$proto_pages;
320	63					556	$url .= $proto_page->{path};
321	63	100				1943	if ( $action) {
322	6					24	$url .= ".$action" ;
323	6					51	return qq{<a class="existingWikiWord" href="$url">$formatted</a>};
324							}
325							else {
326	57					959	return qq{<span class="newWikiWord"><a title="}
327							. $c->loc('Not found. Click to create this page.')
328							. qq{" href="$url.edit">$formatted?</a></span>};
329							}
330							}
331							else {
332	108					2411	my $page = pop @$path_pages;
333	108					470	$url .= $page->path;
334	108	100				767	$url .= ".$action" if $action;
335	108	100				447	$url .= "#$fragment" if $fragment ne '';
336	108					808	return qq{<a class="existingWikiWord" href="$url">$formatted</a>};
337							}
338							}
339
340							=head2 expand_wikilink <wikilink>
341
342							Replace C<_> with spaces and unescape URL-encoded characters
343
344							=cut
345
346							sub expand_wikilink {
347	217			217	1	1174	my ( $class, $wikilink ) = @_;
348	217					671	for ($wikilink) {
349	217					647	s/\_/ /g;
350	217					569	s/%([0-9A-F]{2})/chr(hex($1))/eg;
	3					12
351							}
352	217					888	return $wikilink;
353							}
354
355							=head2 find_links <content> <page>
356
357							Find wiki links in content.
358
359							Return a listref of linked (existing) and wanted pages.
360
361							=cut
362
363							sub find_links {
364	10			10	1	1953	my ( $class, $content, $page ) = @_;
365	10					41	my @linked_pages;
366							my @wanted_pages;
367
368	10					0	my @parts;
369	10					38	( $$content, @parts ) = strip_pre($content);
370
371	10					330	my $explicit_regex =
372							qr/$non_wikiword_check$explicit_start \s* ($explicit_path) \s* (?: $explicit_separator \s* $explicit_text \s* )? $explicit_end/x;
373
374	10					173	while ( $$content =~ /$explicit_regex/g ) {
375	13					125	my $link = $1;
376	13					46	$link =~ s/\s/_/g;
377
378							# convert relative paths to absolute paths
379	13	100				52	if ( $link !~ m\|^/\| ) {
380	8		50			32	$link = URI->new_abs( $link, ( $page->path \|\| '' ) . "/" );
381							}
382
383							# use the normalized path string returned by path_pages:
384	13					5652	my ( $path_pages, $proto_pages ) =
385							$page->result_source->resultset->path_pages($link);
386	13	100	100			255	if ( defined $proto_pages && @$proto_pages ) {
387	5					137	push @wanted_pages, pop @$proto_pages;
388							}
389							else {
390	8					199	push @linked_pages, pop @$path_pages;
391							}
392							}
393	10					118	$$content = reinsert_pre( $content, @parts );
394	10					57	return ( \@linked_pages, \@wanted_pages );
395							}
396
397							=head1 SEE ALSO
398
399							L<MojoMojo>, L<Module::Pluggable::Ordered>
400
401							=head1 AUTHORS
402
403							Marcus Ramberg <mramberg@cpan.org>
404
405							=head1 LICENSE
406
407							This library is free software. You can redistribute it and/or modify
408							it under the same terms as Perl itself.
409
410							=cut
411
412							1;