File Coverage

blib/lib/MojoMojo/Formatter/Wiki.pm
Criterion Covered Total %
statement 122 126 96.8
branch 32 38 84.2
condition 25 34 73.5
subroutine 18 18 100.0
pod 7 7 100.0
total 204 223 91.4


line stmt bran cond sub pod time code
1             package MojoMojo::Formatter::Wiki;
2              
3 37     37   52583 use parent qw/MojoMojo::Formatter/;
  37         314  
  37         338  
4              
5 37     37   2663 use URI;
  37         7320  
  37         809  
6 37     37   186 use Scalar::Util qw/blessed/;
  37         72  
  37         1831  
7 37     37   11587 use MojoMojo::Formatter::TOC;
  37         119  
  37         55267  
8              
9             =head1 NAME
10              
11             MojoMojo::Formatter::Wiki - Handle interpage linking.
12              
13             =head1 DESCRIPTION
14              
15             This formatter handles intra-Wiki links specified between double square brackets
16             or parentheses: [[wiki link]] or ((another wiki link)). It will also indicate
17             missing links with a question mark and a link to the edit page. Links can be
18             implicit (like the two above), where the path is derived from the link text
19             by replacing spaces with underscores (<a href="wiki_link">wiki link</a>), or
20             explicit, where the path is specified before a '|' sign:
21              
22             [[/explicit/path|Link text goes here]]
23              
24             Note that external links have a different syntax: [Link text](http://foo.com).
25              
26             =head1 METHODS
27              
28             =head2 format_content_order
29              
30             Format order can be 1-99. The Wiki formatter runs on 10.
31              
32             =cut
33              
34 620     620 1 1140 sub format_content_order { 10 }
35              
36             ## list of start-end delimiter pairs
37             my @explicit_delims = (qw{ \[\[ \]\] \(\( \)\) });
38             my $explicit_separator = '\|';
39              
40             my $wikiword_escape = qr{\\};
41              
42             sub _explicit_start_delims {
43 74     74   272 my %delims = @explicit_delims;
44 74         372 return keys %delims;
45             }
46              
47             sub _explicit_end_delims {
48 111     111   312 my %delims = @explicit_delims;
49 111         438 return values %delims;
50             }
51              
52             sub _generate_explicit_start {
53 37     37   162 my $delims = join '|', _explicit_start_delims();
54 37         806 return qr{(?: $delims )}x; # non-capturing match
55             }
56              
57             sub _generate_explicit_end {
58 37     37   117 my $delims = join '|', _explicit_end_delims();
59 37         530 return qr{(?: $delims )}x; # non-capturing match
60             }
61              
62             sub _generate_explicit_path {
63             # non-greedily match characters that don't match the start-end and text delimiters
64 37     37   177 my $not_an_end_delimiter_or_separator = '(?:(?!' . (join '|', _explicit_end_delims(), $explicit_separator) . ').)'; # produces (?: (?! ]] | \)\) | \| ) .) # a character in a place where neither a ]], nor a )), nor a | is
65 37         716 return qr{$not_an_end_delimiter_or_separator+?};
66             }
67              
68             sub _generate_explicit_text {
69             # non-greedily match characters that don't match the start-end delimiters
70 37     37   107 my $not_an_end_delimiter = '(?:(?!' . ( join '|', _explicit_end_delims() ) . ').)'; # produces (?: (?! ]] | \)\) ) .) # a character in a place where neither a ]] nor a )) starts
71 37         506 return qr{$not_an_end_delimiter+?};
72             }
73              
74             my $explicit_start = _generate_explicit_start();
75             my $explicit_end = _generate_explicit_end();
76             my $explicit_path = _generate_explicit_path();
77             my $explicit_text = _generate_explicit_text();
78              
79              
80             sub _generate_non_wikiword_check {
81             # FIXME: this evaluates incorrectly to a regexp that's clearly mistaken: (?x-ism:( ?<! [\[\[\(\((?-xism:\\)\/\?] ))
82             # we include '\/' to avoid wikiwords that are parts of urls
83             # but why the question mark ('\?') at the end?
84 37     37   110 my $non_wikiword_chars =
85             ( join '', _explicit_start_delims() ) . $wikiword_escape . '\/' . '\?';
86 37         637 return qr{(?<! [$non_wikiword_chars])}x;
87             }
88              
89             my $non_wikiword_check = _generate_non_wikiword_check();
90              
91             =head2 strip_pre
92              
93             Replace <pre ... with a placeholder
94              
95             =cut
96              
97             sub strip_pre {
98 159     159 1 383 my $content = shift;
99 159         367 my ( @parts, $res );
100 159         408 $res = '';
101 159         1082 while (
102             my ($part) =
103             $$content =~ m{
104             ^(.+?)
105             <\s*pre\b[^>]*>}sx
106             )
107             {
108             # $$content =~ s{^.+?<\s*pre\b[^>]*>}{}sx;
109 17         125 $$content =~ s{^.+?<\s*pre(?:\s+lang=['"]*(.*?)['"]*")?>}{}sx;
110 17   100     95 my $lang = $1 || '';
111 17         70 my ($inner) = $$content =~ m{^(.+?)<\s*/pre\s*>}sx;
112 17 50       56 unless ($inner) {
113 0         0 $res .= $part;
114 0         0 last;
115             }
116 17         40 push @parts, $inner;
117 17         57 $res .= $part . "<!--pre_placeholder::$lang-->";
118 17         154 $$content =~ s{^.+?<\s*/pre\s*>}{}sx;
119             }
120 159         520 $res .= $$content;
121 159         593 return $res, @parts;
122             }
123              
124             =head2 reinsert_pre
125              
126             Put pre and lang back into place.
127              
128             =cut
129              
130             sub reinsert_pre {
131 159     159 1 498 my ( $content, @parts ) = @_;
132 159         442 foreach my $part (@parts) {
133 17         159 $$content =~ s{<!--pre_placeholder::(.*?)-->}{<pre lang="$1">$part</pre>}sx;
134             }
135 159         763 return $$content;
136             }
137              
138             =head2 format_content
139              
140             Calls the formatter. Takes a ref to the content as well as the
141             context object.
142              
143             =cut
144              
145             # FIXME: should ACCEPT_CONTEXT?
146              
147             sub format_content {
148 149     149 1 17013 my ( $class, $content, $c, $self ) = @_;
149              
150             # Extract wikiwords, avoiding escaped and part of urls
151 149         354 my @parts;
152 149         583 ( $$content, @parts ) = strip_pre($content);
153              
154             # Do explicit links, e.g. [[ /path/to/page | link text ]]
155 149         5083 $$content =~ s{
156             $non_wikiword_check
157             $explicit_start
158             \s*
159             ($explicit_path)
160             \s*
161             (?:
162             $explicit_separator
163             \s*
164             ($explicit_text)
165             \s*
166             )?
167             $explicit_end
168 109         19610 }{ $class->format_link($c, $1, $c->req->base, $2) }gex;
169 149         10885 $$content =~ s{
170             $non_wikiword_check
171             (
172             $explicit_start
173             \s*
174             $explicit_path
175             \s*
176             (?:
177             $explicit_separator
178             \s*
179             $explicit_text
180             \s*
181             )?
182             $explicit_end
183             )
184             }{ $1 }gx;
185              
186             # Remove escapes on escaped wikiwords. The escape means
187             # that this wikiword is NOT a link to a wiki page.
188 149         1284 $$content =~ s{$wikiword_escape($explicit_start)}{$1}g;
189              
190 149         596 $$content = reinsert_pre( $content, @parts );
191             }
192              
193             =head2 format_link <c> <wikilink> <base> [<link_text>]
194              
195             Format a wikilink as an HTML hyperlink with the given link_text. If the wikilink
196             doesn't exist, it will be rendered as a hyperlink to an .edit page ready to be
197             created.
198              
199             Since there is no difference in syntax between new and existing links, some
200             abiguities my occur when it comes to characters that are invalid in URLs. For
201             example,
202              
203             * [[say "NO" to #8]] should be rendered as C<< <a href="say_%22NO%22_to_%238">say "NO" to #8</a> >>
204             * [[100% match]] should be rendered as C<< <a href="100%25_match>100% match</a> >>, URL-escaping the '%'
205             * but what about a user pasting an existing link, C<[[say_%22NO%22_to_%238]]>? We shouldn't URL-escape the '%' or '#' here.
206             * for links with explicit link text, we should definitiely not URL-escape the link: C<[[say_%22NO%22_to_%238|say "NO" to #8]]>
207              
208             This is complicated by the fact that '#' can delimit the start of the anchor portion of a link.
209              
210             * C<[[Mambo #5]]> - URL-escape '#' => Mambo_%235
211             * C<[[Mambo#origins]]> - do not URL-escape
212             * C<[[existing/link#Introduction|See the Introduction]]> - definitely do not URL-escape
213              
214             Since escaping is somewhat magic and therefore potentially counter-intuitive,
215             we will:
216             * only URL-escape '#' if it follows a whitespace directly
217             * always URL-escape '%' unless it is followed by two uppercase hex digits
218             * always escape other characters that are invalid in URLs
219              
220             =cut
221              
222             sub format_link {
223              
224             #FIXME: why both base and $c?
225 171     171 1 13766 my ( $class, $c, $wikilink, $base, $link_text, $action) = @_;
226 171   33     1249 $base ||= $c->req->base;
227            
228             # The following control structures are used to build the wikilink
229             # from the stashed path and $wikilink passed to this function.
230            
231             # May as well smoke the page stash from MojoMojo.pm since we got it eh?
232 171         2247 my $stashed_path = $c->stash->{path};
233            
234             # If the wikilink starts with a slash the pass it on through
235 171         11672 my $pass_wikilink_through;
236 171 100       808 if ( $wikilink =~ m{^/} ) {
    100          
237 85         237 $pass_wikilink_through = 1;
238             }
239              
240             # Make sure the $stashed_path starts with a bang, uh I mean slash.
241             elsif ( $stashed_path ) {
242 69 50       339 $stashed_path = '/' . $stashed_path if $stashed_path !~ m{^/};
243             }
244 17         27 else { $stashed_path = '/'; }
245            
246             # Handle sibling case by making look it like the rest.
247 171 100       920 if ( my ($sibling) = $wikilink =~ m'^\.\./(.*)$' ) {
    100          
248 3         13 my ($parent) = $stashed_path =~ m'(.*)/.*$';
249 3         9 $wikilink = $parent . '/' . $sibling;
250             }
251             elsif ( !$pass_wikilink_through ) {
252 83         272 $wikilink = $stashed_path . '/' . $wikilink;
253            
254             # Old School Method:
255             # $wikilink = ( blessed $c->stash->{page} ? $c->stash->{page}->path : $c->stash->{page}->{path} ). '/' . $wikilink
256             # unless $wikilink =~ m'^(\.\.)?/';
257             }
258 171 50       650 $c = MojoMojo->context unless ref $c;
259              
260             # keep the original wikilink for display, stripping leading slashes
261 171         370 my $orig_wikilink = $wikilink;
262 171 100       852 if ( $orig_wikilink =~ m|^ \s* /+ \s* $|x ) {
263 14         282 $orig_wikilink = '/';
264             }
265             else {
266 157         720 $orig_wikilink =~ s/.*\///;
267             }
268 171         419 my $fragment = '';
269 171         453 for ($wikilink) {
270 171         409 s/(?<!\s)#(.*)/$fragment = $1, ''/e; # trim the anchor (fragment) portion away, in preparation for the page search below, and save it in $fragment
  3         8  
271 171         582 s/\s/_/g;
272              
273             # MojoMojo doesn't support periods in wikilinks because they conflict with actions ('.edit', '.info' etc.);
274             # actions are a finite set apparently, but it's possible to add new actions from formatter plugins (e.g. Comment).
275             # At the same time, parent links (../sibling) or (../../nephew) should be left alone, but any other '.' should be replaced by '_'
276 171         411 s'^(\.\./)+'MOJOMOJO_RESERVED_TREE_CROSSING_LINK'g;
277 171         380 s/\./_/g;
278 171         373 s'MOJOMOJO_RESERVED_TREE_CROSSING_LINK'../'g;
279             # if there's no link text, URL-escape characters in the wikilink that are not valid in URLs
280 171 100 66     943 if (!defined $link_text or $link_text eq '') {
281 111         723 s/%(?![0-9A-F]{2}) # escape '%' unless it's followed by two uppercase hex digits
282             | (?<=_)\# # escape '#' only if it directly follows a whitespace (which had been replaced by a '_')
283             | [":<=>?{|}] # escape all other characters that are invalid in URLs
284 6         35 /sprintf('%%%02X', ord($&))/egx; # all other characters in the 0x21..0x7E range are OK in URLs; see the conflicting guidelines at http://www.ietf.org/rfc/rfc1738.txt and http://labs.apache.org/webarch/uri/rfc/rfc3986.html#reserved
285             }
286             }
287             # if the fragment was not properly formatted as a fragment (per the rules explained in MojoMojo::Formatter::TOC::assembleAnchorName, i.e. i has an invalid character), convert it, unless it contains escaped characters already (.[0-9A-F]{2})
288 171 50       971 if(MojoMojo::Formatter::TOC->module_loaded){
289 171 100 100     646 $fragment = MojoMojo::Formatter::TOC::assembleAnchorName(undef, undef, undef, undef, $fragment)
      66        
290             if $fragment ne '' and ($fragment =~ /[^A-Za-z0-9_:.-]/ or $fragment !~ /\.[0-9A-F]{2}/);
291             }
292 171   66     770 my $formatted = $link_text || $class->expand_wikilink($orig_wikilink);
293              
294             # convert relative paths to absolute paths
295 171 50 66     591 if (
    50 66        
      66        
296             $c->stash->{page}
297             &&
298              
299             # drop spaces
300             ref $c->stash->{page} eq 'MojoMojo::Model::DBIC::Page' && $wikilink !~ m|^/|
301             )
302             {
303 0         0 $wikilink = URI->new_abs( $wikilink, $c->stash->{page}->path . "/" );
304             }
305             elsif ( $c->stash->{page_path} && $wikilink !~ m|^/| ) {
306 0         0 $wikilink = URI->new_abs( $wikilink, $c->stash->{page_path} . "/" );
307             }
308              
309             # make sure that base URL has no trailing slash, since the page path will have a leading slash
310 171         29067 my $url = $base;
311 171         668 $url =~ s/[\/]+$//;
312              
313             # remove http://host/ from url
314 171         1475 $url =~ s!^https?://[^/]+!!;
315              
316             # use the normalized path string returned by path_pages:
317 171         2429 my ( $path_pages, $proto_pages ) = $c->model('DBIC::Page')->path_pages($wikilink);
318 171 100 100     1613 if ( defined $proto_pages && @$proto_pages ) {
319 63         1208 my $proto_page = pop @$proto_pages;
320 63         556 $url .= $proto_page->{path};
321 63 100       1943 if ( $action) {
322 6         24 $url .= ".$action" ;
323 6         51 return qq{<a class="existingWikiWord" href="$url">$formatted</a>};
324             }
325             else {
326 57         959 return qq{<span class="newWikiWord"><a title="}
327             . $c->loc('Not found. Click to create this page.')
328             . qq{" href="$url.edit">$formatted?</a></span>};
329             }
330             }
331             else {
332 108         2411 my $page = pop @$path_pages;
333 108         470 $url .= $page->path;
334 108 100       767 $url .= ".$action" if $action;
335 108 100       447 $url .= "#$fragment" if $fragment ne '';
336 108         808 return qq{<a class="existingWikiWord" href="$url">$formatted</a>};
337             }
338             }
339              
340             =head2 expand_wikilink <wikilink>
341              
342             Replace C<_> with spaces and unescape URL-encoded characters
343              
344             =cut
345              
346             sub expand_wikilink {
347 217     217 1 1174 my ( $class, $wikilink ) = @_;
348 217         671 for ($wikilink) {
349 217         647 s/\_/ /g;
350 217         569 s/%([0-9A-F]{2})/chr(hex($1))/eg;
  3         12  
351             }
352 217         888 return $wikilink;
353             }
354              
355             =head2 find_links <content> <page>
356              
357             Find wiki links in content.
358              
359             Return a listref of linked (existing) and wanted pages.
360              
361             =cut
362              
363             sub find_links {
364 10     10 1 1953 my ( $class, $content, $page ) = @_;
365 10         41 my @linked_pages;
366             my @wanted_pages;
367              
368 10         0 my @parts;
369 10         38 ( $$content, @parts ) = strip_pre($content);
370              
371 10         330 my $explicit_regex =
372             qr/$non_wikiword_check$explicit_start \s* ($explicit_path) \s* (?: $explicit_separator \s* $explicit_text \s* )? $explicit_end/x;
373              
374 10         173 while ( $$content =~ /$explicit_regex/g ) {
375 13         125 my $link = $1;
376 13         46 $link =~ s/\s/_/g;
377              
378             # convert relative paths to absolute paths
379 13 100       52 if ( $link !~ m|^/| ) {
380 8   50     32 $link = URI->new_abs( $link, ( $page->path || '' ) . "/" );
381             }
382              
383             # use the normalized path string returned by path_pages:
384 13         5652 my ( $path_pages, $proto_pages ) =
385             $page->result_source->resultset->path_pages($link);
386 13 100 100     255 if ( defined $proto_pages && @$proto_pages ) {
387 5         137 push @wanted_pages, pop @$proto_pages;
388             }
389             else {
390 8         199 push @linked_pages, pop @$path_pages;
391             }
392             }
393 10         118 $$content = reinsert_pre( $content, @parts );
394 10         57 return ( \@linked_pages, \@wanted_pages );
395             }
396              
397             =head1 SEE ALSO
398              
399             L<MojoMojo>, L<Module::Pluggable::Ordered>
400              
401             =head1 AUTHORS
402              
403             Marcus Ramberg <mramberg@cpan.org>
404              
405             =head1 LICENSE
406              
407             This library is free software. You can redistribute it and/or modify
408             it under the same terms as Perl itself.
409              
410             =cut
411              
412             1;