File Coverage

blib/lib/Text/MediawikiFormat.pm
Criterion Covered Total %
statement 253 367 68.9
branch 106 194 54.6
condition 20 69 28.9
subroutine 37 42 88.1
pod 2 2 100.0
total 418 674 62.0


line stmt bran cond sub pod time code
1             package Text::MediawikiFormat;
2              
3 14     14   177210 use strict;
  14         21  
  14         515  
4 14     14   66 use warnings::register;
  14         17  
  14         2049  
5              
6             =head1 NAME
7              
8             Text::MediawikiFormat - Translate Wiki markup into other text formats
9              
10             =head1 VERSION
11              
12             Version 1.03
13              
14             =cut
15              
16             our $VERSION = '1.03';
17              
18             =head1 SYNOPSIS
19              
20             use Text::MediawikiFormat 'wikiformat';
21             my $html = wikiformat ($raw);
22             my $text = wikiformat ($raw, {}, {implicit_links => 1});
23              
24             =head1 DESCRIPTION
25              
26             L and its sister projects use the PHP Mediawiki to format
27             their pages. This module attempts to duplicate the Mediawiki formatting rules.
28             Those formatting rules can be simple and easy to use, while providing more
29             advanced options for the power user. They are also easy to translate into
30             other, more complicated markup languages with this module. It creates HTML by
31             default, but could produce valid POD, DocBook, XML, or any other format
32             imaginable.
33              
34             The most important function is C. It is
35             not exported by default, but will be exported as C if any
36             options at all are passed to the exporter, unless the name is overridden
37             explicitly. See L<"EXPORT"> for more information.
38              
39             It should be noted that this module is written as a drop in replacement for
40             L that expands on that modules functionality and provides
41             a default rule set that may be used to format text like the PHP Mediawiki. It
42             is also well to note early that if you just want a Mediawiki clone (you don't
43             need to customize it heavily and you want integration with a back end
44             database), you should look at L.
45              
46             =cut
47              
48 14     14   64 use Carp qw(carp confess croak);
  14         22  
  14         825  
49 14     14   17768 use CGI qw(:standard);
  14         169903  
  14         87  
50 14     14   36801 use Scalar::Util qw(blessed);
  14         24  
  14         1117  
51 14     14   4966 use Text::MediawikiFormat::Blocks;
  14         21  
  14         74  
52 14     14   7466 use URI;
  14         49477  
  14         429  
53 14     14   114 use URI::Escape qw(uri_escape uri_escape_utf8);
  14         19  
  14         875  
54              
55 14         1494 use vars qw($missing_html_packages %tags %opts %merge_matrix
56 14     14   60 $uric $uricCheat $uriCruft);
  14         15  
57              
58             BEGIN
59             {
60             # Try to load optional HTML packages, recording any errors.
61 14     14   24 eval {require HTML::Parser};
  14         8477  
62 14         65928 $missing_html_packages = $@;
63 14         27 eval {require HTML::Tagset};
  14         6734  
64 14         29661 $missing_html_packages .= $@;
65             }
66              
67              
68              
69             ###
70             ### Defaults
71             ###
72             %tags =
73             (
74             indent => qr/^(?:[:*#;]*)(?=[:*#;])/,
75             link => \&_make_html_link,
76             strong => sub {"$_[0]"},
77             emphasized => sub {"$_[0]"},
78             strong_tag => qr/'''(.+?)'''/,
79             emphasized_tag => qr/''(.+?)''/,
80              
81             code => ['
', "
\n", '', "\n"],
82             line => ['', '', '
', "\n"],
83             paragraph => ["

", "

\n", '', "\n", 1],
84             paragraph_break => ['', '', '', "\n"],
85             unordered => ["
    \n", "
\n", '
  • ', "
  • \n"],
    86             ordered => ["
      \n", "
    \n", '
  • ', "
  • \n"],
    87             definition => ["
    \n", "
    \n", \&_dl],
    88             header => ['', "\n", \&_make_header],
    89              
    90             blocks =>
    91             {
    92             code => qr/^ /,
    93             header => qr/^(=+)\s*(.+?)\s*\1$/,
    94             line => qr/^-{4,}$/,
    95             ordered => qr/^#\s*/,
    96             unordered => qr/^\*\s*/,
    97             definition => qr/^([;:])\s*/,
    98             paragraph => qr/^/,
    99             paragraph_break => qr/^\s*$/,
    100             },
    101              
    102             indented => {map {$_ => 1} qw(ordered unordered definition)},
    103             nests => {map {$_ => 1} qw(ordered unordered definition)},
    104             nests_anywhere => {map {$_ => 1} qw(nowiki)},
    105              
    106             blockorder => [qw(code header line ordered unordered definition
    107             paragraph_break paragraph)],
    108             implicit_link_delimiters
    109             => qr!\b(?:[A-Z][a-z0-9]\w*){2,}!,
    110             extended_link_delimiters
    111             => qr!\[(?:\[[^][]*\]|[^][]*)\]!,
    112              
    113             schemas => [qw(http https ftp mailto gopher)],
    114              
    115             unformatted_blocks => [qw(header nowiki pre)],
    116              
    117             allowed_tags => [#HTML
    118             qw(b big blockquote br caption center cite code dd
    119             div dl dt em font h1 h2 h3 h4 h5 h6 hr i li ol p
    120             pre rb rp rt ruby s samp small strike strong sub
    121             sup table td th tr tt u ul var),
    122             # Mediawiki Specific
    123             qw(nowiki),],
    124             allowed_attrs => [qw(title align lang dir width height bgcolor),
    125             qw(clear), # BR
    126             qw(noshade), # HR
    127             qw(cite), # BLOCKQUOTE, Q
    128             qw(size face color), # FONT
    129             # For various lists, mostly deprecated but safe
    130             qw(type start value compact),
    131             # Tables
    132             qw(summary width border frame rules cellspacing
    133             cellpadding valign char charoff colgroup col
    134             span abbr axis headers scope rowspan colspan),
    135             qw(id class name style), # For CSS
    136             ],
    137              
    138             _toc => [],
    139             );
    140              
    141             %opts =
    142             (
    143             extended => 1,
    144             implicit_links => 0,
    145             absolute_links => 1,
    146             prefix => '',
    147             process_html => 1,
    148             charset => 'utf-8',
    149             );
    150              
    151             # Make sure import's argument hash contains an `as' entry. `as' defaults to
    152             # `wikiformat' when none is given.
    153             sub _process_args
    154             {
    155 15     15   15 shift; # Class
    156 15 100       40 return as => shift if @_ == 1;
    157 14         66 return as => 'wikiformat', @_;
    158             }
    159              
    160             # Delete the options (prefix, extended, implicit_links, ...) from a hash,
    161             # returning a new hash with the deleted options.
    162             sub _extract_opts
    163             {
    164 15     15   16 my %newopts;
    165              
    166 15         27 for my $key (qw{prefix extended implicit_links absolute_links
    167             process_html debug})
    168             {
    169 90 100       202 if (defined (my $val = delete $_[0]->{$key}))
    170             {
    171 19         40 $newopts{$key} = $val;
    172             }
    173             }
    174              
    175 15         50 return \%newopts;
    176             }
    177              
    178             # Shamelessly ripped from Hash::Merge, which doesn't work in a threaded
    179             # environment with two threads trying to use different merge matrices.
    180             %merge_matrix =
    181             (
    182             SCALAR =>
    183             {
    184             SCALAR => sub {return $_[0]},
    185             ARRAY => sub {# Need to be able to replace scalar with array
    186             # for extended_link_delimiters (could be array
    187             # or regex).
    188             return $_[0];},
    189             HASH => sub {confess "Attempt to replace hash with scalar"
    190             if defined $_[0];
    191             return _clone ($_[1]);}
    192             },
    193              
    194             ARRAY =>
    195             {
    196             SCALAR => sub {# Need to be able to replace array with scalar
    197             # for extended_link_delimiters (could be array
    198             # or regex).
    199             return _clone ($_[0]);},
    200             ARRAY => sub {return _clone ($_[0]);},
    201             HASH => sub {confess "Attempt to replace hash with array"}
    202             },
    203              
    204             HASH =>
    205             {
    206             SCALAR => sub {confess "Attempt to replace scalar with hash"},
    207             ARRAY => sub {confess "Attempt to replace array with hash"},
    208             HASH => sub {_merge_hash_elements ($_[0], $_[1])}
    209             }
    210             );
    211             # Return arrays and a deep copy of hashes.
    212             sub _clone
    213             {
    214 2890     2890   2206 my ($obj) = @_;
    215 2890         1822 my $type;
    216 2890 50       5766 if (!defined $obj) { # Perl 5.005 compatibility
        100          
        100          
    217 0         0 $type = 'SCALAR';
    218             } elsif (ref $obj eq 'HASH') {
    219 231         213 $type = 'HASH';
    220             } elsif (ref $obj eq 'ARRAY') {
    221 913         735 $type = 'ARRAY';
    222             } else {
    223 1746         1402 $type = 'SCALAR';
    224             }
    225              
    226 2890 100       5330 return $obj if $type eq 'SCALAR';
    227 1144 100       2315 return $obj if $type eq 'ARRAY';
    228              
    229 231         169 my %copy;
    230 231         429 foreach my $key (keys %$obj)
    231             {
    232 800         941 $copy{$key} = _clone ($obj->{$key});
    233             }
    234 231         520 return \%copy;
    235             }
    236             # This does a straight merge of hashes, delegating the merge-specific
    237             # work to '_merge_hashes'.
    238             sub _merge_hash_elements
    239             {
    240 169     169   159 my ($left, $right) = @_;
    241 169 50 33     848 die "Arguments for _merge_hash_elements must be hash references" unless
    242             UNIVERSAL::isa ($left, 'HASH') && UNIVERSAL::isa ($right, 'HASH');
    243              
    244 169         140 my %newhash;
    245 169         383 foreach my $leftkey (keys %$left)
    246             {
    247 243 100       323 if (exists $right->{$leftkey})
    248             {
    249 235         345 $newhash{$leftkey} =
    250             _merge_hashes ($left->{$leftkey}, $right->{$leftkey});
    251             }
    252             else
    253             {
    254 8         11 $newhash{$leftkey} = _clone ($left->{$leftkey});
    255             }
    256             }
    257 169         563 foreach my $rightkey (keys %$right)
    258             {
    259 2266 100       4137 $newhash{$rightkey} = _clone ($right->{$rightkey})
    260             if !exists $left->{$rightkey};
    261             }
    262 169         425 return \%newhash;
    263             }
    264             sub _merge_hashes
    265             {
    266 371     371   5372 my ($left, $right) = @_;
    267            
    268             # if one argument or the other is undefined or empty, don't worry about
    269             # copying, just return the original.
    270 371 50       548 return $right unless defined $left;
    271 371 50       474 return $left unless defined $right;
    272              
    273             # For the general use of this function, we want to create duplicates
    274             # of all data that is merged.
    275            
    276 371         264 my ($lefttype, $righttype);
    277 371 100       667 if (ref $left eq 'HASH') {
        100          
    278 169         161 $lefttype = 'HASH';
    279             } elsif (ref $left eq 'ARRAY') {
    280 51         52 $lefttype = 'ARRAY';
    281             } else {
    282 151         136 $lefttype = 'SCALAR';
    283             }
    284            
    285 371 100       560 if (ref $right eq 'HASH') {
        100          
    286 169         144 $righttype = 'HASH';
    287             } elsif (ref $right eq 'ARRAY') {
    288 49         34 $righttype = 'ARRAY';
    289             } else {
    290 153         136 $righttype = 'SCALAR';
    291             }
    292            
    293 371         657 return $merge_matrix{$lefttype}->{$righttype} ($left, $right);
    294             }
    295              
    296             sub _require_html_packages
    297             {
    298 1 50   1   3 croak "$missing_html_packages\n"
    299             . "HTML::Parser & HTML::Tagset is required for process_html\n"
    300             if $missing_html_packages;
    301             }
    302              
    303             sub import
    304             {
    305 18 100   18   3001 return unless @_ > 1;
    306              
    307 15         23 my $class = shift;
    308 15         51 my %args = $class->_process_args (@_);
    309 15         36 my $name = delete $args{as};
    310              
    311 15         29 my $caller = caller();
    312 15         157 my $iopts = _merge_hashes _extract_opts (\%args), \%opts;
    313 15         54 my $itags = _merge_hashes \%args, \%tags;
    314              
    315 15 100       61 _require_html_packages
    316             if $iopts->{process_html};
    317              
    318             # Could verify ITAGS here via _check_blocks, but what if a user
    319             # wants to add a block to block_order that they intend to override
    320             # the implementation of with every call to format()?
    321              
    322 14     14   77 no strict 'refs';
      14         18  
      14         44436  
    323 15         14503 *{ $caller . "::" . $name } = sub
    324             {
    325 45     45   32812 Text::MediawikiFormat::_format ($itags, $iopts, @_);
    326             }
    327 15         53 }
    328              
    329              
    330              
    331             =head1 FUNCTIONS
    332              
    333             =head2 format
    334              
    335             C takes one required argument, the text to convert, and returns the
    336             converted text. It allows two optional arguments. The first is a reference to
    337             a hash of tags used to override the function's default behavior. Anything
    338             passed in here will override the default tags. The second argument is a hash
    339             reference of options. The options are currently:
    340              
    341             =over 4
    342              
    343             =item prefix
    344              
    345             The prefix of any links to wiki pages. In HTML mode, this is the path to the
    346             Wiki. The actual linked item itself will be appended to the prefix. This is
    347             useful to create full URIs:
    348              
    349             {prefix => 'http://example.com/wiki.pl?page='}
    350              
    351             =item extended
    352              
    353             A boolean flag, true by default, to let square brackets mark links.
    354             An optional title may occur after the Wiki targets, preceded by an open pipe.
    355             URI titles are separated from their title with a space. These are valid
    356             extended links:
    357              
    358             [[A wiki page|and the title to display]]
    359             [http://ximbiot.com URI title]
    360              
    361             Where the linking semantics of the destination format allow it, the result will
    362             display the title instead of the URI. In HTML terms, the title is the content
    363             of an C element (not the content of its C attribute).
    364              
    365             You can use delimiters other than single square brackets for marking extended
    366             links by passing a value for C in the C<%tags> hash
    367             when calling C.
    368              
    369             Note that if you disable this flag, you should probably enable
    370             C or there will be no automated way to link to other pages in
    371             your wiki.
    372              
    373             =item implicit_links
    374              
    375             A boolean flag, false by default, to create links from StudlyCapsStrings.
    376              
    377             =item absolute_links
    378              
    379             A boolean flag, true by default, which treats any links that are absolute URIs
    380             (such as C) specially. Any prefix will not apply.
    381             This should maybe be called implicit_absolute_links since the C
    382             option enables absolute links inside square brackets by default.
    383              
    384             A link is any text that starts with a known schema followed by a colon and one
    385             or more non-whitespace characters. This is a distinct subset of what L
    386             recognizes as a URI, but is a good first-order approximation. If you need to
    387             recognize more complex URIs, use the standard wiki formatting explained
    388             earlier.
    389              
    390             The recognized schemas are those defined in the C value in the C<%tags>
    391             hash. C defaults to C, C, C, C, and
    392             C.
    393              
    394             =item process_html
    395              
    396             This flag, true by default, causes the formatter to ignore block level wiki
    397             markup (code, ordered, unordered, etc...) when they occur on lines which also
    398             contain allowed block-level HTML tags (
    , 
      ,
        ,
    , etc...).
    399             Phrase level wiki markup (emphasis, strong, & links) is unaffected by this
    400             flag.
    401              
    402             =back
    403              
    404             =cut
    405              
    406             sub format
    407             {
    408 7     7 1 6160 _format (\%tags, \%opts, @_);
    409             }
    410              
    411             # Turn the contents after a ; or : into a dictionary list.
    412             # Using : without ; just looks like an indent.
    413             sub _dl
    414             {
    415             #my ($line, $indent, $lead) = @_;
    416 23     23   18 my ($term, $def);
    417              
    418 23 100       29 if ($_[2] eq ';')
    419             {
    420 11 100       32 if ($_[0] =~ /^(.*?)\s+:\s+(.*)$/)
    421             {
    422 6         8 $term = $1;
    423 6         7 $def = $2;
    424             }
    425             else
    426             {
    427 5         7 $term = $_[0];
    428             }
    429             }
    430             else
    431             {
    432 12         12 $def = $_[0];
    433             }
    434              
    435 23         13 my @retval;
    436 23 100       44 push @retval, "
    ", $term, "
    \n" if defined $term;
    437 23 100       40 push @retval, "
    ", $def, "
    \n" if defined $def;
    438 23         57 return @retval;
    439             }
    440              
    441             # Makes a regex out of the allowed schema array.
    442             sub _make_schema_regex
    443             {
    444 47     47   67 my $re = join "|", map {qr/\Q$_\E/} @_;
      231         1625  
    445 47         774 return qr/(?:$re)/;
    446             }
    447              
    448             $uric = $URI::uric;
    449             $uricCheat = $uric;
    450              
    451             # We need to avoid picking up 'HTTP::Request::Common' so we have a
    452             # subset of uric without a colon.
    453             $uricCheat =~ tr/://d;
    454              
    455             # Identifying characters often accidentally picked up trailing a URI.
    456             $uriCruft = q/]),.!'";}/;
    457              
    458             # escape a URI based on our charset.
    459             sub _escape_uri
    460             {
    461 28     28   39 my ($opts, $uri) = @_;
    462 28 50       62 confess "charset not initialized" unless $opts->{charset};
    463 28 50       203 return uri_escape_utf8 $uri if $opts->{charset} =~ /^utf-?8$/i;
    464 0         0 return uri_escape $uri;
    465             }
    466              
    467             # Turn [[Wiki Link|Title]], [URI Title], scheme:url, or StudlyCaps into links.
    468             sub _make_html_link
    469             {
    470 32     32   67 my ($tag, $opts, $tags) = @_;
    471              
    472 32         40 my ($class, $trailing) = ('', '');
    473 32         28 my ($href, $title);
    474 32 100       131 if ($tag =~ /^\[\[([^|#]*)(?:(#)([^|]*))?(?:(\|)(.*))?\]\]$/)
        100          
    475             {
    476             # Wiki link
    477 7 50       47 $href = $opts->{prefix} . _escape_uri $opts, $1 if $1;
    478 7 50       175 $href .= $2 . _escape_uri $opts, $3 if $2;
    479              
    480 7 100       22 if ($4)
    481             {
    482             # Title specified explicitly.
    483 3 50       7 if (length $5)
    484             {
    485 3         6 $title = $5;
    486             }
    487             else
    488             {
    489             # An empty title asks Mediawiki to strip any parens off the end
    490             # of the node name.
    491 0         0 $1 =~ /^([^(]*)(?:\s*\()?/;
    492 0         0 $title = $1;
    493             }
    494             }
    495             else
    496             {
    497             # Title defaults to the node name.
    498 4         8 $title = $1;
    499             }
    500             }
    501             elsif ($tag =~ /^\[(\S*)(?:(\s+)(.*))?\]$/)
    502             {
    503             # URI
    504 5         8 $href = $1;
    505 5 50       10 if ($2)
    506             {
    507 5         7 $title = $3;
    508             }
    509             else
    510             {
    511 0         0 $title = ++$opts->{_uri_refs};
    512             }
    513 5         7 $href =~ s/'/%27/g;
    514             }
    515             else
    516             {
    517             # Shouldn't be able to get here without either $opts->{absolute_links}
    518             # or $opts->{implicit_links};
    519 20   33     49 $tags->{_schema_regex} ||= _make_schema_regex @{$tags->{schemas}};
      0         0  
    520 20         24 my $s = $tags->{_schema_regex};
    521              
    522 20 100       489 if ($tag =~ /^$s:[$uricCheat][$uric]*$/)
    523             {
    524             # absolute link
    525 8         15 $href = $&;
    526 8 100       53 $trailing = $& if $href =~ s/[$uriCruft]$//;
    527 8         15 $title = $href;
    528             }
    529             else
    530             {
    531             # StudlyCaps
    532 12         31 $href = $opts->{prefix} . _escape_uri $opts, $tag;
    533 12         254 $title = $tag;
    534             }
    535             }
    536              
    537 32         209 return "$title$trailing";
    538             }
    539              
    540             # Store a TOC line for later.
    541             #
    542             # ASSUMPTIONS
    543             # $level >= 1
    544             sub _store_toc_line
    545             {
    546 12     12   18 my ($toc, $level, $title, $name) = @_;
    547              
    548             # TODO: Strip formatting from $title.
    549              
    550 12 100 100     50 if (@$toc && $level > $toc->[-1]->{level})
    551             {
    552             # Nest a sublevel.
    553 3 100       12 $toc->[-1]->{sublevel} = []
    554             unless exists $toc->[-1]->{sublevel};
    555 3         7 _store_toc_line ($toc->[-1]->{sublevel}, $level, $title, $name);
    556             }
    557             else
    558             {
    559 9         35 push @$toc, {level => $level, title => $title, name => $name};
    560             }
    561              
    562 12         17 return $level;
    563             }
    564              
    565             # Make header text, storing the line for the TOC.
    566             #
    567             # ASSUMPTIONS
    568             # $tags->{_toc} has been initialized to an array ref.
    569             sub _make_header
    570             {
    571 9     9   12 my $level = length $_[2];
    572 9         19 my $n = _escape_uri $_[-1], $_[3];
    573              
    574 9         190 _store_toc_line ($_[-2]->{_toc}, $level, $_[3], $n);
    575              
    576 9         34 return "",
    577             Text::MediawikiFormat::format_line ($_[3], @_[-2, -1]),
    578             "\n";
    579             }
    580              
    581             sub _format
    582             {
    583 50     50   89 my ($itags, $iopts, $text, $tags, $opts) = @_;
    584              
    585             # Overwriting the caller's hashes locally after merging its contents
    586             # is okay.
    587 50   100     221 $tags = _merge_hashes ($tags || {}, $itags);
    588 50   100     203 $opts = _merge_hashes ($opts || {}, $iopts);
    589              
    590 50 50       143 _require_html_packages
    591             if $opts->{process_html};
    592              
    593             # Always verify the blocks since the user may have slagged the
    594             # default hash on import.
    595 50         94 _check_blocks ($tags);
    596              
    597 50         144 my @blocks = _find_blocks ($text, $tags, $opts);
    598 50         121 @blocks = _nest_blocks (\@blocks);
    599 50         145 return _process_blocks (\@blocks, $tags, $opts);
    600             }
    601              
    602             sub _check_blocks
    603             {
    604 52     52   1484 my $tags = shift;
    605 52         50 my %blocks = %{$tags->{blocks}};
      52         261  
    606 52         75 delete @blocks{@{$tags->{blockorder}}};
      52         176  
    607              
    608 52 100       622 carp
    609             "No order specified for blocks: "
    610             . join (', ', keys %blocks)
    611             . ".\n"
    612             if keys %blocks;
    613             }
    614              
    615             # This sub recognizes three states:
    616             #
    617             # 1. undef
    618             # Normal wiki processing will be done on this line.
    619             #
    620             # 2. html
    621             # Links and phrasal processing will be done, but formatting should be
    622             # ignored.
    623             #
    624             # 3. nowiki
    625             # No further wiki processing should be done.
    626             #
    627             # Each state may override the lower ones if already set on a given line.
    628             #
    629             sub _append_processed_line
    630             {
    631 0     0   0 my ($parser, $text, $state) = @_;
    632 0         0 my $lines = $parser->{processed_lines};
    633              
    634 0   0     0 $state ||= '';
    635              
    636 0         0 my @newlines = split /(?<=\n)/, $text;
    637 0 0 0     0 if (@$lines && $lines->[-1]->[1] !~ /\n$/
          0        
          0        
    638             && # State not changing from or to 'nowiki'
    639             !($state ne $lines->[-1]->[0]
    640             && grep /^nowiki$/, $state, $lines->[-1]->[0]))
    641             {
    642 0         0 $lines->[-1]->[1] .= shift @newlines;
    643 0 0       0 $lines->[-1]->[0] = $state if $state eq 'html';
    644             }
    645              
    646 0         0 foreach my $line (@newlines)
    647             {
    648 0 0       0 $lines->[-1]->[2] = '1' if @$lines;
    649 0         0 push @$lines, [$state, $line];
    650             }
    651 0 0 0     0 $lines->[-1]->[2] = '1'
    652             if @$lines && $lines->[-1]->[1] =~ /\n$/;
    653             }
    654              
    655             sub _html_tag
    656             {
    657 0     0   0 my ($parser, $type, $tagname, $orig, $attr) = @_;
    658 0         0 my $tags = $parser->{tags};
    659              
    660             # $tagname may have been generated by an empty tag. If so, HTML::Parser
    661             # will sometimes include the trailing / in the tag name.
    662 0         0 my $isEmptyTag = $orig =~ m#/>$#;
    663 0 0       0 $tagname =~ s#/$## if $isEmptyTag;
    664              
    665 0 0       0 unless (grep /^\Q$tagname\E$/, @{$tags->{allowed_tags}})
      0         0  
    666             {
    667 0         0 _append_processed_line $parser, CGI::escapeHTML $orig;
    668 0         0 return;
    669             }
    670             # Any $tagname must now be in the allowed list, including .
    671              
    672 0         0 my $tagstack = $parser->{tag_stack};
    673 0 0       0 my $stacktop = @$tagstack ? $tagstack->[-1] : '';
    674              
    675             # First, process end tags, since they can change our state.
    676 0 0 0     0 if ($type eq 'E' && $stacktop eq $tagname)
    677             {
    678             # The closing tag is at the top of the stack, like it should be.
    679             # Pop it and append the close tag to the output.
    680 0         0 pop @$tagstack;
    681 0         0 my $newtag;
    682              
    683 0 0       0 if ($tagname eq 'nowiki')
    684             {
    685             # The browser doesn't need to see the tag.
    686 0         0 $newtag = '';
    687             }
    688             else
    689             {
    690 0         0 $newtag = "";
    691             }
    692              
    693             # Can't close a state into
     or  
    694 0         0 _append_processed_line $parser, $newtag, 'html';
    695 0         0 return;
    696             }
    697              
    698 0 0 0     0 if (@$tagstack && grep /^\Q$stacktop\E$/, qw{nowiki pre})
    699             {
    700             # Ignore all markup within
     or  tags. 
    701 0         0 _append_processed_line $parser, CGI::escapeHTML ($orig), 'nowiki';
    702 0         0 return;
    703             }
    704              
    705 0 0 0     0 if ($type eq 'E' && $HTML::Tagset::isPhraseMarkup{$tagname})
    706             # If we ask for artificial end element events for self-closed elements,
    707             # then we need to check $HTML::Tagset::emptyElement($tagname) here too.
    708             {
    709             # We didn't record phrase markup on the stack, so it's okay to just
    710             # let it close.
    711 0         0 _append_processed_line $parser, "";
    712 0         0 return;
    713             }
    714              
    715 0 0       0 if ($type eq 'E')
    716             {
    717             # We got a non-phrase end tag that wasn't on the stack. Escape it.
    718 0         0 _append_processed_line $parser, CGI::escapeHTML ($orig);
    719 0         0 return;
    720             }
    721              
    722              
    723             ###
    724             ### $type must now eq 'S'.
    725             ###
    726              
    727             # The browser doesn't need to see the tag.
    728 0 0       0 if ($tagname eq 'nowiki')
    729             {
    730 0 0       0 push @$tagstack, $tagname
    731             unless $isEmptyTag;
    732 0         0 return;
    733             }
    734              
    735             # Strip disallowed attributes.
    736 0         0 my $newtag = "<$tagname";
    737 0         0 foreach (@{$tags->{allowed_attrs}})
      0         0  
    738             {
    739 0 0       0 if (defined $attr->{$_})
    740             {
    741 0         0 $newtag .= " $_";
    742 0 0       0 unless ($attr->{$_}
    743             eq '__TEXT_MEDIAWIKIFORMAT_BOOL__')
    744             {
    745             # CGI::escapeHTML escapes single quotes.
    746 0         0 $attr->{$_} = CGI::escapeHTML $attr->{$_};
    747 0         0 $newtag .= "='" . $attr->{$_} . "'";
    748             }
    749             }
    750             }
    751 0 0 0     0 $newtag .= " /" if $HTML::Tagset::emptyElement{$tagname} || $isEmptyTag;
    752 0         0 $newtag .= ">";
    753              
    754             # If this isn't a block level element, there's no need to track nesting.
    755 0 0 0     0 if ($HTML::Tagset::isPhraseMarkup{$tagname}
    756             || $HTML::Tagset::emptyElement{$tagname})
    757             {
    758 0         0 _append_processed_line $parser, $newtag;
    759 0         0 return;
    760             }
    761              
    762             # Some elements can close implicitly
    763 0 0       0 if (@$tagstack)
    764             {
    765 0 0 0     0 if ($tagname eq $stacktop
        0          
    766             && $HTML::Tagset::optionalEndTag{$tagname})
    767             {
    768 0         0 pop @$tagstack;
    769             }
    770             elsif (!$HTML::Tagset::is_Possible_Strict_P_Content{$tagname})
    771             {
    772             # Need to check more than the last item for paragraphs.
    773 0         0 for (my $i = $#{$tagstack}; $i >= 0; $i--)
      0         0  
    774             {
    775 0         0 my $checking = $tagstack->[$i];
    776 0 0       0 last if grep /^\Q$checking\E$/,
    777             @HTML::Tagset::p_closure_barriers;
    778              
    779 0 0       0 if ($checking eq 'p')
    780             {
    781             # pop 'em all.
    782 0         0 splice @$tagstack, $i;
    783 0         0 last;
    784             }
    785             }
    786             }
    787             }
    788              
    789             # Could verify here that
  • and sub-elements only appear where
    790             # they belong.
    791              
    792             # Push the new tag onto the stack.
    793 0 0       0 push @$tagstack, $tagname
    794             unless $isEmptyTag;
    795              
    796 0 0       0 _append_processed_line $parser, $newtag,
    797             $tagname eq 'pre' ? 'nowiki' : 'html';
    798 0         0 return;
    799             }
    800              
    801             sub _html_comment
    802             {
    803 0     0   0 my ($parser, $text) = @_;
    804              
    805 0         0 _append_processed_line $parser, $text, 'nowiki';
    806             }
    807              
    808             sub _html_text
    809             {
    810 0     0   0 my ($parser, $dtext, $skipped_text, $is_cdata) = @_;
    811 0         0 my $tagstack = $parser->{tag_stack};
    812 0         0 my ($newtext, $newstate);
    813              
    814 0 0       0 warnings::warnif ("Got skipped_text: `$skipped_text'")
    815             if $skipped_text;
    816              
    817 0 0       0 if (@$tagstack)
    818             {
    819 0 0 0     0 if (grep /\Q$tagstack->[-1]\E/, qw{nowiki pre})
        0          
    820             {
    821 0         0 $newstate = 'nowiki'
    822             }
    823             elsif ($is_cdata && $HTML::Tagset::isCDATA_Parent{$tagstack->[-1]})
    824             {
    825             # If the user hadn't specifically allowed a tag which contains
    826             # CDATA, then it won't be on the tag stack.
    827 0         0 $newtext = $dtext;
    828             }
    829             }
    830              
    831 0 0       0 unless (defined $newtext)
    832             {
    833 0 0       0 $newtext = CGI::escapeHTML $dtext unless defined $newtext;
    834             # CGI::escapeHTML escapes single quotes so the text may be included
    835             # in attribute values, but we know we aren't processing an attribute
    836             # value here.
    837 0         0 $newtext =~ s/'/'/g;
    838             }
    839              
    840 0         0 _append_processed_line $parser, $newtext, $newstate;
    841             }
    842              
    843             sub _find_blocks_in_html
    844             {
    845 0     0   0 my ($text, $tags, $opts) = @_;
    846              
    847 0         0 my $parser = HTML::Parser->new
    848             (start_h => [\&_html_tag, 'self, "S", tagname, text, attr'],
    849             end_h => [\&_html_tag, 'self, "E", tagname, text'],
    850             comment_h => [\&_html_comment, 'self, text'],
    851             text_h => [\&_html_text, 'self, dtext, skipped_text, is_cdata'],
    852             marked_sections => 1,
    853             boolean_attribute_value => '__TEXT_MEDIAWIKIFORMAT_BOOL__',
    854             );
    855 0         0 $parser->{opts} = $opts;
    856 0         0 $parser->{tags} = $tags;
    857 0         0 $parser->{processed_lines} = [];
    858 0         0 $parser->{tag_stack} = [];
    859              
    860 0         0 my @blocks;
    861 0         0 my @lines = split /\r?\n/, $text;
    862 0         0 for (my $i = 0; $i < @lines; $i++)
    863             {
    864 0         0 $parser->parse ($lines[$i]);
    865 0         0 $parser->parse ("\n");
    866 0 0       0 $parser->eof if $i == $#lines;
    867              
    868             # @{$parser->{processed_lines}} may be empty when tags are
    869             # still open.
    870 0   0     0 while (@{$parser->{processed_lines}}
      0         0  
    871             && $parser->{processed_lines}->[0]->[2])
    872             {
    873 0         0 my ($type, $dtext)
    874 0         0 = @{shift @{$parser->{processed_lines}}};
      0         0  
    875              
    876 0         0 my $block;
    877 0 0       0 if ($type)
    878             {
    879 0         0 $block = _start_block ($dtext, $tags, $opts, $type);
    880             }
    881             else
    882             {
    883 0         0 chomp $dtext;
    884 0         0 $block = _start_block ($dtext, $tags, $opts);
    885             }
    886 0 0       0 push @blocks, $block if $block;
    887             }
    888             }
    889              
    890 0         0 return @blocks;
    891             }
    892              
    893             sub _find_blocks
    894             {
    895 50     50   66 my ($text, $tags, $opts) = @_;
    896 50         53 my @blocks;
    897              
    898 50 50       115 if ($opts->{process_html})
    899             {
    900 0         0 @blocks = _find_blocks_in_html $text, $tags, $opts;
    901             }
    902             else
    903             {
    904             # The original behavior.
    905 50         490 for my $line (split /\r?\n/, $text)
    906             {
    907 300         416 my $block = _start_block ($line, $tags, $opts);
    908 300 100       852 push @blocks, $block if $block;
    909             }
    910             }
    911              
    912 50         178 return @blocks;
    913             }
    914              
    915             sub _start_block
    916             {
    917 305     305   950 my ($text, $tags, $opts, $type) = @_;
    918              
    919 305 100       641 return new_block ('end', level => 0) unless $text;
    920 218 50       306 return new_block ($type,
    921             level => 0,
    922             opts => $opts,
    923             text => $text,
    924             tags => $tags,)
    925             if $type;
    926              
    927 218         180 for my $block (@{$tags->{blockorder}})
      218         347  
    928             {
    929 1216         1122 my ($line, $level, $indentation) = ($text, 0, '');
    930              
    931 1216 100       2227 ($level, $line, $indentation) = _get_indentation ($tags, $line)
    932             if $tags->{indented}{$block};
    933              
    934 1216         3646 my $marker_removed = length ($line =~ s/$tags->{blocks}{$block}//);
    935              
    936 1216 100       2020 next unless $marker_removed;
    937              
    938 1944         2897 return new_block ($block,
    939 216   100     324 args => [grep {defined} $1, $2, $3, $4, $5, $6, $7,
    940             $8, $9],
    941             level => $level || 0,
    942             opts => $opts,
    943             text => $line,
    944             tags => $tags,
    945             );
    946             }
    947             }
    948              
    949             sub _nest_blocks
    950             {
    951 54     54   1355 my $blocks = shift;
    952 54 100       115 return unless @$blocks;
    953              
    954 53         88 my @processed = shift @$blocks;
    955              
    956 53         88 for my $block (@$blocks)
    957             {
    958 251         767 push @processed, $processed[-1]->nest( $block );
    959             }
    960              
    961 53         305 return @processed;
    962             }
    963              
    964             sub _process_blocks
    965             {
    966 51     51   102 my ($blocks, $tags, $opts) = @_;
    967              
    968 51         55 my @open;
    969 51         67 for my $block (@$blocks)
    970             {
    971 205 100       391 push @open, _process_block ($block, $tags, $opts)
    972             unless $block->type() eq 'end';
    973             }
    974              
    975 51         226 return join '', @open ;
    976             }
    977              
    978             sub _process_block
    979             {
    980 135     135   145 my ($block, $tags, $opts) = @_;
    981 135         224 my $type = $block->type();
    982              
    983 135         115 my ($start, $end, $start_line, $end_line, $between);
    984 135 50       265 if ($tags->{$type})
    985             {
    986 135         117 ($start, $end, $start_line, $end_line, $between) = @{$tags->{$type}};
      135         312  
    987             }
    988             else
    989             {
    990 0         0 ($start, $end, $start_line, $end_line) = ('', '', '', '');
    991             }
    992              
    993 135         162 my @text = ();
    994 135 100       125 for my $line (grep (/^\Q$type\E$/, @{$tags->{unformatted_blocks}})
      135         1763  
    995             ? $block->text()
    996             : $block->formatted_text())
    997             {
    998 240 100       500 if (blessed $line)
    999             {
    1000 18   33     41 my $prev_end = pop @text || ();
    1001 18         45 push @text, _process_block ($line, $tags, $opts), $prev_end;
    1002 18         31 next;
    1003             }
    1004              
    1005 222         168 my @triplets;
    1006 222 100 100     671 if ((ref ($start_line) || '') eq 'CODE')
    1007             {
    1008 32         61 @triplets = $start_line->($line, $block->level(),
    1009             $block->shift_args(), $tags, $opts);
    1010             }
    1011             else
    1012             {
    1013 190         270 @triplets = ($start_line, $line, $end_line);
    1014             }
    1015 222         416 push @text, @triplets;
    1016             }
    1017              
    1018 135 100       247 pop @text if $between;
    1019 135         433 return join '', $start, @text, $end;
    1020             }
    1021              
    1022             sub _get_indentation
    1023             {
    1024 443     443   394 my ($tags, $text) = @_;
    1025              
    1026 443 100       2472 return 1, $text unless $text =~ s/($tags->{indent})//;
    1027 238         715 return length ($1) + 1, $text, $1;
    1028             }
    1029              
    1030             =head2 format_line
    1031              
    1032             $formatted = format_line ($raw, $tags, $opts);
    1033              
    1034             This function is never exported. It formats the phrase elements of a single
    1035             line of text (emphasised, strong, and links).
    1036              
    1037             This is only meant to be called from L and so
    1038             requires $tags and $opts to have all elements filled in. If you find a use for
    1039             it, please let me know and maybe I will have it default the missing elements as
    1040             C does.
    1041              
    1042             =cut
    1043              
    1044             sub format_line
    1045             {
    1046 227     227 1 2336 my ($text, $tags, $opts) = @_;
    1047              
    1048 227         622 $text =~ s!$tags->{strong_tag}!$tags->{strong}->($1, $opts)!eg;
      5         12  
    1049 227         418 $text =~ s!$tags->{emphasized_tag}!$tags->{emphasized}->($1, $opts)!eg;
      6         11  
    1050              
    1051 227 50 66     604 $text = _find_links ($text, $tags, $opts)
          33        
    1052             if $opts->{extended}
    1053             || $opts->{absolute_links}
    1054             || $opts->{implicit_links};
    1055              
    1056 227         579 return $text;
    1057             }
    1058              
    1059             sub _find_innermost_balanced_pair
    1060             {
    1061 11     11   11 my ($text, $open, $close) = @_;
    1062              
    1063 11         18 my $start_pos = rindex $text, $open;
    1064 11 100       59 return if $start_pos == -1;
    1065              
    1066 7         10 my $end_pos = index $text, $close, $start_pos;
    1067 7 50       210 return if $end_pos == -1;
    1068              
    1069 7         8 my $open_length = length $open;
    1070 7         9 my $close_length = length $close;
    1071 7         8 my $close_pos = $end_pos + $close_length;
    1072 7         10 my $enclosed_length = $close_pos - $start_pos;
    1073              
    1074 7         14 my $enclosed_atom = substr $text, $start_pos, $enclosed_length;
    1075 7         33 return substr ($enclosed_atom, $open_length, 0 - $close_length),
    1076             substr ($text, 0, $start_pos),
    1077             substr ($text, $close_pos);
    1078             }
    1079              
    1080             sub _find_links
    1081             {
    1082 227     227   223 my ($text, $tags, $opts) = @_;
    1083              
    1084             # Build Regexp
    1085 227         172 my @res;
    1086              
    1087 227 100       367 if ($opts->{absolute_links})
    1088             {
    1089             # URI
    1090 225         149 my $s;
    1091 225   66     412 $tags->{_schema_regex} ||= _make_schema_regex @{$tags->{schemas}};
      47         129  
    1092 225         226 $s = $tags->{_schema_regex};
    1093 225         1585 push @res, qr/\b$s:[$uricCheat][$uric]*/
    1094             }
    1095              
    1096 227 100       433 if ($opts->{implicit_links})
    1097             {
    1098             # StudlyCaps
    1099 17 50       30 if ($tags->{implicit_link_delimiters})
    1100             {
    1101 17         45 push @res, qr/$tags->{implicit_link_delimiters}/;
    1102             }
    1103             else
    1104             {
    1105 0         0 warnings::warnif ("Ignoring implicit_links option since implicit_link_delimiters is empty");
    1106             }
    1107             }
    1108              
    1109 227 100       353 if ($opts->{extended})
    1110             {
    1111             # [[Wiki Page]]
    1112 226 100       574 if (!$tags->{extended_link_delimiters})
        100          
    1113             {
    1114 3         61 warnings::warnif ("Ignoring extended option since extended_link_delimiters is empty");
    1115             }
    1116             elsif (ref $tags->{extended_link_delimiters} eq "ARRAY")
    1117             {
    1118             # Backwards compatibility for extended links.
    1119             # Bypasses the regex substitution used by absolute and implicit
    1120             # links.
    1121 4         4 my ($start, $end) = @{$tags->{extended_link_delimiters}};
      4         9  
    1122 4         11 while (my @pieces = _find_innermost_balanced_pair ($text, $start,
    1123             $end))
    1124             {
    1125 7 50       12 my ($tag, $before, $after) = map { defined $_ ? $_ : '' }
      21         36  
    1126             @pieces;
    1127 7   50     29 my $extended = $tags->{link}->($tag, $opts, $tags) || '';
    1128 7         63 $text = $before . $extended . $after;
    1129             }
    1130             }
    1131             else
    1132             {
    1133 219         548 push @res, qr/$tags->{extended_link_delimiters}/;
    1134             }
    1135             }
    1136              
    1137 227 50       586 if (@res)
    1138             {
    1139 227         355 my $re = join "|", @res;
    1140 227         2484 $text =~ s/$re/$tags->{link}->($&, $opts, $tags)/ge;
      30         98  
    1141             }
    1142              
    1143 227         533 return $text;
    1144             }
    1145              
    1146             =head1 Wiki Format
    1147              
    1148             Refer to L for
    1149             description of the default wiki format, as interpreted by this module. Any
    1150             discrepencies will be considered bugs in this module, with a few exceptions.
    1151              
    1152             =head2 Unimplemented Wiki Markup
    1153              
    1154             =over 4
    1155              
    1156             =item Templates, Magic Words, and Wanted Links
    1157              
    1158             Templates, magic words, and the colorization of wanted links all require a back
    1159             end data store that can be consulted on the existance and content of named
    1160             pages. C has deliberately been constructed such that it
    1161             operates independantly from such a back end. For an interface to
    1162             C which implements these features, see
    1163             L.
    1164              
    1165             =item Tables
    1166              
    1167             This is on the TODO list.
    1168              
    1169             =back
    1170              
    1171             =head1 EXPORT
    1172              
    1173             If you'd like to make your life more convenient, you can optionally import a
    1174             subroutine that already has default tags and options set up. This is
    1175             especially handy if you use a prefix:
    1176              
    1177             use Text::MediawikiFormat prefix => 'http://www.example.com/';
    1178             wikiformat ('some text');
    1179              
    1180             Tags are interpreted as default members of the $tags hash normally passed to
    1181             C, except for the five options (see above) and the C key, who's
    1182             value is interpreted as an alternate name for the imported function.
    1183              
    1184             To use the C flag to control the name by which your code calls the imported
    1185             function, for example,
    1186              
    1187             use Text::MediawikiFormat as => 'formatTextWithWikiStyle';
    1188             formatTextWithWikiStyle ('some text');
    1189              
    1190             You might choose a better name, though.
    1191              
    1192             The calling semantics are effectively the same as those of the C
    1193             function. Any additional tags or options to the imported function will
    1194             override the defaults. This code:
    1195              
    1196             use Text::MediawikiFormat as => 'wf', extended => 0;
    1197             wf ('some text', {}, {extended => 1});
    1198              
    1199             enables extended links, after specifying that the default behavior should be
    1200             to disable them.
    1201              
    1202             =head1 GORY DETAILS
    1203              
    1204             =head2 Tags
    1205              
    1206             There are two types of Wiki markup: phrase markup and blocks. Blocks include
    1207             lists, which are made up of lines and can also contain other lists.
    1208              
    1209             =head3 Phrase Markup
    1210              
    1211             The are currently three types of wiki phrase markup. These are the
    1212             strong and emphasized markup and links. Links may additionally be of three
    1213             subtypes, extended, implicit, or absolute.
    1214              
    1215             You can change the regular expressions used to find strong and emphasized tags:
    1216              
    1217             %tags = (
    1218             strong_tag => qr/\*([^*]+?)\*/,
    1219             emphasized_tag => qr|/([^/]+?)/|,
    1220             );
    1221              
    1222             $wikitext = 'this is *strong*, /emphasized/, and */em+strong/*';
    1223             $htmltext = wikiformat ($wikitext, \%tags, {});
    1224              
    1225             You can also change the regular expressions used to find links. The following
    1226             just sets them to their default states (but enables parsing of implicit links,
    1227             which is I the default):
    1228              
    1229             my $html = wikiformat
    1230             (
    1231             $raw,
    1232             {implicit_link_delimiters => qr!\b(?:[A-Z][a-z0-9]\w*){2,}!,
    1233             extended_link_delimiters => qr!\[(?:\[[^][]*\]|[^][]*)\]!,
    1234             },
    1235             {implicit_links => 1}
    1236             );
    1237              
    1238             In addition, you may set the function references that format strong and
    1239             emphasized text and links. The strong and emphasized functions receive only
    1240             the text to be formatted as an argument and are expected to return the
    1241             formatted text. The link formatter also recieves references to the C<$tags>
    1242             and C<$opts> arrays. For example, the following sets the strong and
    1243             emphasized formatters to their default state while replacing the link formatter
    1244             with one which strips href information and returns only the title text:
    1245              
    1246             my $html = wikiformat
    1247             (
    1248             $raw,
    1249             {strong => sub {"$_[0]"},
    1250             emphasized => sub {"$_[0]"},
    1251             link => sub
    1252             {
    1253             my ($tag, $opts, $tags) = @_;
    1254             if ($tag =~ s/^\[\[([^][]+)\]\]$/$1/)
    1255             {
    1256             my ($page, $title) = split qr/\|/, $tag, 2;
    1257             return $title if $title;
    1258             return $page;
    1259             }
    1260             elsif ($tag =~ s/^\[([^][]+)\]$/$1/)
    1261             {
    1262             my ($href, $title) = split qr/ /, $tag, 2;
    1263             return $title if $title;
    1264             return $href;
    1265             }
    1266             else
    1267             {
    1268             return $tag;
    1269             }
    1270             },
    1271             },
    1272             );
    1273              
    1274             =head3 Blocks
    1275              
    1276             The default block types are C, C, C, C,
    1277             C, C, C, and C
    .
    1278              
    1279             Block entries in the tag hashes must contain array references. The first two
    1280             items are the tags used at the start and end of the block. The third and
    1281             fourth contain the tags used at the start and end of each line. Where there
    1282             needs to be more processing of individual lines, use a subref as the third
    1283             item. This is how the module processes ordered lines in HTML lists and
    1284             headers:
    1285              
    1286             my $html = wikiformat
    1287             (
    1288             $raw,
    1289             {ordered => ['
      ', "
    \n", '
  • ', "
  • \n"],
  • 1290             header => ['', "\n", \&_make_header],
    1291             },
    1292             );
    1293              
    1294             The first argument to these subrefs is the post-processed text of the line
    1295             itself. (Processing removes the indentation and tokens used to mark this as a
    1296             list and checks the rest of the line for other line formattings.) The second
    1297             argument is the indentation level (see below). The subsequent arguments are
    1298             captured variables in the regular expression used to find this list type. The
    1299             regexp for headers is:
    1300              
    1301             $html = wikiformat
    1302             (
    1303             $raw,
    1304             {blocks => {header => qr/^(=+)\s*(.+?)\s*\1$/}}
    1305             );
    1306              
    1307             The module processes indentation first, if applicable, and stores the
    1308             indentation level (the length of the indentation removed).
    1309              
    1310             Lists automatically start and end as necessary.
    1311              
    1312             Because regular expressions could conceivably match more than one line, block
    1313             level markup is processed in a specific order. The C tag governs
    1314             this order. It contains a reference to an array of the names of the
    1315             appropriate blocks to process. If you add a block type, be sure to add an
    1316             entry for it in C:
    1317              
    1318             my $html = wikiformat
    1319             (
    1320             $raw,
    1321             {invisible => ['', '', '', ''],
    1322             blocks => {invisible => qr!^--(.*?)--$!},
    1323             blockorder => [qw(code header line ordered
    1324             unordered definition invisible
    1325             paragraph_break paragraph)]
    1326             },
    1327             },
    1328             );
    1329              
    1330             =head3 Finding blocks
    1331              
    1332             As has already been mentioned in passing, C uses regular
    1333             expressions to find blocks. These are in the C<%tags> hash under the C
    1334             key. For example, to change the regular expression to find code block items,
    1335             use:
    1336              
    1337             my $html = wikiformat ($raw, {blocks => {code => qr/^:\s+/}});
    1338              
    1339             This will require a leading colon to mark code lines (note that as writted
    1340             here, this would interfere with the default processing of definition lists).
    1341              
    1342             =head3 Finding Blocks in the Correct Order
    1343              
    1344             As intrepid bug reporter Tom Hukins pointed out in CPAN RT bug #671, the order
    1345             in which C searches for blocks varies by platform and
    1346             version of Perl. Because some block-finding regular expressions are more
    1347             specific than others, what you intend to be one type of block may turn into a
    1348             different list type.
    1349              
    1350             If you're adding new block types, be aware of this. The C entry in
    1351             C<%tags> exists to force C to apply its regexes from
    1352             most specific to least specific. It contains an array reference. By default,
    1353             it looks for ordered lists first, unordered lists second, and code references
    1354             at the end.
    1355              
    1356             =head1 SEE ALSO
    1357              
    1358             L
    1359              
    1360             =head1 SUPPORT
    1361              
    1362             You can find documentation for this module with the perldoc command.
    1363              
    1364             perldoc Text::MediawikiFormat
    1365              
    1366             You can also look for information at:
    1367              
    1368             =over 4
    1369              
    1370             =item * AnnoCPAN: Annotated CPAN documentation
    1371              
    1372             L
    1373              
    1374             =item * CPAN Ratings
    1375              
    1376             L
    1377              
    1378             =item * RT: CPAN's request tracker
    1379              
    1380             L
    1381              
    1382             =item * Search CPAN
    1383              
    1384             L
    1385              
    1386             =back
    1387              
    1388             =head1 AUTHOR
    1389              
    1390             Derek Price C is the author.
    1391              
    1392             =head1 ACKNOWLEDGEMENTS
    1393              
    1394             This module is derived from L, written by chromatic.
    1395             chromatic's original credits are below:
    1396              
    1397             chromatic, C, with much input from the Jellybean team
    1398             (including Jonathan Paulett). Kate L Pugh has also provided several patches,
    1399             many failing tests, and is usually the driving force behind new features and
    1400             releases. If you think this module is worth buying me a beer, she deserves at
    1401             least half of it.
    1402              
    1403             Alex Vandiver added a nice patch and tests for extended links.
    1404              
    1405             Tony Bowden, Tom Hukins, and Andy H. all suggested useful features that are now
    1406             implemented.
    1407              
    1408             Sam Vilain, Chris Winters, Paul Schmidt, and Art Henry have all found and
    1409             reported silly bugs.
    1410              
    1411             Blame me for the implementation.
    1412              
    1413             =head1 BUGS
    1414              
    1415             The link checker in C may fail to detect existing links that do
    1416             not follow HTML, XML, or SGML style. They may die with some SGML styles too.
    1417             I.
    1418              
    1419             =head1 TODO
    1420              
    1421             =over 4
    1422              
    1423             =item * Optimize C to work on a list of lines
    1424              
    1425             =back
    1426              
    1427             =head1 COPYRIGHT & LICENSE
    1428              
    1429             Copyright (c) 2006-2008 Derek R. Price, all rights reserved.
    1430             Copyright (c) 2002 - 2006, chromatic, all rights reserved.
    1431              
    1432             This program is free software; you can redistribute it and/or modify it
    1433             under the same terms as Perl itself.
    1434              
    1435             =cut
    1436              
    1437             1; # End of Text::MediaiwkiFormat