File Coverage

blib/lib/Text/MediawikiFormat.pm
Criterion Covered Total %
statement 253 367 68.9
branch 106 194 54.6
condition 20 69 28.9
subroutine 37 42 88.1
pod 2 2 100.0
total 418 674 62.0


line stmt bran cond sub pod time code
1             package Text::MediawikiFormat;
2              
3 14     14   208599 use strict;
  14         29  
  14         602  
4 14     14   65 use warnings::register;
  14         21  
  14         2488  
5              
6             =head1 NAME
7              
8             Text::MediawikiFormat - Translate Wiki markup into other text formats
9              
10             =head1 VERSION
11              
12             Version 1.04
13              
14             =cut
15              
16             our $VERSION = '1.04';
17              
18             =head1 SYNOPSIS
19              
20             use Text::MediawikiFormat 'wikiformat';
21             my $html = wikiformat ($raw);
22             my $text = wikiformat ($raw, {}, {implicit_links => 1});
23              
24             =head1 DESCRIPTION
25              
26             L and its sister projects use the PHP Mediawiki to format
27             their pages. This module attempts to duplicate the Mediawiki formatting rules.
28             Those formatting rules can be simple and easy to use, while providing more
29             advanced options for the power user. They are also easy to translate into
30             other, more complicated markup languages with this module. It creates HTML by
31             default, but could produce valid POD, DocBook, XML, or any other format
32             imaginable.
33              
34             The most important function is C. It is
35             not exported by default, but will be exported as C if any
36             options at all are passed to the exporter, unless the name is overridden
37             explicitly. See L<"EXPORT"> for more information.
38              
39             It should be noted that this module is written as a drop in replacement for
40             L that expands on that modules functionality and provides
41             a default rule set that may be used to format text like the PHP Mediawiki. It
42             is also well to note early that if you just want a Mediawiki clone (you don't
43             need to customize it heavily and you want integration with a back end
44             database), you should look at L.
45              
46             =cut
47              
48 14     14   75 use Carp qw(carp confess croak);
  14         21  
  14         1016  
49 14     14   27225 use CGI qw(:standard);
  14         196326  
  14         109  
50 14     14   40323 use Scalar::Util qw(blessed);
  14         28  
  14         1514  
51 14     14   6749 use Text::MediawikiFormat::Blocks;
  14         29  
  14         78  
52 14     14   8654 use URI;
  14         56265  
  14         562  
53 14     14   122 use URI::Escape qw(uri_escape uri_escape_utf8);
  14         19  
  14         1162  
54              
55 14         1860 use vars qw($missing_html_packages %tags %opts %merge_matrix
56 14     14   70 $uric $uricCheat $uriCruft);
  14         19  
57              
58             BEGIN {
59             # Try to load optional HTML packages, recording any errors.
60 14     14   30 eval { require HTML::Parser };
  14         12637  
61 14         75984 $missing_html_packages = $@;
62 14         35 eval { require HTML::Tagset };
  14         8202  
63 14         34545 $missing_html_packages .= $@;
64             }
65              
66             ###
67             ### Defaults
68             ###
69             %tags = (
70             indent => qr/^(?:[:*#;]*)(?=[:*#;])/,
71             link => \&_make_html_link,
72             strong => sub {"$_[0]"},
73             emphasized => sub {"$_[0]"},
74             strong_tag => qr/'''(.+?)'''/,
75             emphasized_tag => qr/''(.+?)''/,
76              
77             code => [ '
',  "
\n", '', "\n" ],
78             line => [ '', '', '
', "\n" ],
79             paragraph => [ "

", "

\n", '', "\n", 1 ],
80             paragraph_break => [ '', '', '', "\n" ],
81             unordered => [ "
    \n", "
\n", '
  • ', "
  • \n" ],
    82             ordered => [ "
      \n", "
    \n", '
  • ', "
  • \n" ],
    83             definition => [ "
    \n", "
    \n", \&_dl ],
    84             header => [ '', "\n", \&_make_header ],
    85              
    86             blocks => {
    87             code => qr/^ /,
    88             header => qr/^(=+)\s*(.+?)\s*\1$/,
    89             line => qr/^-{4,}$/,
    90             ordered => qr/^#\s*/,
    91             unordered => qr/^\*\s*/,
    92             definition => qr/^([;:])\s*/,
    93             paragraph => qr/^/,
    94             paragraph_break => qr/^\s*$/,
    95             },
    96              
    97             indented => { map { $_ => 1 } qw(ordered unordered definition) },
    98             nests => { map { $_ => 1 } qw(ordered unordered definition) },
    99             nests_anywhere => { map { $_ => 1 } qw(nowiki) },
    100              
    101             blockorder => [
    102             qw(code header line ordered unordered definition
    103             paragraph_break paragraph)
    104             ],
    105             implicit_link_delimiters => qr!\b(?:[A-Z][a-z0-9]\w*){2,}!,
    106             extended_link_delimiters => qr!\[(?:\[[^][]*\]|[^][]*)\]!,
    107              
    108             schemas => [qw(http https ftp mailto gopher)],
    109              
    110             unformatted_blocks => [qw(header nowiki pre)],
    111              
    112             allowed_tags => [ #HTML
    113             qw(b big blockquote br caption center cite code dd
    114             div dl dt em font h1 h2 h3 h4 h5 h6 hr i li ol p
    115             pre rb rp rt ruby s samp small strike strong sub
    116             sup table td th tr tt u ul var),
    117              
    118             # Mediawiki Specific
    119             qw(nowiki),
    120             ],
    121             allowed_attrs => [
    122             qw(title align lang dir width height bgcolor),
    123             qw(clear), # BR
    124             qw(noshade), # HR
    125             qw(cite), # BLOCKQUOTE, Q
    126             qw(size face color), # FONT
    127             # For various lists, mostly deprecated but safe
    128             qw(type start value compact),
    129              
    130             # Tables
    131             qw(summary width border frame rules cellspacing
    132             cellpadding valign char charoff colgroup col
    133             span abbr axis headers scope rowspan colspan),
    134             qw(id class name style), # For CSS
    135             ],
    136              
    137             _toc => [],
    138             );
    139              
    140             %opts = (
    141             extended => 1,
    142             implicit_links => 0,
    143             absolute_links => 1,
    144             prefix => '',
    145             process_html => 1,
    146             charset => 'utf-8',
    147             );
    148              
    149             # Make sure import's argument hash contains an `as' entry. `as' defaults to
    150             # `wikiformat' when none is given.
    151             sub _process_args {
    152 15     15   23 shift; # Class
    153 15 100       57 return as => shift if @_ == 1;
    154 14         92 return as => 'wikiformat', @_;
    155             }
    156              
    157             # Delete the options (prefix, extended, implicit_links, ...) from a hash,
    158             # returning a new hash with the deleted options.
    159             sub _extract_opts {
    160 15     15   29 my %newopts;
    161              
    162 15         47 for my $key (
    163             qw{prefix extended implicit_links absolute_links
    164             process_html debug}
    165             )
    166             {
    167 90 100       253 if ( defined( my $val = delete $_[0]->{$key} ) ) {
    168 19         53 $newopts{$key} = $val;
    169             }
    170             }
    171              
    172 15         69 return \%newopts;
    173             }
    174              
    175             # Shamelessly ripped from Hash::Merge, which doesn't work in a threaded
    176             # environment with two threads trying to use different merge matrices.
    177             %merge_matrix = (
    178             SCALAR => {
    179             SCALAR => sub { return $_[0] },
    180             ARRAY => sub { # Need to be able to replace scalar with array
    181             # for extended_link_delimiters (could be array
    182             # or regex).
    183             return $_[0];
    184             },
    185             HASH => sub {
    186             confess "Attempt to replace hash with scalar"
    187             if defined $_[0];
    188             return _clone( $_[1] );
    189             }
    190             },
    191              
    192             ARRAY => {
    193             SCALAR => sub { # Need to be able to replace array with scalar
    194             # for extended_link_delimiters (could be array
    195             # or regex).
    196             return _clone( $_[0] );
    197             },
    198             ARRAY => sub { return _clone( $_[0] ); },
    199             HASH => sub { confess "Attempt to replace hash with array" }
    200             },
    201              
    202             HASH => {
    203             SCALAR => sub { confess "Attempt to replace scalar with hash" },
    204             ARRAY => sub { confess "Attempt to replace array with hash" },
    205             HASH => sub { _merge_hash_elements( $_[0], $_[1] ) }
    206             }
    207             );
    208              
    209             # Return arrays and a deep copy of hashes.
    210             sub _clone {
    211 2890     2890   2487 my ($obj) = @_;
    212 2890         2030 my $type;
    213 2890 50       6495 if ( !defined $obj ) { # Perl 5.005 compatibility
        100          
        100          
    214 0         0 $type = 'SCALAR';
    215             }
    216             elsif ( ref $obj eq 'HASH' ) {
    217 231         226 $type = 'HASH';
    218             }
    219             elsif ( ref $obj eq 'ARRAY' ) {
    220 913         838 $type = 'ARRAY';
    221             }
    222             else {
    223 1746         1475 $type = 'SCALAR';
    224             }
    225              
    226 2890 100       6314 return $obj if $type eq 'SCALAR';
    227 1144 100       2596 return $obj if $type eq 'ARRAY';
    228              
    229 231         201 my %copy;
    230 231         492 foreach my $key ( keys %$obj ) {
    231 800         1105 $copy{$key} = _clone( $obj->{$key} );
    232             }
    233 231         615 return \%copy;
    234             }
    235              
    236             # This does a straight merge of hashes, delegating the merge-specific
    237             # work to '_merge_hashes'.
    238             sub _merge_hash_elements {
    239 169     169   177 my ( $left, $right ) = @_;
    240 169 50 33     987 die "Arguments for _merge_hash_elements must be hash references"
    241             unless UNIVERSAL::isa( $left, 'HASH' ) && UNIVERSAL::isa( $right, 'HASH' );
    242              
    243 169         158 my %newhash;
    244 169         512 foreach my $leftkey ( keys %$left ) {
    245 243 100       378 if ( exists $right->{$leftkey} ) {
    246 235         435 $newhash{$leftkey} = _merge_hashes( $left->{$leftkey}, $right->{$leftkey} );
    247             }
    248             else {
    249 8         15 $newhash{$leftkey} = _clone( $left->{$leftkey} );
    250             }
    251             }
    252 169         694 foreach my $rightkey ( keys %$right ) {
    253 2266 100       4650 $newhash{$rightkey} = _clone( $right->{$rightkey} )
    254             if !exists $left->{$rightkey};
    255             }
    256 169         508 return \%newhash;
    257             }
    258              
    259             sub _merge_hashes {
    260 371     371   3762 my ( $left, $right ) = @_;
    261              
    262             # if one argument or the other is undefined or empty, don't worry about
    263             # copying, just return the original.
    264 371 50       707 return $right unless defined $left;
    265 371 50       549 return $left unless defined $right;
    266              
    267             # For the general use of this function, we want to create duplicates
    268             # of all data that is merged.
    269              
    270 371         285 my ( $lefttype, $righttype );
    271 371 100       829 if ( ref $left eq 'HASH' ) {
        100          
    272 169         204 $lefttype = 'HASH';
    273             }
    274             elsif ( ref $left eq 'ARRAY' ) {
    275 51         54 $lefttype = 'ARRAY';
    276             }
    277             else {
    278 151         157 $lefttype = 'SCALAR';
    279             }
    280              
    281 371 100       664 if ( ref $right eq 'HASH' ) {
        100          
    282 169         176 $righttype = 'HASH';
    283             }
    284             elsif ( ref $right eq 'ARRAY' ) {
    285 49         43 $righttype = 'ARRAY';
    286             }
    287             else {
    288 153         155 $righttype = 'SCALAR';
    289             }
    290              
    291 371         847 return $merge_matrix{$lefttype}->{$righttype}( $left, $right );
    292             }
    293              
    294             sub _require_html_packages {
    295 1 50   1   4 croak "$missing_html_packages\n" . "HTML::Parser & HTML::Tagset is required for process_html\n"
    296             if $missing_html_packages;
    297             }
    298              
    299             sub import {
    300 18 100   18   4025 return unless @_ > 1;
    301              
    302 15         42 my $class = shift;
    303 15         71 my %args = $class->_process_args(@_);
    304 15         56 my $name = delete $args{as};
    305              
    306 15         69 my $caller = caller();
    307 15         229 my $iopts = _merge_hashes _extract_opts( \%args ), \%opts;
    308 15         67 my $itags = _merge_hashes \%args, \%tags;
    309              
    310 15 100       82 _require_html_packages
    311             if $iopts->{process_html};
    312              
    313             # Could verify ITAGS here via _check_blocks, but what if a user
    314             # wants to add a block to block_order that they intend to override
    315             # the implementation of with every call to format()?
    316              
    317 14     14   121 no strict 'refs';
      14         20  
      14         50271  
    318 15         17904 *{ $caller . "::" . $name } = sub {
    319 45     45   30730 Text::MediawikiFormat::_format( $itags, $iopts, @_ );
    320             }
    321 15         80 }
    322              
    323             =head1 FUNCTIONS
    324              
    325             =head2 format
    326              
    327             C takes one required argument, the text to convert, and returns the
    328             converted text. It allows two optional arguments. The first is a reference to
    329             a hash of tags used to override the function's default behavior. Anything
    330             passed in here will override the default tags. The second argument is a hash
    331             reference of options. The options are currently:
    332              
    333             =over 4
    334              
    335             =item prefix
    336              
    337             The prefix of any links to wiki pages. In HTML mode, this is the path to the
    338             Wiki. The actual linked item itself will be appended to the prefix. This is
    339             useful to create full URIs:
    340              
    341             {prefix => 'http://example.com/wiki.pl?page='}
    342              
    343             =item extended
    344              
    345             A boolean flag, true by default, to let square brackets mark links.
    346             An optional title may occur after the Wiki targets, preceded by an open pipe.
    347             URI titles are separated from their title with a space. These are valid
    348             extended links:
    349              
    350             [[A wiki page|and the title to display]]
    351             [http://ximbiot.com URI title]
    352              
    353             Where the linking semantics of the destination format allow it, the result will
    354             display the title instead of the URI. In HTML terms, the title is the content
    355             of an C element (not the content of its C attribute).
    356              
    357             You can use delimiters other than single square brackets for marking extended
    358             links by passing a value for C in the C<%tags> hash
    359             when calling C.
    360              
    361             Note that if you disable this flag, you should probably enable
    362             C or there will be no automated way to link to other pages in
    363             your wiki.
    364              
    365             =item implicit_links
    366              
    367             A boolean flag, false by default, to create links from StudlyCapsStrings.
    368              
    369             =item absolute_links
    370              
    371             A boolean flag, true by default, which treats any links that are absolute URIs
    372             (such as C) specially. Any prefix will not apply.
    373             This should maybe be called implicit_absolute_links since the C
    374             option enables absolute links inside square brackets by default.
    375              
    376             A link is any text that starts with a known schema followed by a colon and one
    377             or more non-whitespace characters. This is a distinct subset of what L
    378             recognizes as a URI, but is a good first-order approximation. If you need to
    379             recognize more complex URIs, use the standard wiki formatting explained
    380             earlier.
    381              
    382             The recognized schemas are those defined in the C value in the C<%tags>
    383             hash. C defaults to C, C, C, C, and
    384             C.
    385              
    386             =item process_html
    387              
    388             This flag, true by default, causes the formatter to ignore block level wiki
    389             markup (code, ordered, unordered, etc...) when they occur on lines which also
    390             contain allowed block-level HTML tags (
    , 
      ,
        ,
    , etc...).
    391             Phrase level wiki markup (emphasis, strong, & links) is unaffected by this
    392             flag.
    393              
    394             =back
    395              
    396             =cut
    397              
    398             sub format {
    399 7     7 1 22763 _format( \%tags, \%opts, @_ );
    400             }
    401              
    402             # Turn the contents after a ; or : into a dictionary list.
    403             # Using : without ; just looks like an indent.
    404             sub _dl {
    405              
    406             #my ($line, $indent, $lead) = @_;
    407 23     23   17 my ( $term, $def );
    408              
    409 23 100       31 if ( $_[2] eq ';' ) {
    410 11 100       35 if ( $_[0] =~ /^(.*?)\s+:\s+(.*)$/ ) {
    411 6         10 $term = $1;
    412 6         9 $def = $2;
    413             }
    414             else {
    415 5         7 $term = $_[0];
    416             }
    417             }
    418             else {
    419 12         12 $def = $_[0];
    420             }
    421              
    422 23         18 my @retval;
    423 23 100       47 push @retval, "
    ", $term, "
    \n" if defined $term;
    424 23 100       43 push @retval, "
    ", $def, "
    \n" if defined $def;
    425 23         63 return @retval;
    426             }
    427              
    428             # Makes a regex out of the allowed schema array.
    429             sub _make_schema_regex {
    430 47     47   85 my $re = join "|", map {qr/\Q$_\E/} @_;
      231         1998  
    431 47         871 return qr/(?:$re)/;
    432             }
    433              
    434             $uric = $URI::uric;
    435             $uricCheat = $uric;
    436              
    437             # We need to avoid picking up 'HTTP::Request::Common' so we have a
    438             # subset of uric without a colon.
    439             $uricCheat =~ tr/://d;
    440              
    441             # Identifying characters often accidentally picked up trailing a URI.
    442             $uriCruft = q/]),.!'";}/;
    443              
    444             # escape a URI based on our charset.
    445             sub _escape_uri {
    446 28     28   43 my ( $opts, $uri ) = @_;
    447 28 50       76 confess "charset not initialized" unless $opts->{charset};
    448 28 50       247 return uri_escape_utf8 $uri if $opts->{charset} =~ /^utf-?8$/i;
    449 0         0 return uri_escape $uri;
    450             }
    451              
    452             # Turn [[Wiki Link|Title]], [URI Title], scheme:url, or StudlyCaps into links.
    453             sub _make_html_link {
    454 32     32   77 my ( $tag, $opts, $tags ) = @_;
    455              
    456 32         48 my ( $class, $trailing ) = ( '', '' );
    457 32         34 my ( $href, $title );
    458 32 100       147 if ( $tag =~ /^\[\[([^|#]*)(?:(#)([^|]*))?(?:(\|)(.*))?\]\]$/ ) {
        100          
    459              
    460             # Wiki link
    461 7 50       55 $href = $opts->{prefix} . _escape_uri $opts, $1 if $1;
    462 7 50       194 $href .= $2 . _escape_uri $opts, $3 if $2;
    463              
    464 7 100       21 if ($4) {
    465              
    466             # Title specified explicitly.
    467 3 50       10 if ( length $5 ) {
    468 3         8 $title = $5;
    469             }
    470             else {
    471             # An empty title asks Mediawiki to strip any parens off the end
    472             # of the node name.
    473 0         0 $1 =~ /^([^(]*)(?:\s*\()?/;
    474 0         0 $title = $1;
    475             }
    476             }
    477             else {
    478             # Title defaults to the node name.
    479 4         7 $title = $1;
    480             }
    481             }
    482             elsif ( $tag =~ /^\[(\S*)(?:(\s+)(.*))?\]$/ ) {
    483              
    484             # URI
    485 5         10 $href = $1;
    486 5 50       13 if ($2) {
    487 5         8 $title = $3;
    488             }
    489             else {
    490 0         0 $title = ++$opts->{_uri_refs};
    491             }
    492 5         10 $href =~ s/'/%27/g;
    493             }
    494             else {
    495             # Shouldn't be able to get here without either $opts->{absolute_links}
    496             # or $opts->{implicit_links};
    497 20   33     50 $tags->{_schema_regex} ||= _make_schema_regex @{ $tags->{schemas} };
      0         0  
    498 20         25 my $s = $tags->{_schema_regex};
    499              
    500 20 100       525 if ( $tag =~ /^$s:[$uricCheat][$uric]*$/ ) {
    501              
    502             # absolute link
    503 8         17 $href = $&;
    504 8 100       67 $trailing = $& if $href =~ s/[$uriCruft]$//;
    505 8         16 $title = $href;
    506             }
    507             else {
    508             # StudlyCaps
    509 12         38 $href = $opts->{prefix} . _escape_uri $opts, $tag;
    510 12         290 $title = $tag;
    511             }
    512             }
    513              
    514 32         223 return "$title$trailing";
    515             }
    516              
    517             # Store a TOC line for later.
    518             #
    519             # ASSUMPTIONS
    520             # $level >= 1
    521             sub _store_toc_line {
    522 12     12   20 my ( $toc, $level, $title, $name ) = @_;
    523              
    524             # TODO: Strip formatting from $title.
    525              
    526 12 100 100     63 if ( @$toc && $level > $toc->[-1]->{level} ) {
    527              
    528             # Nest a sublevel.
    529 3 100       19 $toc->[-1]->{sublevel} = []
    530             unless exists $toc->[-1]->{sublevel};
    531 3         13 _store_toc_line( $toc->[-1]->{sublevel}, $level, $title, $name );
    532             }
    533             else {
    534 9         54 push @$toc, { level => $level, title => $title, name => $name };
    535             }
    536              
    537 12         23 return $level;
    538             }
    539              
    540             # Make header text, storing the line for the TOC.
    541             #
    542             # ASSUMPTIONS
    543             # $tags->{_toc} has been initialized to an array ref.
    544             sub _make_header {
    545 9     9   16 my $level = length $_[2];
    546 9         29 my $n = _escape_uri $_[-1], $_[3];
    547              
    548 9         249 _store_toc_line( $_[-2]->{_toc}, $level, $_[3], $n );
    549              
    550 9         50 return "", Text::MediawikiFormat::format_line( $_[3], @_[ -2, -1 ] ), "\n";
    551             }
    552              
    553             sub _format {
    554 50     50   117 my ( $itags, $iopts, $text, $tags, $opts ) = @_;
    555              
    556             # Overwriting the caller's hashes locally after merging its contents
    557             # is okay.
    558 50   100     366 $tags = _merge_hashes( $tags || {}, $itags );
    559 50   100     253 $opts = _merge_hashes( $opts || {}, $iopts );
    560              
    561 50 50       157 _require_html_packages
    562             if $opts->{process_html};
    563              
    564             # Always verify the blocks since the user may have slagged the
    565             # default hash on import.
    566 50         114 _check_blocks($tags);
    567              
    568 50         157 my @blocks = _find_blocks( $text, $tags, $opts );
    569 50         144 @blocks = _nest_blocks( \@blocks );
    570 50         200 return _process_blocks( \@blocks, $tags, $opts );
    571             }
    572              
    573             sub _check_blocks {
    574 52     52   2467 my $tags = shift;
    575 52         64 my %blocks = %{ $tags->{blocks} };
      52         305  
    576 52         80 delete @blocks{ @{ $tags->{blockorder} } };
      52         212  
    577              
    578 52 100       666 carp "No order specified for blocks: " . join( ', ', keys %blocks ) . ".\n"
    579             if keys %blocks;
    580             }
    581              
    582             # This sub recognizes three states:
    583             #
    584             # 1. undef
    585             # Normal wiki processing will be done on this line.
    586             #
    587             # 2. html
    588             # Links and phrasal processing will be done, but formatting should be
    589             # ignored.
    590             #
    591             # 3. nowiki
    592             # No further wiki processing should be done.
    593             #
    594             # Each state may override the lower ones if already set on a given line.
    595             #
    596             sub _append_processed_line {
    597 0     0   0 my ( $parser, $text, $state ) = @_;
    598 0         0 my $lines = $parser->{processed_lines};
    599              
    600 0   0     0 $state ||= '';
    601              
    602 0         0 my @newlines = split /(?<=\n)/, $text;
    603 0 0 0     0 if (
          0        
          0        
    604             @$lines
    605             && $lines->[-1]->[1] !~ /\n$/
    606             && # State not changing from or to 'nowiki'
    607             !( $state ne $lines->[-1]->[0] && grep /^nowiki$/, $state, $lines->[-1]->[0] )
    608             )
    609             {
    610 0         0 $lines->[-1]->[1] .= shift @newlines;
    611 0 0       0 $lines->[-1]->[0] = $state if $state eq 'html';
    612             }
    613              
    614 0         0 foreach my $line (@newlines) {
    615 0 0       0 $lines->[-1]->[2] = '1' if @$lines;
    616 0         0 push @$lines, [ $state, $line ];
    617             }
    618 0 0 0     0 $lines->[-1]->[2] = '1'
    619             if @$lines && $lines->[-1]->[1] =~ /\n$/;
    620             }
    621              
    622             sub _html_tag {
    623 0     0   0 my ( $parser, $type, $tagname, $orig, $attr ) = @_;
    624 0         0 my $tags = $parser->{tags};
    625              
    626             # $tagname may have been generated by an empty tag. If so, HTML::Parser
    627             # will sometimes include the trailing / in the tag name.
    628 0         0 my $isEmptyTag = $orig =~ m#/>$#;
    629 0 0       0 $tagname =~ s#/$## if $isEmptyTag;
    630              
    631 0 0       0 unless ( grep /^\Q$tagname\E$/, @{ $tags->{allowed_tags} } ) {
      0         0  
    632 0         0 _append_processed_line $parser, CGI::escapeHTML $orig;
    633 0         0 return;
    634             }
    635              
    636             # Any $tagname must now be in the allowed list, including .
    637              
    638 0         0 my $tagstack = $parser->{tag_stack};
    639 0 0       0 my $stacktop = @$tagstack ? $tagstack->[-1] : '';
    640              
    641             # First, process end tags, since they can change our state.
    642 0 0 0     0 if ( $type eq 'E' && $stacktop eq $tagname ) {
    643              
    644             # The closing tag is at the top of the stack, like it should be.
    645             # Pop it and append the close tag to the output.
    646 0         0 pop @$tagstack;
    647 0         0 my $newtag;
    648              
    649 0 0       0 if ( $tagname eq 'nowiki' ) {
    650              
    651             # The browser doesn't need to see the tag.
    652 0         0 $newtag = '';
    653             }
    654             else {
    655 0         0 $newtag = "";
    656             }
    657              
    658             # Can't close a state into
     or  
    659 0         0 _append_processed_line $parser, $newtag, 'html';
    660 0         0 return;
    661             }
    662              
    663 0 0 0     0 if ( @$tagstack && grep /^\Q$stacktop\E$/, qw{nowiki pre} ) {
    664              
    665             # Ignore all markup within
     or  tags. 
    666 0         0 _append_processed_line $parser, CGI::escapeHTML($orig), 'nowiki';
    667 0         0 return;
    668             }
    669              
    670 0 0 0     0 if ( $type eq 'E' && $HTML::Tagset::isPhraseMarkup{$tagname} )
    671              
    672             # If we ask for artificial end element events for self-closed elements,
    673             # then we need to check $HTML::Tagset::emptyElement($tagname) here too.
    674             {
    675             # We didn't record phrase markup on the stack, so it's okay to just
    676             # let it close.
    677 0         0 _append_processed_line $parser, "";
    678 0         0 return;
    679             }
    680              
    681 0 0       0 if ( $type eq 'E' ) {
    682              
    683             # We got a non-phrase end tag that wasn't on the stack. Escape it.
    684 0         0 _append_processed_line $parser, CGI::escapeHTML($orig);
    685 0         0 return;
    686             }
    687              
    688             ###
    689             ### $type must now eq 'S'.
    690             ###
    691              
    692             # The browser doesn't need to see the tag.
    693 0 0       0 if ( $tagname eq 'nowiki' ) {
    694 0 0       0 push @$tagstack, $tagname
    695             unless $isEmptyTag;
    696 0         0 return;
    697             }
    698              
    699             # Strip disallowed attributes.
    700 0         0 my $newtag = "<$tagname";
    701 0         0 foreach ( @{ $tags->{allowed_attrs} } ) {
      0         0  
    702 0 0       0 if ( defined $attr->{$_} ) {
    703 0         0 $newtag .= " $_";
    704 0 0       0 unless ( $attr->{$_} eq '__TEXT_MEDIAWIKIFORMAT_BOOL__' ) {
    705              
    706             # CGI::escapeHTML escapes single quotes.
    707 0         0 $attr->{$_} = CGI::escapeHTML $attr->{$_};
    708 0         0 $newtag .= "='" . $attr->{$_} . "'";
    709             }
    710             }
    711             }
    712 0 0 0     0 $newtag .= " /" if $HTML::Tagset::emptyElement{$tagname} || $isEmptyTag;
    713 0         0 $newtag .= ">";
    714              
    715             # If this isn't a block level element, there's no need to track nesting.
    716 0 0 0     0 if ( $HTML::Tagset::isPhraseMarkup{$tagname}
    717             || $HTML::Tagset::emptyElement{$tagname} )
    718             {
    719 0         0 _append_processed_line $parser, $newtag;
    720 0         0 return;
    721             }
    722              
    723             # Some elements can close implicitly
    724 0 0       0 if (@$tagstack) {
    725 0 0 0     0 if ( $tagname eq $stacktop
        0          
    726             && $HTML::Tagset::optionalEndTag{$tagname} )
    727             {
    728 0         0 pop @$tagstack;
    729             }
    730             elsif ( !$HTML::Tagset::is_Possible_Strict_P_Content{$tagname} ) {
    731              
    732             # Need to check more than the last item for paragraphs.
    733 0         0 for ( my $i = $#{$tagstack}; $i >= 0; $i-- ) {
      0         0  
    734 0         0 my $checking = $tagstack->[$i];
    735 0 0       0 last if grep /^\Q$checking\E$/, @HTML::Tagset::p_closure_barriers;
    736              
    737 0 0       0 if ( $checking eq 'p' ) {
    738              
    739             # pop 'em all.
    740 0         0 splice @$tagstack, $i;
    741 0         0 last;
    742             }
    743             }
    744             }
    745             }
    746              
    747             # Could verify here that
  • and sub-elements only appear where
    748             # they belong.
    749              
    750             # Push the new tag onto the stack.
    751 0 0       0 push @$tagstack, $tagname
    752             unless $isEmptyTag;
    753              
    754 0 0       0 _append_processed_line $parser, $newtag, $tagname eq 'pre' ? 'nowiki' : 'html';
    755 0         0 return;
    756             }
    757              
    758             sub _html_comment {
    759 0     0   0 my ( $parser, $text ) = @_;
    760              
    761 0         0 _append_processed_line $parser, $text, 'nowiki';
    762             }
    763              
    764             sub _html_text {
    765 0     0   0 my ( $parser, $dtext, $skipped_text, $is_cdata ) = @_;
    766 0         0 my $tagstack = $parser->{tag_stack};
    767 0         0 my ( $newtext, $newstate );
    768              
    769 0 0       0 warnings::warnif("Got skipped_text: `$skipped_text'")
    770             if $skipped_text;
    771              
    772 0 0       0 if (@$tagstack) {
    773 0 0 0     0 if ( grep /\Q$tagstack->[-1]\E/, qw{nowiki pre} ) {
        0          
    774 0         0 $newstate = 'nowiki';
    775             }
    776             elsif ( $is_cdata && $HTML::Tagset::isCDATA_Parent{ $tagstack->[-1] } ) {
    777              
    778             # If the user hadn't specifically allowed a tag which contains
    779             # CDATA, then it won't be on the tag stack.
    780 0         0 $newtext = $dtext;
    781             }
    782             }
    783              
    784 0 0       0 unless ( defined $newtext ) {
    785 0 0       0 $newtext = CGI::escapeHTML $dtext unless defined $newtext;
    786              
    787             # CGI::escapeHTML escapes single quotes so the text may be included
    788             # in attribute values, but we know we aren't processing an attribute
    789             # value here.
    790 0         0 $newtext =~ s/'/'/g;
    791             }
    792              
    793 0         0 _append_processed_line $parser, $newtext, $newstate;
    794             }
    795              
    796             sub _find_blocks_in_html {
    797 0     0   0 my ( $text, $tags, $opts ) = @_;
    798              
    799 0         0 my $parser = HTML::Parser->new(
    800             start_h => [ \&_html_tag, 'self, "S", tagname, text, attr' ],
    801             end_h => [ \&_html_tag, 'self, "E", tagname, text' ],
    802             comment_h => [ \&_html_comment, 'self, text' ],
    803             text_h => [ \&_html_text, 'self, dtext, skipped_text, is_cdata' ],
    804             marked_sections => 1,
    805             boolean_attribute_value => '__TEXT_MEDIAWIKIFORMAT_BOOL__',
    806             );
    807 0         0 $parser->{opts} = $opts;
    808 0         0 $parser->{tags} = $tags;
    809 0         0 $parser->{processed_lines} = [];
    810 0         0 $parser->{tag_stack} = [];
    811              
    812 0         0 my @blocks;
    813 0         0 my @lines = split /\r?\n/, $text;
    814 0         0 for ( my $i = 0; $i < @lines; $i++ ) {
    815 0         0 $parser->parse( $lines[$i] );
    816 0         0 $parser->parse("\n");
    817 0 0       0 $parser->eof if $i == $#lines;
    818              
    819             # @{$parser->{processed_lines}} may be empty when tags are
    820             # still open.
    821 0   0     0 while ( @{ $parser->{processed_lines} }
      0         0  
    822             && $parser->{processed_lines}->[0]->[2] )
    823             {
    824 0         0 my ( $type, $dtext )
    825 0         0 = @{ shift @{ $parser->{processed_lines} } };
      0         0  
    826              
    827 0         0 my $block;
    828 0 0       0 if ($type) {
    829 0         0 $block = _start_block( $dtext, $tags, $opts, $type );
    830             }
    831             else {
    832 0         0 chomp $dtext;
    833 0         0 $block = _start_block( $dtext, $tags, $opts );
    834             }
    835 0 0       0 push @blocks, $block if $block;
    836             }
    837             }
    838              
    839 0         0 return @blocks;
    840             }
    841              
    842             sub _find_blocks {
    843 50     50   74 my ( $text, $tags, $opts ) = @_;
    844 50         64 my @blocks;
    845              
    846 50 50       143 if ( $opts->{process_html} ) {
    847 0         0 @blocks = _find_blocks_in_html $text, $tags, $opts;
    848             }
    849             else {
    850             # The original behavior.
    851 50         599 for my $line ( split /\r?\n/, $text ) {
    852 300         501 my $block = _start_block( $line, $tags, $opts );
    853 300 100       926 push @blocks, $block if $block;
    854             }
    855             }
    856              
    857 50         265 return @blocks;
    858             }
    859              
    860             sub _start_block {
    861 305     305   1458 my ( $text, $tags, $opts, $type ) = @_;
    862              
    863 305 100       723 return new_block( 'end', level => 0 ) unless $text;
    864 218 50       381 return new_block(
    865             $type,
    866             level => 0,
    867             opts => $opts,
    868             text => $text,
    869             tags => $tags,
    870             ) if $type;
    871              
    872 218         194 for my $block ( @{ $tags->{blockorder} } ) {
      218         421  
    873 1216         1277 my ( $line, $level, $indentation ) = ( $text, 0, '' );
    874              
    875 1216 100       2385 ( $level, $line, $indentation ) = _get_indentation( $tags, $line )
    876             if $tags->{indented}{$block};
    877              
    878 1216         3785 my $marker_removed = length( $line =~ s/$tags->{blocks}{$block}// );
    879              
    880 1216 100       2175 next unless $marker_removed;
    881              
    882 1944         3448 return new_block(
    883             $block,
    884 216   100     502 args => [ grep {defined} $1, $2, $3, $4, $5, $6, $7, $8, $9 ],
    885             level => $level || 0,
    886             opts => $opts,
    887             text => $line,
    888             tags => $tags,
    889             );
    890             }
    891             }
    892              
    893             sub _nest_blocks {
    894 54     54   1442 my $blocks = shift;
    895 54 100       141 return unless @$blocks;
    896              
    897 53         93 my @processed = shift @$blocks;
    898              
    899 53         108 for my $block (@$blocks) {
    900 251         925 push @processed, $processed[-1]->nest($block);
    901             }
    902              
    903 53         337 return @processed;
    904             }
    905              
    906             sub _process_blocks {
    907 51     51   93 my ( $blocks, $tags, $opts ) = @_;
    908              
    909 51         55 my @open;
    910 51         86 for my $block (@$blocks) {
    911 205 100       461 push @open, _process_block( $block, $tags, $opts )
    912             unless $block->type() eq 'end';
    913             }
    914              
    915 51         280 return join '', @open;
    916             }
    917              
    918             sub _process_block {
    919 135     135   172 my ( $block, $tags, $opts ) = @_;
    920 135         262 my $type = $block->type();
    921              
    922 135         144 my ( $start, $end, $start_line, $end_line, $between );
    923 135 50       284 if ( $tags->{$type} ) {
    924 135         136 ( $start, $end, $start_line, $end_line, $between ) = @{ $tags->{$type} };
      135         385  
    925             }
    926             else {
    927 0         0 ( $start, $end, $start_line, $end_line ) = ( '', '', '', '' );
    928             }
    929              
    930 135         185 my @text = ();
    931 135 100       135 for my $line (
      135         2638  
    932             grep ( /^\Q$type\E$/, @{ $tags->{unformatted_blocks} } )
    933             ? $block->text()
    934             : $block->formatted_text()
    935             )
    936             {
    937 240 100       538 if ( blessed $line) {
    938 18   33     44 my $prev_end = pop @text || ();
    939 18         47 push @text, _process_block( $line, $tags, $opts ), $prev_end;
    940 18         31 next;
    941             }
    942              
    943 222         201 my @triplets;
    944 222 100 100     785 if ( ( ref($start_line) || '' ) eq 'CODE' ) {
    945 32         78 @triplets = $start_line->( $line, $block->level(), $block->shift_args(), $tags, $opts );
    946             }
    947             else {
    948 190         380 @triplets = ( $start_line, $line, $end_line );
    949             }
    950 222         469 push @text, @triplets;
    951             }
    952              
    953 135 100       285 pop @text if $between;
    954 135         514 return join '', $start, @text, $end;
    955             }
    956              
    957             sub _get_indentation {
    958 443     443   442 my ( $tags, $text ) = @_;
    959              
    960 443 100       2882 return 1, $text unless $text =~ s/($tags->{indent})//;
    961 238         797 return length($1) + 1, $text, $1;
    962             }
    963              
    964             =head2 format_line
    965              
    966             $formatted = format_line ($raw, $tags, $opts);
    967              
    968             This function is never exported. It formats the phrase elements of a single
    969             line of text (emphasised, strong, and links).
    970              
    971             This is only meant to be called from L and so
    972             requires $tags and $opts to have all elements filled in. If you find a use for
    973             it, please let me know and maybe I will have it default the missing elements as
    974             C does.
    975              
    976             =cut
    977              
    978             sub format_line {
    979 227     227 1 3702 my ( $text, $tags, $opts ) = @_;
    980              
    981 227         712 $text =~ s!$tags->{strong_tag}!$tags->{strong}->($1, $opts)!eg;
      5         28  
    982 227         508 $text =~ s!$tags->{emphasized_tag}!$tags->{emphasized}->($1, $opts)!eg;
      6         22  
    983              
    984 227 50 66     717 $text = _find_links( $text, $tags, $opts )
          33        
    985             if $opts->{extended}
    986             || $opts->{absolute_links}
    987             || $opts->{implicit_links};
    988              
    989 227         676 return $text;
    990             }
    991              
    992             sub _find_innermost_balanced_pair {
    993 11     11   12 my ( $text, $open, $close ) = @_;
    994              
    995 11         18 my $start_pos = rindex $text, $open;
    996 11 100       29 return if $start_pos == -1;
    997              
    998 7         8 my $end_pos = index $text, $close, $start_pos;
    999 7 50       20 return if $end_pos == -1;
    1000              
    1001 7         8 my $open_length = length $open;
    1002 7         8 my $close_length = length $close;
    1003 7         9 my $close_pos = $end_pos + $close_length;
    1004 7         7 my $enclosed_length = $close_pos - $start_pos;
    1005              
    1006 7         12 my $enclosed_atom = substr $text, $start_pos, $enclosed_length;
    1007 7         28 return substr( $enclosed_atom, $open_length, 0 - $close_length ),
    1008             substr( $text, 0, $start_pos ),
    1009             substr( $text, $close_pos );
    1010             }
    1011              
    1012             sub _find_links {
    1013 227     227   253 my ( $text, $tags, $opts ) = @_;
    1014              
    1015             # Build Regexp
    1016 227         190 my @res;
    1017              
    1018 227 100       460 if ( $opts->{absolute_links} ) {
    1019              
    1020             # URI
    1021 225         182 my $s;
    1022 225   66     493 $tags->{_schema_regex} ||= _make_schema_regex @{ $tags->{schemas} };
      47         163  
    1023 225         243 $s = $tags->{_schema_regex};
    1024 225         1700 push @res, qr/\b$s:[$uricCheat][$uric]*/;
    1025             }
    1026              
    1027 227 100       454 if ( $opts->{implicit_links} ) {
    1028              
    1029             # StudlyCaps
    1030 17 50       36 if ( $tags->{implicit_link_delimiters} ) {
    1031 17         47 push @res, qr/$tags->{implicit_link_delimiters}/;
    1032             }
    1033             else {
    1034 0         0 warnings::warnif("Ignoring implicit_links option since implicit_link_delimiters is empty");
    1035             }
    1036             }
    1037              
    1038 227 100       400 if ( $opts->{extended} ) {
    1039              
    1040             # [[Wiki Page]]
    1041 226 100       658 if ( !$tags->{extended_link_delimiters} ) {
        100          
    1042 3         70 warnings::warnif("Ignoring extended option since extended_link_delimiters is empty");
    1043             }
    1044             elsif ( ref $tags->{extended_link_delimiters} eq "ARRAY" ) {
    1045              
    1046             # Backwards compatibility for extended links.
    1047             # Bypasses the regex substitution used by absolute and implicit
    1048             # links.
    1049 4         6 my ( $start, $end ) = @{ $tags->{extended_link_delimiters} };
      4         8  
    1050 4         10 while ( my @pieces = _find_innermost_balanced_pair( $text, $start, $end ) ) {
    1051 7 50       21 my ( $tag, $before, $after ) = map { defined $_ ? $_ : '' } @pieces;
      21         36  
    1052 7   50     20 my $extended = $tags->{link}->( $tag, $opts, $tags ) || '';
    1053 7         50 $text = $before . $extended . $after;
    1054             }
    1055             }
    1056             else {
    1057 219         677 push @res, qr/$tags->{extended_link_delimiters}/;
    1058             }
    1059             }
    1060              
    1061 227 50       633 if (@res) {
    1062 227         395 my $re = join "|", @res;
    1063 227         2783 $text =~ s/$re/$tags->{link}->($&, $opts, $tags)/ge;
      30         104  
    1064             }
    1065              
    1066 227         577 return $text;
    1067             }
    1068              
    1069             =head1 Wiki Format
    1070              
    1071             Refer to L for
    1072             description of the default wiki format, as interpreted by this module. Any
    1073             discrepencies will be considered bugs in this module, with a few exceptions.
    1074              
    1075             =head2 Unimplemented Wiki Markup
    1076              
    1077             =over 4
    1078              
    1079             =item Templates, Magic Words, and Wanted Links
    1080              
    1081             Templates, magic words, and the colorization of wanted links all require a back
    1082             end data store that can be consulted on the existance and content of named
    1083             pages. C has deliberately been constructed such that it
    1084             operates independantly from such a back end. For an interface to
    1085             C which implements these features, see
    1086             L.
    1087              
    1088             =item Tables
    1089              
    1090             This is on the TODO list.
    1091              
    1092             =back
    1093              
    1094             =head1 EXPORT
    1095              
    1096             If you'd like to make your life more convenient, you can optionally import a
    1097             subroutine that already has default tags and options set up. This is
    1098             especially handy if you use a prefix:
    1099              
    1100             use Text::MediawikiFormat prefix => 'http://www.example.com/';
    1101             wikiformat ('some text');
    1102              
    1103             Tags are interpreted as default members of the $tags hash normally passed to
    1104             C, except for the five options (see above) and the C key, who's
    1105             value is interpreted as an alternate name for the imported function.
    1106              
    1107             To use the C flag to control the name by which your code calls the imported
    1108             function, for example,
    1109              
    1110             use Text::MediawikiFormat as => 'formatTextWithWikiStyle';
    1111             formatTextWithWikiStyle ('some text');
    1112              
    1113             You might choose a better name, though.
    1114              
    1115             The calling semantics are effectively the same as those of the C
    1116             function. Any additional tags or options to the imported function will
    1117             override the defaults. This code:
    1118              
    1119             use Text::MediawikiFormat as => 'wf', extended => 0;
    1120             wf ('some text', {}, {extended => 1});
    1121              
    1122             enables extended links, after specifying that the default behavior should be
    1123             to disable them.
    1124              
    1125             =head1 GORY DETAILS
    1126              
    1127             =head2 Tags
    1128              
    1129             There are two types of Wiki markup: phrase markup and blocks. Blocks include
    1130             lists, which are made up of lines and can also contain other lists.
    1131              
    1132             =head3 Phrase Markup
    1133              
    1134             The are currently three types of wiki phrase markup. These are the
    1135             strong and emphasized markup and links. Links may additionally be of three
    1136             subtypes, extended, implicit, or absolute.
    1137              
    1138             You can change the regular expressions used to find strong and emphasized tags:
    1139              
    1140             %tags = (
    1141             strong_tag => qr/\*([^*]+?)\*/,
    1142             emphasized_tag => qr|/([^/]+?)/|,
    1143             );
    1144              
    1145             $wikitext = 'this is *strong*, /emphasized/, and */em+strong/*';
    1146             $htmltext = wikiformat ($wikitext, \%tags, {});
    1147              
    1148             You can also change the regular expressions used to find links. The following
    1149             just sets them to their default states (but enables parsing of implicit links,
    1150             which is I the default):
    1151              
    1152             my $html = wikiformat
    1153             (
    1154             $raw,
    1155             {implicit_link_delimiters => qr!\b(?:[A-Z][a-z0-9]\w*){2,}!,
    1156             extended_link_delimiters => qr!\[(?:\[[^][]*\]|[^][]*)\]!,
    1157             },
    1158             {implicit_links => 1}
    1159             );
    1160              
    1161             In addition, you may set the function references that format strong and
    1162             emphasized text and links. The strong and emphasized functions receive only
    1163             the text to be formatted as an argument and are expected to return the
    1164             formatted text. The link formatter also recieves references to the C<$tags>
    1165             and C<$opts> arrays. For example, the following sets the strong and
    1166             emphasized formatters to their default state while replacing the link formatter
    1167             with one which strips href information and returns only the title text:
    1168              
    1169             my $html = wikiformat
    1170             (
    1171             $raw,
    1172             {strong => sub {"$_[0]"},
    1173             emphasized => sub {"$_[0]"},
    1174             link => sub
    1175             {
    1176             my ($tag, $opts, $tags) = @_;
    1177             if ($tag =~ s/^\[\[([^][]+)\]\]$/$1/)
    1178             {
    1179             my ($page, $title) = split qr/\|/, $tag, 2;
    1180             return $title if $title;
    1181             return $page;
    1182             }
    1183             elsif ($tag =~ s/^\[([^][]+)\]$/$1/)
    1184             {
    1185             my ($href, $title) = split qr/ /, $tag, 2;
    1186             return $title if $title;
    1187             return $href;
    1188             }
    1189             else
    1190             {
    1191             return $tag;
    1192             }
    1193             },
    1194             },
    1195             );
    1196              
    1197             =head3 Blocks
    1198              
    1199             The default block types are C, C, C, C,
    1200             C, C, C, and C
    .
    1201              
    1202             Block entries in the tag hashes must contain array references. The first two
    1203             items are the tags used at the start and end of the block. The third and
    1204             fourth contain the tags used at the start and end of each line. Where there
    1205             needs to be more processing of individual lines, use a subref as the third
    1206             item. This is how the module processes ordered lines in HTML lists and
    1207             headers:
    1208              
    1209             my $html = wikiformat
    1210             (
    1211             $raw,
    1212             {ordered => ['
      ', "
    \n", '
  • ', "
  • \n"],
  • 1213             header => ['', "\n", \&_make_header],
    1214             },
    1215             );
    1216              
    1217             The first argument to these subrefs is the post-processed text of the line
    1218             itself. (Processing removes the indentation and tokens used to mark this as a
    1219             list and checks the rest of the line for other line formattings.) The second
    1220             argument is the indentation level (see below). The subsequent arguments are
    1221             captured variables in the regular expression used to find this list type. The
    1222             regexp for headers is:
    1223              
    1224             $html = wikiformat
    1225             (
    1226             $raw,
    1227             {blocks => {header => qr/^(=+)\s*(.+?)\s*\1$/}}
    1228             );
    1229              
    1230             The module processes indentation first, if applicable, and stores the
    1231             indentation level (the length of the indentation removed).
    1232              
    1233             Lists automatically start and end as necessary.
    1234              
    1235             Because regular expressions could conceivably match more than one line, block
    1236             level markup is processed in a specific order. The C tag governs
    1237             this order. It contains a reference to an array of the names of the
    1238             appropriate blocks to process. If you add a block type, be sure to add an
    1239             entry for it in C:
    1240              
    1241             my $html = wikiformat
    1242             (
    1243             $raw,
    1244             {invisible => ['', '', '', ''],
    1245             blocks => {invisible => qr!^--(.*?)--$!},
    1246             blockorder => [qw(code header line ordered
    1247             unordered definition invisible
    1248             paragraph_break paragraph)]
    1249             },
    1250             },
    1251             );
    1252              
    1253             =head3 Finding blocks
    1254              
    1255             As has already been mentioned in passing, C uses regular
    1256             expressions to find blocks. These are in the C<%tags> hash under the C
    1257             key. For example, to change the regular expression to find code block items,
    1258             use:
    1259              
    1260             my $html = wikiformat ($raw, {blocks => {code => qr/^:\s+/}});
    1261              
    1262             This will require a leading colon to mark code lines (note that as writted
    1263             here, this would interfere with the default processing of definition lists).
    1264              
    1265             =head3 Finding Blocks in the Correct Order
    1266              
    1267             As intrepid bug reporter Tom Hukins pointed out in CPAN RT bug #671, the order
    1268             in which C searches for blocks varies by platform and
    1269             version of Perl. Because some block-finding regular expressions are more
    1270             specific than others, what you intend to be one type of block may turn into a
    1271             different list type.
    1272              
    1273             If you're adding new block types, be aware of this. The C entry in
    1274             C<%tags> exists to force C to apply its regexes from
    1275             most specific to least specific. It contains an array reference. By default,
    1276             it looks for ordered lists first, unordered lists second, and code references
    1277             at the end.
    1278              
    1279             =head1 SEE ALSO
    1280              
    1281             L
    1282              
    1283             =head1 SUPPORT
    1284              
    1285             You can find documentation for this module with the perldoc command.
    1286              
    1287             perldoc Text::MediawikiFormat
    1288              
    1289             You can also look for information at:
    1290              
    1291             =over 4
    1292              
    1293             =item * AnnoCPAN: Annotated CPAN documentation
    1294              
    1295             L
    1296              
    1297             =item * CPAN Ratings
    1298              
    1299             L
    1300              
    1301             =item * RT: CPAN's request tracker
    1302              
    1303             L
    1304              
    1305             =item * Search CPAN
    1306              
    1307             L
    1308              
    1309             =back
    1310              
    1311             =head1 AUTHOR
    1312              
    1313             Derek Price C is the author.
    1314              
    1315             =head1 ACKNOWLEDGEMENTS
    1316              
    1317             This module is derived from L, written by chromatic.
    1318             chromatic's original credits are below:
    1319              
    1320             chromatic, C, with much input from the Jellybean team
    1321             (including Jonathan Paulett). Kate L Pugh has also provided several patches,
    1322             many failing tests, and is usually the driving force behind new features and
    1323             releases. If you think this module is worth buying me a beer, she deserves at
    1324             least half of it.
    1325              
    1326             Alex Vandiver added a nice patch and tests for extended links.
    1327              
    1328             Tony Bowden, Tom Hukins, and Andy H. all suggested useful features that are now
    1329             implemented.
    1330              
    1331             Sam Vilain, Chris Winters, Paul Schmidt, and Art Henry have all found and
    1332             reported silly bugs.
    1333              
    1334             Blame me for the implementation.
    1335              
    1336             =head1 BUGS
    1337              
    1338             The link checker in C may fail to detect existing links that do
    1339             not follow HTML, XML, or SGML style. They may die with some SGML styles too.
    1340             I.
    1341              
    1342             =head1 TODO
    1343              
    1344             =over 4
    1345              
    1346             =item * Optimize C to work on a list of lines
    1347              
    1348             =back
    1349              
    1350             =head1 COPYRIGHT & LICENSE
    1351              
    1352             Copyright (c) 2006-2008 Derek R. Price, all rights reserved.
    1353             Copyright (c) 2002 - 2006, chromatic, all rights reserved.
    1354              
    1355             This program is free software; you can redistribute it and/or modify it
    1356             under the same terms as Perl itself.
    1357              
    1358             =cut
    1359              
    1360             1; # End of Text::MediaiwkiFormat