File Coverage

blib/lib/Text/MultiMarkdown.pm
Criterion Covered Total %
statement 453 474 95.5
branch 136 162 83.9
condition 25 44 56.8
subroutine 41 41 100.0
pod 2 2 100.0
total 657 723 90.8


line stmt bran cond sub pod time code
1             package Text::MultiMarkdown;
2             require 5.008_000;
3 22     22   76152 use strict;
  22         44  
  22         936  
4 22     22   201 use warnings;
  22         43  
  22         826  
5 22     22   120 use re 'eval';
  22         50  
  22         1254  
6              
7 22     22   125 use Digest::MD5 qw(md5_hex);
  22         39  
  22         1488  
8 22     22   26105 use Encode qw();
  22         345704  
  22         746  
9 22     22   188 use Carp qw(croak);
  22         41  
  22         1667  
10 22     22   126 use base qw(Text::Markdown);
  22         118  
  22         26559  
11 22     22   886890 use HTML::Entities qw(encode_entities);
  22         184021  
  22         169603  
12              
13             our $VERSION = '1.000035'; # 1.0.34
14             $VERSION = eval $VERSION;
15             our @EXPORT_OK = qw(markdown);
16              
17             =head1 NAME
18              
19             Text::MultiMarkdown - Convert MultiMarkdown syntax to (X)HTML
20              
21             =head1 SYNOPSIS
22              
23             use Text::MultiMarkdown 'markdown';
24             my $html = markdown($text);
25              
26             use Text::MultiMarkdown 'markdown';
27             my $html = markdown( $text, {
28             empty_element_suffix => '>',
29             tab_width => 2,
30             use_wikilinks => 1,
31             } );
32              
33             use Text::MultiMarkdown;
34             my $m = Text::MultiMarkdown->new;
35             my $html = $m->markdown($text);
36              
37             use Text::MultiMarkdown;
38             my $m = Text::MultiMarkdown->new(
39             empty_element_suffix => '>',
40             tab_width => 2,
41             use_wikilinks => 1,
42             );
43             my $html = $m->markdown( $text );
44              
45             =head1 DESCRIPTION
46              
47             Markdown is a text-to-HTML filter; it translates an easy-to-read /
48             easy-to-write structured text format into HTML. Markdown's text format
49             is most similar to that of plain text email, and supports features such
50             as headers, *emphasis*, code blocks, blockquotes, and links.
51              
52             Markdown's syntax is designed not as a generic markup language, but
53             specifically to serve as a front-end to (X)HTML. You can use span-level
54             HTML tags anywhere in a Markdown document, and you can use block level
55             HTML tags (C<<
>>, C<< >> etc.). Note that by default
56             Markdown isn't interpreted in HTML block-level elements, unless you add
57             a C attribute to the element. See L for
58             details.
59              
60             This module implements the MultiMarkdown markdown syntax extensions from:
61              
62             http://fletcherpenney.net/multimarkdown/
63              
64             =head1 SYNTAX
65              
66             For more information about (original) Markdown's syntax, see:
67              
68             http://daringfireball.net/projects/markdown/
69              
70             This module implements MultiMarkdown, which is an extension to Markdown..
71              
72             The extension is documented at:
73              
74             http://fletcherpenney.net/multimarkdown/
75              
76             and borrows from php-markdown, which lives at:
77              
78             http://michelf.com/projects/php-markdown/extra/
79              
80             This documentation is going to be moved/copied into this module for clearer reading in a future release..
81              
82             =head1 OPTIONS
83              
84             MultiMarkdown supports a number of options to it's processor which control the behaviour of the output document.
85              
86             These options can be supplied to the constructor, on in a hash with the individual calls to the markdown method.
87             See the synopsis for examples of both of the above styles.
88              
89             The options for the processor are:
90              
91             =over
92              
93             =item use_metadata
94              
95             Controls the metadata options below.
96              
97             =item strip_metadata
98              
99             If true, any metadata in the input document is removed from the output document (note - does not take effect in complete document format).
100              
101             =item empty element suffix
102              
103             This option can be used to generate normal HTML output. By default, it is ' />', which is xHTML, change to '>' for normal HTML.
104              
105             =item img_ids
106              
107             Controls if tags generated have an id attribute. Defaults to true.
108             Turn off for compatibility with the original markdown.
109              
110             =item heading_ids
111              
112             Controls if tags generated have an id attribute. Defaults to true.
113             Turn off for compatibility with the original markdown.
114              
115             =item bibliography_title
116              
117             The title of the generated bibliography, defaults to 'Bibliography'.
118              
119             =item tab_width
120              
121             Controls indent width in the generated markup, defaults to 4
122              
123             =item disable_tables
124              
125             If true, this disables the MultiMarkdown table handling.
126              
127             =item disable_footnotes
128              
129             If true, this disables the MultiMarkdown footnotes handling.
130              
131             =item disable_bibliography
132              
133             If true, this disables the MultiMarkdown bibliography/citation handling.
134              
135             =item disable_definition_lists
136              
137             If true, this disables the MultiMarkdown definition list handling.
138              
139             =back
140              
141             A number of possible items of metadata can also be supplied as options.
142             Note that if the use_metadata is true then the metadata in the document will overwrite the settings on command line.
143              
144             Metadata options supported are:
145              
146             =over
147              
148             =item document_format
149              
150             =item use_wikilinks
151              
152             =item base_url
153              
154             =item self_url - The document url is prepended to the "#" anchor of footnotes.
155              
156             =back
157              
158             =head1 METADATA
159              
160             MultiMarkdown supports the concept of 'metadata', which allows you to specify a number of formatting options
161             within the document itself. Metadata should be placed in the top few lines of a file, on value per line as colon separated key/value pairs.
162             The metadata should be separated from the document with a blank line.
163              
164             Most metadata keys are also supported as options to the constructor, or options
165             to the markdown method itself. (Note, as metadata, keys contain space, whereas options the keys are underscore separated.)
166              
167             You can attach arbitrary metadata to a document, which is output in HTML tags if unknown, see t/11document_format.t for more info.
168              
169             A list of 'known' metadata keys, and their effects are listed below:
170              
171             =over
172              
173             =item document format
174              
175             If set to 'complete', MultiMarkdown will render an entire xHTML page, otherwise it will render a document fragment
176              
177             =over
178              
179             =item css
180              
181             Sets a CSS file for the file, if in 'complete' document format.
182              
183             =item title
184              
185             Sets the page title, if in 'complete' document format.
186              
187             =back
188              
189             =item use wikilinks
190              
191             If set to '1' or 'on', causes links that are WikiWords to automatically be processed into links.
192              
193             =item base url
194              
195             This is the base URL for referencing wiki pages. In this is not supplied, all wiki links are relative.
196              
197             =back
198              
199             =head1 METHODS
200              
201             =head2 new
202              
203             A simple constructor, see the SYNTAX and OPTIONS sections for more information.
204              
205             =cut
206              
207             sub new {
208 27     27 1 19012 my ($class, %p) = @_;
209              
210             # Default metadata to 1
211 27 100       194 $p{use_metadata} = 1 unless exists $p{use_metadata};
212             # Squash value to [01]
213 27 100       135 $p{use_metadata} = $p{use_metadata} ? 1 : 0;
214              
215 27   100     959 $p{base_url} ||= ''; # This is the base url to be used for WikiLinks
216              
217 27 50 33     227 $p{tab_width} = 4 unless (defined $p{tab_width} and $p{tab_width} =~ m/^\d+$/);
218              
219 27   50     199 $p{document_format} ||= '';
220              
221 27   50     168 $p{empty_element_suffix} ||= ' />'; # Change to ">" for HTML output
222              
223             #$p{heading_ids} = defined $p{heading_ids} ? $p{heading_ids} : 1;
224              
225             # For use with WikiWords and [[Wiki Links]]
226             # NOTE: You can use \WikiWord to prevent a WikiWord from being treated as a link
227 27 100       103 $p{use_wikilinks} = $p{use_wikilinks} ? 1 : 0;
228              
229 27 100       130 $p{heading_ids} = defined $p{heading_ids} ? $p{heading_ids} : 1;
230 27 50       127 $p{img_ids} = defined $p{img_ids} ? $p{img_ids} : 1;
231              
232 27   50     430 $p{bibliography_title} ||= 'Bibliography'; # FIXME - Test and document, can also be in metadata!
233              
234 27   100     157 $p{self_url} ||= ''; # Used in footnotes to prepend anchors
235              
236 27         88 my $self = { params => \%p };
237 27   33     200 bless $self, ref($class) || $class;
238 27         102 return $self;
239             }
240              
241             =head2 markdown
242              
243             The main function as far as the outside world is concerned. See the SYNOPSIS
244             for details on use.
245              
246             =cut
247              
248             sub markdown {
249 89     89 1 64351 my ( $self, $text, $options ) = @_;
250              
251             # Detect functional mode, and create an instance for this run..
252 89 100       416 unless (ref $self) {
253 2 100       12 if ( $self ne __PACKAGE__ ) {
254 1         5 my $ob = __PACKAGE__->new();
255             # $self is text, $text is options
256 1         9 return $ob->markdown($self, $text);
257             }
258             else {
259 1         23 croak('Calling ' . $self . '->markdown (as a class method) is not supported.');
260             }
261             }
262              
263 87   100     473 $options ||= {};
264              
265 87         146 %$self = (%{ $self->{params} }, %$options, params => $self->{params});
  87         1976  
266              
267 87         452 $self->_CleanUpRunData($options);
268              
269 87         1244 return $self->_Markdown($text);
270             }
271              
272             sub _CleanUpRunData {
273 87     87   177 my ($self, $options) = @_;
274             # Clear the global hashes. If we don't clear these, you get conflicts
275             # from other articles when generating a page which contains more than
276             # one article (e.g. an index page that shows the N most recent
277             # articles):
278 87         231 $self->{_crossrefs} = {};
279 87         223 $self->{_footnotes} = {};
280 87         170 $self->{_references} = {};
281 87         191 $self->{_used_footnotes} = []; # Why do we need 2 data structures for footnotes? FIXME
282 87         646 $self->{_used_references} = []; # Ditto for references
283 87         188 $self->{_citation_counter} = 0;
284 87         197 $self->{_metadata} = {};
285 87         203 $self->{_attributes} = {}; # Used for extra attributes on links / images.
286              
287 87         554 $self->SUPER::_CleanUpRunData($options);
288             }
289              
290             sub _Markdown {
291             #
292             # Main function. The order in which other subs are called here is
293             # essential. Link and image substitutions need to happen before
294             # _EscapeSpecialChars(), so that any *'s or _'s in the
295             # and tags get encoded.
296             #
297             # Can't think of any good way to make this inherit from the Markdown version as ordering is so important, so I've left it.
298 87     87   200 my ($self, $text) = @_;
299              
300 87         441 $text = $self->_CleanUpDoc($text);
301              
302             # MMD only. Strip out MetaData
303 87 100 100     70881 $text = $self->_ParseMetaData($text) if ($self->{use_metadata} || $self->{strip_metadata});
304              
305             # Turn block-level HTML blocks into hash entries
306 87         894 $text = $self->_HashHTMLBlocks($text, {interpret_markdown_on_attribute => 1});
307              
308 87         110812 $text = $self->_StripLinkDefinitions($text);
309              
310             # MMD only
311 87         312 $text = $self->_StripMarkdownReferences($text);
312              
313 87         669 $text = $self->_RunBlockGamut($text, {wrap_in_p_tags => 1});
314              
315             # MMD Only
316 87 100       26194 $text = $self->_DoMarkdownCitations($text) unless $self->{disable_bibliography};
317 87 100       515 $text = $self->_DoFootnotes($text) unless $self->{disable_footnotes};
318              
319 87         447 $text = $self->_UnescapeSpecialChars($text);
320              
321             # MMD Only
322             # This must follow _UnescapeSpecialChars
323 87         16644 $text = $self->_UnescapeWikiWords($text);
324 87 100       472 $text = $self->_FixFootnoteParagraphs($text) unless $self->{disable_footnotes}; # TODO: remove. Doesn't make any difference to test suite pass/failure
325 87 100       434 $text .= $self->_PrintFootnotes() unless $self->{disable_footnotes};
326 87 100       443 $text .= $self->_PrintMarkdownBibliography() unless $self->{disable_bibliography};
327              
328 87         643 $text = $self->_ConvertCopyright($text);
329              
330             # MMD Only
331 87 100       48922 if (lc($self->{document_format}) =~ /^complete\s*$/) {
332 4         147 return $self->_xhtmlMetaData() . "\n" . $text . "\n\n";
333             }
334             else {
335 83         302 return $self->_textMetaData() . $text . "\n";
336             }
337              
338             }
339              
340             #
341             # Routines which are overridden for slightly different behaviour in MultiMarkdown
342             #
343              
344             # Delegate to super class, then do wiki links
345             sub _RunSpanGamut {
346 763     763   1160432 my ($self, $text) = @_;
347              
348 763         2675 $text = $self->SUPER::_RunSpanGamut($text);
349              
350             # Process WikiWords
351 763 100       238646 if ($self->_UseWikiLinks()) {
352 13         47 $text = $self->_DoWikiLinks($text);
353              
354             # And then reprocess anchors and images
355             # FIXME - This is needed exactly why?
356 13         40 $text = $self->_DoImages($text);
357 13         1618 $text = $self->_DoAnchors($text);
358             }
359              
360 763         2636 return $text;
361             }
362              
363             # Don't do Wiki Links in Headers, otherwise delegate to super class
364             # Do tables stright after headers
365             sub _DoHeaders {
366 146     146   6433 my ($self, $text) = @_;
367              
368 146         393 local $self->{use_wikilinks} = 0;
369              
370 146         693 $text = $self->SUPER::_DoHeaders($text);
371              
372             # Do tables to populate the table id's for cross-refs
373             # (but after headers as the tables can contain cross-refs to other things, so we want the header cross-refs)
374 146         7044 $text = $self->_DoTables($text);
375             }
376              
377             sub _DoLists {
378 249     249   18027 my ($self, $text) = @_;
379 249 50       1045 $text = $self->_DoDefinitionLists($text)
380             unless $self->{disable_definition_lists};
381 249         1255 $self->SUPER::_DoLists($text);
382             }
383              
384             sub _DoDefinitionLists {
385 249     249   390 my ($self, $text) = @_;
386             # Uses the syntax proposed by Michel Fortin in PHP Markdown Extra
387              
388 249         444 my $less_than_tab = $self->{tab_width} -1;
389              
390 249         1293 my $line_start = qr{
391             [ ]{0,$less_than_tab}
392             }mx;
393              
394 249         1974 my $term = qr{
395             $line_start
396             [^:\s][^\n]*\n
397             }sx;
398              
399 249         1662 my $definition = qr{
400             \n?[ ]{0,$less_than_tab}
401             \:[ \t]+(.*?)\n
402             ((?=\n?\:)|\n|\Z) # Lookahead for next definition, two returns,
403             # or the end of the document
404             }sx;
405              
406 249         2772 my $definition_block = qr{
407             ((?:$term)+) # $1 = one or more terms
408             ((?:$definition)+) # $2 = by one or more definitions
409             }sx;
410              
411 249         2843 my $definition_list = qr{
412             (?:$definition_block\n*)+ # One ore more definition blocks
413             }sx;
414              
415 249         163729 $text =~ s{
416             ($definition_list) # $1 = the whole list
417             }{
418 1         4 my $list = $1;
419 1         4 my $result = $1;
420            
421 1         62 $list =~ s{
422             (?:$definition_block)\n*
423             }{
424 2         7 my $terms = $1;
425 2         6 my $defs = $2;
426              
427 2         28 $terms =~ s{
428             [ ]{0,$less_than_tab}
429             (.*)
430             \s*
431             }{
432 4         10 my $term = $1;
433 4         7 my $result = "";
434 4         21 $term =~ s/^\s*(.*?)\s*$/$1/;
435 4 100       28 if ($term !~ /^\s*$/){
436 2         10 $result = "
" . $self->_RunSpanGamut($1) . "
\n";
437             }
438 4         29 $result;
439             }xmge;
440              
441 2         51 $defs =~ s{
442             $definition
443             }{
444 3         33 my $def = $1 . "\n";
445 3         31 $def =~ s/^[ ]{0,$self->{tab_width}}//gm;
446 3         14 "
\n" . $self->_RunBlockGamut($def) . "\n
\n";
447             }xsge;
448              
449 2         53 $terms . $defs . "\n";
450             }xsge;
451              
452 1         13 "
\n" . $list . "
\n\n";
453             }xsge;
454              
455 249         1519 return $text
456             }
457              
458             # Generating headers automatically generates X-refs in MultiMarkdown (always)
459             # Also, by default, you get id attributes added to your headers, you can turn this
460             # part of the MultiMarkdown behaviour off with the heading_ids flag.
461             sub _GenerateHeader {
462 26     26   381 my ($self, $level, $id) = @_;
463              
464 26 100       110 my $label = $self->{heading_ids} ? $self->_Header2Label($id) : '';
465 26         68 my $header = $self->_RunSpanGamut($id);
466              
467 26 100       69 if ($label ne '') {
468 24         94 $self->{_crossrefs}{$label} = "#$label";
469 24         55 $self->{_titles}{$label} = $header;
470 24         51 $label = qq{ id="$label"};
471             }
472              
473 26         603 return "$header\n\n";
474             }
475              
476             # Protect Wiki Links in Code Blocks (if wiki links are turned on), then delegate to super class.
477             sub _EncodeCode {
478 368     368   29301 my ($self, $text) = @_;
479              
480 368 100       740 if ($self->_UseWikiLinks()) {
481 3         19 $text =~ s/([A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*)/\\$1/gx;
482             }
483              
484 368         1508 return $self->SUPER::_EncodeCode($text);
485             }
486              
487             # Full function pulled out of Text::Markdown as MultiMarkdown supports supplying extra 'attributes' with links and
488             # images which are then pushed back into the generated HTML, and this needs a different regex. It should be possible
489             # to extract the just the regex from Text::Markdown, and use that here, but I haven't done so yet.
490             # Strip footnote definitions at the same time as stripping link definitions.
491             # Also extract images and then replace them straight back in (code smell!) to be able to cross reference images
492             sub _StripLinkDefinitions {
493             #
494             # Strips link definitions from text, stores the URLs and titles in
495             # hash references.
496             #
497 87     87   204 my ($self, $text) = @_;
498              
499 87 100       518 $text = $self->_StripFootnoteDefinitions($text) unless $self->{disable_footnotes};
500              
501 87         213 my $less_than_tab = $self->{tab_width} - 1;
502              
503             # Link defs are in the form: ^[id]: url "optional title"
504             # FIXME - document attributes here.
505 87         3445 while ($text =~ s{
506             # Pattern altered for MultiMarkdown
507             # in order to not match citations or footnotes
508             ^[ ]{0,$less_than_tab}\[([^#^].*)\]: # id = $1
509             [ \t]*
510             \n? # maybe *one* newline
511             [ \t]*
512             ? # url = $2
513             [ \t]*
514             \n? # maybe one newline
515             [ \t]*
516             (?:
517             (?<=\s) # lookbehind for whitespace
518             ["(]
519             (.+?) # title = $3
520             [")]
521             [ \t]*
522             )? # title is optional
523              
524             # MultiMarkdown addition for attribute support
525             \n?
526             ( # Attributes = $4
527             (?<=\s) # lookbehind for whitespace
528             (([ \t]*\n)?[ \t]*((\S+=\S+)|(\S+=".*?")))*
529             )?
530             [ \t]*
531             # /addition
532             (?:\n+|\Z)
533             }
534             {}mx) {
535 45         303 $self->{_urls}{lc $1} = $self->_EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive
536 45 100       762 if ($3) {
537 12         50 $self->{_titles}{lc $1} = $3;
538 12         47 $self->{_titles}{lc $1} =~ s/"/"/g;
539             }
540              
541             # MultiMarkdown addition "
542 45 100       7497 if ($4) {
543 4         135 $self->{_attributes}{lc $1} = $4;
544             }
545             # /addition
546             }
547              
548 87         412 $text = $self->_GenerateImageCrossRefs($text);
549              
550 87         190 return $text;
551             }
552              
553             # Add the extra cross-references to headers that MultiMarkdown supports, and also
554             # the additional link attributes.
555             sub _GenerateAnchor {
556             # FIXME - Fugly, change to named params?
557 160     160   35408 my ($self, $whole_match, $link_text, $link_id, $url, $title, $attributes) = @_;
558              
559             # Allow automatic cross-references to headers
560 160 100       421 if (defined $link_id) {
561 96         243 my $label = $self->_Header2Label($link_id);
562 96 100       318 if (defined $self->{_crossrefs}{$label}) {
563 8   33     39 $url ||= $self->{_crossrefs}{$label};
564             }
565 96 100       271 if ( defined $self->{_titles}{$label} ) {
566 11   33     59 $title ||= $self->{_titles}{$label};
567             }
568 96   66     458 $attributes ||= $self->_DoAttributes($label);
569             }
570 160         658 return $self->SUPER::_GenerateAnchor($whole_match, $link_text, $link_id, $url, $title, $attributes);
571             }
572              
573             # Add the extra cross-references to images that MultiMarkdown supports, and also
574             # the additional attributes.
575             sub _GenerateImage {
576             # FIXME - Fugly, change to named params?
577 6     6   637 my ($self, $whole_match, $alt_text, $link_id, $url, $title, $attributes) = @_;
578              
579 6 50 33     39 if (defined $alt_text && length $alt_text) {
580 6         16 my $label = $self->_Header2Label($alt_text);
581 6         19 $self->{_crossrefs}{$label} = "#$label";
582 6 50       27 $attributes .= $self->{img_ids} ? qq{ id="$label"} : '';
583             }
584              
585 6 100       26 $attributes .= $self->_DoAttributes($link_id) if defined $link_id;
586              
587 6         44 $self->SUPER::_GenerateImage($whole_match, $alt_text, $link_id, $url, $title, $attributes);
588             }
589              
590              
591             #
592             # MultiMarkdown specific routines
593             #
594              
595             # FIXME - This is really really ugly!
596             sub _ParseMetaData {
597 82     82   210 my ($self, $text) = @_;
598 82         152 my $clean_text = "";
599              
600 82         161 my ($inMetaData, $currentKey) = (1, '');
601              
602 82         1242 foreach my $line ( split /\n/, $text ) {
603 3073 50 100     11687 $line =~ /^\s*$/ and $inMetaData = 0 and $clean_text .= $line and next;
      33        
604 3073 100       4431 if ($inMetaData) {
605 99 100       362 next unless $self->{use_metadata}; # We can come in here as use_metadata => 0, strip_metadata => 1
606 97 100       379 if ($line =~ /^([a-zA-Z0-9][0-9a-zA-Z _-]+?):\s*(.*)$/ ) {
607 33         70 $currentKey = $1;
608 33         58 $currentKey =~ s/ / /g;
609 33 50       166 $self->{_metadata}{$currentKey} = defined $2 ? $2 : '';
610 33 100       99 if (lc($currentKey) eq "format") {
611 2         5 $self->{document_format} = $self->{_metadata}{$currentKey};
612             }
613 33 100       265 if (lc($currentKey) eq "base url") {
614 5         17 $self->{base_url} = $self->{_metadata}{$currentKey};
615             }
616 33 50       100 if (lc($currentKey) eq "bibliography title") {
617 0         0 $self->{bibliography_title} = $self->{_metadata}{$currentKey};
618 0         0 $self->{bibliography_title} =~ s/\s*$//;
619             }
620             }
621             else {
622 64 100       203 if ($currentKey eq "") {
623             # No metadata present
624 63         229 $clean_text .= "$line\n";
625 63         101 $inMetaData = 0;
626 63         183 next;
627             }
628 1 50       6 if ($line =~ /^\s*(.+)$/ ) {
629 1         5 $self->{_metadata}{$currentKey} .= "\n$1";
630             }
631             }
632             }
633             else {
634 2974         5187 $clean_text .= "$line\n";
635             }
636             }
637              
638             # Recheck for leading blank lines
639 82         520 $clean_text =~ s/^\n+//s;
640              
641 82         410 return $clean_text;
642             }
643              
644             # FIXME - This is really ugly, why do we match stuff and substitute it with the thing we just matched?
645             sub _GenerateImageCrossRefs {
646 87     87   165 my ($self, $text) = @_;
647              
648             #
649             # First, handle reference-style labeled images: ![alt text][id]
650             #
651 87         380 $text =~ s{
652             ( # wrap whole match in $1
653             !\[
654             (.*?) # alt text = $2
655             \]
656              
657             [ ]? # one optional space
658             (?:\n[ ]*)? # one optional newline followed by spaces
659              
660             \[
661             (.*?) # id = $3
662             \]
663              
664             )
665             }{
666 7         24 my $whole_match = $1;
667 7         19 my $alt_text = $2;
668 7         16 my $link_id = lc $3;
669              
670 7 100       32 if ($link_id eq "") {
671 2         6 $link_id = lc $alt_text; # for shortcut links like ![this][].
672             }
673              
674 7         27 $alt_text =~ s/"/"/g;
675              
676 7 100       36 if (defined $self->{_urls}{$link_id}) {
677 4         19 my $label = $self->_Header2Label($alt_text);
678 4         24 $self->{_crossrefs}{$label} = "#$label";
679             }
680              
681 7         60 $whole_match;
682             }xsge;
683              
684             #
685             # Next, handle inline images: ![alt text](url "optional title")
686             # Don't forget: encode * and _
687              
688 87         591 $text =~ s{
689             ( # wrap whole match in $1
690             !\[
691             (.*?) # alt text = $2
692             \]
693             \( # literal paren
694             [ \t]*
695             ? # src url = $3
696             [ \t]*
697             ( # $4
698             (['"]) # quote char = $5
699             (.*?) # title = $6
700             \5 # matching quote
701             [ \t]*
702             )? # title is optional
703             \)
704             )
705             }{
706 7         15 my $result;
707 7         22 my $whole_match = $1;
708 7         15 my $alt_text = $2;
709              
710 7         12 $alt_text =~ s/"/"/g;
711 7         29 my $label = $self->_Header2Label($alt_text);
712 7         27 $self->{_crossrefs}{$label} = "#$label";
713 7         123 $whole_match;
714             }xsge;
715              
716 87         256 return $text;
717             }
718              
719             sub _StripFootnoteDefinitions {
720 84     84   161 my ($self, $text) = @_;
721 84         308 my $less_than_tab = $self->{tab_width} - 1;
722              
723 84         1381 while ($text =~ s{
724             \n\[\^([^\n]+?)\]\:[ \t]*# id = $1
725             \n?
726             (.*?)\n{1,2} # end at new paragraph
727             ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
728             }
729             {\n}sx)
730             {
731 7         21 my $id = $1;
732 7         18 my $footnote = "$2\n";
733 7         63 $footnote =~ s/^[ ]{0,$self->{tab_width}}//gm;
734              
735 7         37 $self->{_footnotes}{$self->_Id2Footnote($id)} = $footnote;
736             }
737              
738 84         305 return $text;
739             }
740              
741             sub _DoFootnotes {
742 84     84   178 my ($self, $text) = @_;
743              
744 84 50       325 return '' unless length $text;
745              
746             # First, run routines that get skipped in footnotes
747 84         256 foreach my $label (sort keys %{ $self->{_footnotes} }) {
  84         527  
748 7         46 my $footnote = $self->_RunBlockGamut($self->{_footnotes}{$label}, {wrap_in_p_tags => 1});
749 7         209 $footnote = $self->_UnescapeSpecialChars($footnote);
750 7         1254 $footnote = $self->_DoMarkdownCitations($footnote);
751 7         33 $self->{_footnotes}{$label} = $footnote;
752             }
753              
754 84         170 my $footnote_counter = 0;
755              
756 84         366 $text =~ s{
757             \[\^(.*?)\] # id = $1
758             }{
759 7         13 my $result = '';
760 7         22 my $id = $self->_Id2Footnote($1);
761              
762 7 50       86 if (defined $self->{_footnotes}{$id} ) {
763 7         12 $footnote_counter++;
764 7 50       37 if ($self->{_footnotes}{$id} =~ /^glossary:/i) {
765 0         0 $result = qq{$footnote_counter};
766             }
767             else {
768 7         59 $result = qq{$footnote_counter};
769             }
770 7         12 push (@{ $self->{_used_footnotes} }, $id);
  7         18  
771             }
772 7         28 $result;
773             }xsge;
774              
775 84         231 return $text;
776             }
777              
778             # TODO: remove. Doesn't make any difference to test suite pass/failure
779             sub _FixFootnoteParagraphs {
780 84     84   152 my ($self, $text) = @_;
781              
782 84         255 $text =~ s(^

)()gm;

783              
784 84         206 return $text;
785             }
786              
787             sub _PrintFootnotes {
788 84     84   144 my ($self) = @_;
789 84         178 my $footnote_counter = 0;
790 84         150 my $result;
791              
792 84         210 foreach my $id (@{ $self->{_used_footnotes} }) {
  84         266  
793 7         39 $footnote_counter++;
794 7         20 my $footnote = $self->{_footnotes}{$id};
795              
796 7         48 $footnote =~ s/(<\/(p(re)?|ol|ul)>)$//;
797 7         89 my $footnote_closing_tag = $1;
798 7 50       24 $footnote_closing_tag = '' if !defined $footnote_closing_tag;
799              
800 7 50       25 if ($footnote =~ s/^glossary:\s*//i) {
801             # Add some formatting for glossary entries
802              
803 0         0 $footnote =~ s{
804             ^(.*?) # $1 = term
805             \s*
806             (?:\(([^\(\)]*)\)[^\n]*)? # $2 = optional sort key
807             \n
808             }{
809 0         0 my $glossary = qq{$1};
810              
811 0 0       0 if ($2) {
812 0         0 $glossary.= qq{};
813             };
814              
815 0         0 $glossary . q{:

};

816             }egsx;
817              
818 0         0 $result .= qq{
  • $footnote ↩$footnote_closing_tag
  • \n\n};
    819             }
    820             else {
    821 7         43 $result .= qq{
  • $footnote ↩$footnote_closing_tag
  • \n\n};
    822             }
    823             }
    824              
    825 84 100       267 if ($footnote_counter > 0) {
    826 4         25 $result = qq[\n\n
    \n{empty_element_suffix}\n
      \n\n] . $result . "
    \n
    ";
    827             }
    828             else {
    829 80         213 $result = "";
    830             }
    831              
    832 84         213 return $result;
    833             }
    834              
    835             sub _Header2Label {
    836 145     145   6056 my ($self, $header) = @_;
    837 145         243 my $label = lc $header;
    838 145         521 $label =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters
    839 145         503 while ($label =~ s/^[^A-Za-z]//g)
    840             {}; # Strip illegal leading characters
    841 145         321 return $label;
    842             }
    843              
    844             sub _Id2Footnote {
    845             # Since we prepend "fn:", we can allow leading digits in footnotes
    846 14     14   36 my ($self, $id) = @_;
    847 14         28 my $footnote = lc $id;
    848 14         32 $footnote =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters
    849 14         84 return $footnote;
    850             }
    851              
    852             sub _xhtmlMetaData {
    853 4     4   8 my ($self) = @_;
    854             # FIXME: Should not assume encoding
    855 4         8 my $result; # FIXME: This breaks some things in IE 6- = qq{\n};
    856              
    857             # This screws up xsltproc - make sure to use `-nonet -novalid` if you
    858             # have difficulty
    859 4         8 $result .= qq{\n};
    860              
    861 4         7 $result.= "\n\t\n";
    862              
    863 4         9 foreach my $key (sort keys %{$self->{_metadata}} ) {
      4         27  
    864 15 100       266 if (lc($key) eq "title") {
        100          
        100          
    865 3         13 $result.= "\t\t" . encode_entities($self->{_metadata}{$key}) . "\n";
    866             }
    867             elsif (lc($key) eq "css") {
    868 3         16 $result.= qq[\t\t{empty_element_suffix}\n];
    869             }
    870             elsif( lc($key) eq "xhtml header") {
    871 1         4 $result .= qq[\t\t$self->{_metadata}{$key}\n]
    872             }
    873             else {
    874 8         29 $result.= qq[\t\t
    875             . qq[content="] . encode_entities($self->{_metadata}{$key}) . qq["$self->{empty_element_suffix}\n];
    876             }
    877             }
    878 4         32 $result.= "\t\n";
    879              
    880 4         35 return $result;
    881             }
    882              
    883             sub _textMetaData {
    884 83     83   316 my ($self) = @_;
    885 83         240 my $result = "";
    886              
    887 83 100       287 return $result if $self->{strip_metadata};
    888              
    889 81         141 foreach my $key (sort keys %{$self->{_metadata}} ) {
      81         335  
    890 16         70 $result .= "$key: $self->{_metadata}{$key}\n";
    891             }
    892 81         256 $result =~ s/\s*\n/{empty_element_suffix}\n/g;
    893              
    894 81 100       299 if ($result ne "") {
    895 9         19 $result.= "\n";
    896             }
    897              
    898 81         808 return $result;
    899             }
    900              
    901             sub _UseWikiLinks {
    902 1144     1144   1509 my ($self) = @_;
    903 1144 100       3277 return 1 if $self->{use_wikilinks};
    904 1132         1471 my ($k) = grep { /use wikilinks/i } keys %{$self->{_metadata}};
      55         136  
      1132         2695  
    905 1132 100       3936 return unless $k;
    906 17 50       78 return 1 if $self->{_metadata}{$k};
    907 0         0 return;
    908             }
    909              
    910             sub _CreateWikiLink {
    911 12     12   32 my ($self, $title) = @_;
    912              
    913 12         16 my $id = $title;
    914 12         22 $id =~ s/ /_/g;
    915 12         27 $id =~ s/__+/_/g;
    916 12         19 $id =~ s/^_//g;
    917 12         19 $id =~ s/_$//;
    918              
    919 12         16 $title =~ s/_/ /g;
    920              
    921 12         81 return "[$title](" . $self->{base_url} . "$id)";
    922             }
    923              
    924             sub _DoWikiLinks {
    925              
    926 13     13   21 my ($self, $text) = @_;
    927 13         21 my $WikiWord = '[A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*';
    928 13         17 my $FreeLinkPattern = "([-,.()' _0-9A-Za-z\x80-\xff]+)";
    929              
    930 13 50       27 if ($self->_UseWikiLinks()) {
    931             # FreeLinks
    932 13         119 $text =~ s{
    933             \[\[($FreeLinkPattern)\]\]
    934             }{
    935 1         2 my $label = $1;
    936 1         24 $label =~ s{
    937             ([\s\>])($WikiWord)
    938             }{
    939 0         0 $1 ."\\" . $2
    940             }xsge;
    941              
    942 1         6 $self->_CreateWikiLink($label)
    943             }xsge;
    944              
    945             # WikiWords
    946 13         148 $text =~ s{
    947             ([\s])($WikiWord)
    948             }{
    949 8         26 $1 . $self->_CreateWikiLink($2)
    950             }xsge;
    951              
    952             # Catch WikiWords at beginning of text
    953 13         151 $text =~ s{^($WikiWord)
    954             }{
    955 3         8 $self->_CreateWikiLink($1)
    956             }xse;
    957             }
    958              
    959              
    960 13         37 return $text;
    961             }
    962              
    963             sub _UnescapeWikiWords {
    964 87     87   203 my ($self, $text) = @_;
    965 87         586 my $WikiWord = '[A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*';
    966              
    967             # Unescape escaped WikiWords
    968 87         1122 $text =~ s/(?<=\B)\\($WikiWord)/$1/g;
    969              
    970 87         409 return $text;
    971             }
    972              
    973             sub _DoTables {
    974 146     146   227 my ($self, $text) = @_;
    975              
    976 146 100       424 return $text if $self->{disable_tables};
    977              
    978 142         299 my $less_than_tab = $self->{tab_width} - 1;
    979              
    980             # Algorithm inspired by PHP Markdown Extra's
    981             #
    982              
    983             # Reusable regexp's to match table
    984              
    985 142         857 my $line_start = qr{
    986             [ ]{0,$less_than_tab}
    987             }mx;
    988              
    989 142         427 my $table_row = qr{
    990             [^\n]*?\|[^\n]*?\n
    991             }mx;
    992              
    993 142         1318 my $first_row = qr{
    994             $line_start
    995             \S+.*?\|.*?\n
    996             }mx;
    997              
    998 142         1210 my $table_rows = qr{
    999             (\n?$table_row)
    1000             }mx;
    1001              
    1002 142         1070 my $table_caption = qr{
    1003             $line_start
    1004             \[.*?\][ \t]*\n
    1005             }mx;
    1006              
    1007 142         1339 my $table_divider = qr{
    1008             $line_start
    1009             [\|\-\:\.][ \-\|\:\.]* \| [ \-\|\:\.]*
    1010             }mx;
    1011              
    1012 142         3301 my $whole_table = qr{
    1013             ($table_caption)? # Optional caption
    1014             ($first_row # First line must start at beginning
    1015             ($table_row)*?)? # Header Rows
    1016             $table_divider # Divider/Alignment definitions
    1017             $table_rows+ # Body Rows
    1018             ($table_caption)? # Optional caption
    1019             }mx;
    1020              
    1021              
    1022             # Find whole tables, then break them up and process them
    1023              
    1024 142         23195 $text =~ s{
    1025             ^($whole_table) # Whole table in $1
    1026             (\n|\Z) # End of file or 2 blank lines
    1027             }{
    1028 3         9 my $table = $1;
    1029 3         4 my $result = "\n"; \n"; \n"; {empty_element_suffix}\n]; {empty_element_suffix}\n]; {empty_element_suffix}\n]; {empty_element_suffix}\n]; \n"; \n"; \n"; \n\n"; \n\n\n"; \n"; \n"; \n
    1030 3         4 my @alignments;
    1031 3         6 my $use_row_header = 0;
    1032              
    1033             # Add Caption, if present
    1034              
    1035 3 100       60 if ($table =~ s/^$line_start\[\s*(.*?)\s*\](\[\s*(.*?)\s*\])?[ \t]*$//m) {
    1036 2 50       7 if (defined $3) {
    1037             # add caption id to cross-ref list
    1038 2         5 my $table_id = $self->_Header2Label($3);
    1039 2         6 $result .= qq{
    } . $self->_RunSpanGamut($1). "
    1040              
    1041 2         7 $self->{_crossrefs}{$table_id} = "#$table_id";
    1042 2         8 $self->{_titles}{$table_id} = "$1";
    1043             }
    1044             else {
    1045 0         0 $result .= "
    " . $self->_RunSpanGamut($1). "
    1046             }
    1047             }
    1048              
    1049             # If a second "caption" is present, treat it as a summary
    1050             # However, this is not valid in XHTML 1.0 Strict
    1051             # But maybe in future
    1052              
    1053             # A summary might be longer than one line
    1054 3 50       37 if ($table =~ s/\n$line_start\[\s*(.*?)\s*\][ \t]*\n/\n/s) {
    1055             # $result .= "" . $self->_RunSpanGamut($1) . "\n";
    1056             }
    1057              
    1058             # Now, divide table into header, alignment, and body
    1059              
    1060             # First, add leading \n in case there is no header
    1061              
    1062 3         10 $table = "\n" . $table;
    1063              
    1064             # Need to be greedy
    1065              
    1066 3         81 $table =~ s/\n($table_divider)\n(($table_rows)+)//s;
    1067              
    1068 3         9 my $alignment_string = $1;
    1069 3         6 my $body = $2;
    1070              
    1071             # Process column alignment
    1072 3         22 while ($alignment_string =~ /\|?\s*(.+?)\s*(\||\Z)/gs) {
    1073 8         18 my $cell = $self->_RunSpanGamut($1);
    1074 8 100       25 if ($cell =~ /\:$/) {
    1075 4 100       11 if ($cell =~ /^\:/) {
    1076 2         7 $result .= qq[
    1077 2         20 push(@alignments,"center");
    1078             }
    1079             else {
    1080 2         6 $result .= qq[
    1081 2         7 push(@alignments,"right");
    1082             }
    1083             }
    1084             else {
    1085 4 50       11 if ($cell =~ /^\:/) {
    1086 0         0 $result .= qq[
    1087 0         0 push(@alignments,"left");
    1088             }
    1089             else {
    1090 4 50 33     23 if (($cell =~ /^\./) || ($cell =~ /\.$/)) {
    1091 0         0 $result .= qq[
    1092 0         0 push(@alignments,"char");
    1093             }
    1094             else {
    1095 4         10 $result .= "{empty_element_suffix}\n";
    1096 4         25 push(@alignments,"");
    1097             }
    1098             }
    1099             }
    1100             }
    1101              
    1102             # Process headers
    1103 3         11 $table =~ s/^\n+//s;
    1104              
    1105 3         5 $result .= "
    1106              
    1107             # Strip blank lines
    1108 3         7 $table =~ s/\n[ \t]*\n/\n/g;
    1109              
    1110 3         10 foreach my $line (split(/\n/, $table)) {
    1111             # process each line (row) in table
    1112 3         6 $result .= "
    1113 3         3 my $count=0;
    1114 3         23 while ($line =~ /\|?\s*([^\|]+?)\s*(\|+|\Z)/gs) {
    1115             # process contents of each cell
    1116 8         19 my $cell = $self->_RunSpanGamut($1);
    1117 8         17 my $ending = $2;
    1118 8         10 my $colspan = "";
    1119 8 100       23 if ($ending =~ s/^\s*(\|{2,})\s*$/$1/) {
    1120 1         4 $colspan = " colspan=\"" . length($ending) . "\"";
    1121             }
    1122 8         21 $result .= "\t$cell\n";
    1123 8 100       17 if ( $count == 0) {
    1124 3 100       13 if ($cell =~ /^\s*$/) {
    1125 1         2 $use_row_header = 1;
    1126             }
    1127             else {
    1128 2         3 $use_row_header = 0;
    1129             }
    1130             }
    1131 8         83 $count++;
    1132             }
    1133 3         7 $result .= "
    1134             }
    1135              
    1136             # Process body
    1137              
    1138 3         8 $result .= "
    1139              
    1140 3         12 foreach my $line (split(/\n/, $body)) {
    1141             # process each line (row) in table
    1142 13 100       43 if ($line =~ /^\s*$/) {
    1143 2         4 $result .= "
    1144 2         3 next;
    1145             }
    1146 11         16 $result .= "
    1147 11         13 my $count=0;
    1148 11         80 while ($line =~ /\|?\s*([^\|]+?)\s*(\|+|\Z)/gs) {
    1149             # process contents of each cell
    1150 22     22   409 no warnings 'uninitialized';
      22         63  
      22         28365  
    1151 27         63 my $cell = $self->_RunSpanGamut($1);
    1152 27         57 my $ending = $2;
    1153 27         38 my $colspan = "";
    1154 27         32 my $cell_type = "td";
    1155 27 50 66     99 if ($count == 0 && $use_row_header == 1) {
    1156 0         0 $cell_type = "th";
    1157             }
    1158 27 100       76 if ($ending =~ s/^\s*(\|{2,})\s*$/$1/) {
    1159 6         15 $colspan = " colspan=\"" . length($ending) . "\"";
    1160             }
    1161 27 100       96 if ($alignments[$count] !~ /^\s*$/) {
    1162 14         46 $result .= "\t<$cell_type$colspan align=\"$alignments[$count]\">$cell\n";
    1163             }
    1164             else {
    1165 13         36 $result .= "\t<$cell_type$colspan>$cell\n";
    1166             }
    1167 27         139 $count++;
    1168             }
    1169 11         19 $result .= "
    1170             }
    1171              
    1172 3         9 $result .= "
    \n";
    1173 3         488 $result
    1174             }egmx;
    1175              
    1176 142         2078 my $table_body = qr{
    1177             ( # wrap whole match in $2
    1178              
    1179             (.*?\|.*?)\n # wrap headers in $3
    1180              
    1181             [ ]{0,$less_than_tab}
    1182             ($table_divider) # alignment in $4
    1183              
    1184             ( # wrap cells in $5
    1185             $table_rows
    1186             )
    1187             )
    1188             }mx;
    1189              
    1190 142         1268 return $text;
    1191             }
    1192              
    1193             sub _DoAttributes {
    1194 100     100   208 my ($self, $id) = @_;
    1195 100         131 my $result = "";
    1196              
    1197 100 100       269 if (defined $self->{_attributes}{$id}) {
    1198 6         31 while ($self->{_attributes}{$id} =~ s/(\S+)="(.*?)"//) {
    1199 3         20 $result .= qq{ $1="$2"};
    1200             }
    1201 6         32 while ($self->{_attributes}{$id} =~ /(\S+)=(\S+)/g) {
    1202 9         48 $result .= qq{ $1="$2"};
    1203             }
    1204             }
    1205              
    1206 100         320 return $result;
    1207             }
    1208              
    1209             sub _StripMarkdownReferences {
    1210 87     87   167 my ($self, $text) = @_;
    1211 87         191 my $less_than_tab = $self->{tab_width} - 1;
    1212              
    1213 87         1535 while ($text =~ s{
    1214             \n\[\#(.+?)\]:[ \t]* # id = $1
    1215             \n?
    1216             (.*?)\n{1,2} # end at new paragraph
    1217             ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
    1218             }
    1219             {\n}sx)
    1220             {
    1221 5         13 my $id = $1;
    1222 5         14 my $reference = "$2\n";
    1223              
    1224 5         52 $reference =~ s/^[ ]{0,$self->{tab_width}}//gm;
    1225              
    1226 5         30 $reference = $self->_RunBlockGamut($reference, {wrap_in_p_tags => 0});
    1227              
    1228 5         135 $self->{_references}{$id} = $reference;
    1229             }
    1230              
    1231 87         221 return $text;
    1232             }
    1233              
    1234             sub _DoMarkdownCitations {
    1235 91     91   189 my ($self, $text) = @_;
    1236              
    1237 91         338 $text =~ s{
    1238             \[([^\[]*?)\] # citation text = $1
    1239             [ ]? # one optional space
    1240             (?:\n[ ]*)? # one optional newline followed by spaces
    1241             \[\#(.*?)\] # id = $2
    1242             }{
    1243 6         7 my $result;
    1244 6         10 my $anchor_text = $1;
    1245 6         8 my $id = $2;
    1246 6         7 my $count;
    1247              
    1248 6 50       17 if (defined $self->{_references}{$id} ) {
    1249 6         7 my $citation_counter=0;
    1250              
    1251             # See if citation has been used before
    1252 6         6 foreach my $old_id (@{ $self->{_used_references} }) {
      6         12  
    1253 12         10 $citation_counter++;
    1254 12 100       24 $count = $citation_counter if ($old_id eq $id);
    1255             }
    1256              
    1257 6 100       14 if (! defined $count) {
    1258 3         5 $count = ++$self->{_citation_counter};
    1259 3         2 push (@{ $self->{_used_references} }, $id);
      3         6  
    1260             }
    1261              
    1262 6         17 $result = qq[ ($count];
    1263              
    1264 6 100       12 if ($anchor_text ne "") {
    1265 5         11 $result .= qq[, $anchor_text];
    1266             }
    1267              
    1268 6         6 $result .= ")";
    1269             }
    1270             else {
    1271             # No reference exists
    1272 0         0 $result = qq[ ($id];
    1273              
    1274 0 0       0 if ($anchor_text ne "") {
    1275 0         0 $result .= qq[, $anchor_text];
    1276             }
    1277              
    1278 0         0 $result .= ")";
    1279             }
    1280              
    1281 6 100       10 if ($self->_Header2Label($anchor_text) eq "notcited"){
    1282 1         4 $result = qq[];
    1283             }
    1284 6         109 $result;
    1285             }xsge;
    1286              
    1287 91         269 return $text;
    1288             }
    1289              
    1290             sub _PrintMarkdownBibliography {
    1291 84     84   177 my ($self) = @_;
    1292 84         142 my $citation_counter = 0;
    1293 84         119 my $result;
    1294              
    1295 84         149 foreach my $id (@{ $self->{_used_references} }) {
      84         236  
    1296 3         6 $citation_counter++;
    1297 3         17 $result .= qq|

    [$citation_counter] $self->{_references}{$id}

    \n\n|;
    1298             }
    1299 84         178 $result .= "";
    1300              
    1301 84 100       312 if ($citation_counter > 0) {
    1302 1         7 $result = qq[\n\n
    \n{empty_element_suffix}\n

    $self->{bibliography_title}

    \n\n] . $result;
    1303             }
    1304             else {
    1305 83         169 $result = "";
    1306             }
    1307              
    1308 84         212 return $result;
    1309             }
    1310              
    1311             1;
    1312              
    1313             __END__