File Coverage

blib/lib/Text/MultiMarkdown.pm
Criterion Covered Total %
statement 453 474 95.5
branch 136 162 83.9
condition 25 44 56.8
subroutine 41 41 100.0
pod 2 2 100.0
total 657 723 90.8


line stmt bran cond sub pod time code
1             package Text::MultiMarkdown;
2             require 5.008_000;
3 22     22   67792 use strict;
  22         53  
  22         655  
4 22     22   117 use warnings;
  22         45  
  22         648  
5 22     22   139 use re 'eval';
  22         45  
  22         1406  
6              
7 22     22   145 use Digest::MD5 qw(md5_hex);
  22         48  
  22         1400  
8 22     22   12124 use Encode qw();
  22         211647  
  22         608  
9 22     22   159 use Carp qw(croak);
  22         45  
  22         1238  
10 22     22   185 use base qw(Text::Markdown);
  22         44  
  22         14146  
11 22     22   604702 use HTML::Entities qw(encode_entities);
  22         131704  
  22         125792  
12              
13             our $VERSION = '1.001';
14             $VERSION = eval $VERSION;
15             our @EXPORT_OK = qw(markdown);
16              
17             =head1 NAME
18              
19             Text::MultiMarkdown - Convert MultiMarkdown syntax to (X)HTML
20              
21             =head1 SYNOPSIS
22              
23             use Text::MultiMarkdown 'markdown';
24             my $html = markdown($text);
25              
26             use Text::MultiMarkdown 'markdown';
27             my $html = markdown( $text, {
28             empty_element_suffix => '>',
29             tab_width => 2,
30             use_wikilinks => 1,
31             } );
32              
33             use Text::MultiMarkdown;
34             my $m = Text::MultiMarkdown->new;
35             my $html = $m->markdown($text);
36              
37             use Text::MultiMarkdown;
38             my $m = Text::MultiMarkdown->new(
39             empty_element_suffix => '>',
40             tab_width => 2,
41             use_wikilinks => 1,
42             );
43             my $html = $m->markdown( $text );
44              
45             =head1 DESCRIPTION
46              
47             Markdown is a text-to-HTML filter; it translates an easy-to-read /
48             easy-to-write structured text format into HTML. Markdown's text format
49             is most similar to that of plain text email, and supports features such
50             as headers, *emphasis*, code blocks, blockquotes, and links.
51              
52             Markdown's syntax is designed not as a generic markup language, but
53             specifically to serve as a front-end to (X)HTML. You can use span-level
54             HTML tags anywhere in a Markdown document, and you can use block level
55             HTML tags (C<<
>>, C<< >> etc.). Note that by default
56             Markdown isn't interpreted in HTML block-level elements, unless you add
57             a C attribute to the element. See L for
58             details.
59              
60             This module implements the MultiMarkdown markdown syntax extensions from:
61              
62             http://fletcherpenney.net/multimarkdown/
63              
64             =head1 SYNTAX
65              
66             For more information about (original) Markdown's syntax, see:
67              
68             http://daringfireball.net/projects/markdown/
69              
70             This module implements MultiMarkdown, which is an extension to Markdown..
71              
72             The extension is documented at:
73              
74             http://fletcherpenney.net/multimarkdown/
75              
76             and borrows from php-markdown, which lives at:
77              
78             http://michelf.com/projects/php-markdown/extra/
79              
80             This documentation is going to be moved/copied into this module for clearer reading in a future release..
81              
82             =head1 OPTIONS
83              
84             MultiMarkdown supports a number of options to it's processor which control the behaviour of the output document.
85              
86             These options can be supplied to the constructor, on in a hash with the individual calls to the markdown method.
87             See the synopsis for examples of both of the above styles.
88              
89             The options for the processor are:
90              
91             =over
92              
93             =item use_metadata
94              
95             Controls the metadata options below.
96              
97             =item strip_metadata
98              
99             If true, any metadata in the input document is removed from the output document (note - does not take effect in complete document format).
100              
101             =item empty element suffix
102              
103             This option can be used to generate normal HTML output. By default, it is ' />', which is xHTML, change to '>' for normal HTML.
104              
105             =item img_ids
106              
107             Controls if tags generated have an id attribute. Defaults to true.
108             Turn off for compatibility with the original markdown.
109              
110             =item heading_ids
111              
112             Controls if tags generated have an id attribute. Defaults to true.
113             Turn off for compatibility with the original markdown.
114              
115             =item bibliography_title
116              
117             The title of the generated bibliography, defaults to 'Bibliography'.
118              
119             =item tab_width
120              
121             Controls indent width in the generated markup, defaults to 4
122              
123             =item disable_tables
124              
125             If true, this disables the MultiMarkdown table handling.
126              
127             =item disable_footnotes
128              
129             If true, this disables the MultiMarkdown footnotes handling.
130              
131             =item disable_bibliography
132              
133             If true, this disables the MultiMarkdown bibliography/citation handling.
134              
135             =item disable_definition_lists
136              
137             If true, this disables the MultiMarkdown definition list handling.
138              
139             =back
140              
141             A number of possible items of metadata can also be supplied as options.
142             Note that if the use_metadata is true then the metadata in the document will overwrite the settings on command line.
143              
144             Metadata options supported are:
145              
146             =over
147              
148             =item document_format
149              
150             =item use_wikilinks
151              
152             =item base_url
153              
154             =item self_url - The document url is prepended to the "#" anchor of footnotes.
155              
156             =back
157              
158             =head1 METADATA
159              
160             MultiMarkdown supports the concept of 'metadata', which allows you to specify a number of formatting options
161             within the document itself. Metadata should be placed in the top few lines of a file, on value per line as colon separated key/value pairs.
162             The metadata should be separated from the document with a blank line.
163              
164             Most metadata keys are also supported as options to the constructor, or options
165             to the markdown method itself. (Note, as metadata, keys contain space, whereas options the keys are underscore separated.)
166              
167             You can attach arbitrary metadata to a document, which is output in HTML tags if unknown, see t/11document_format.t for more info.
168              
169             A list of 'known' metadata keys, and their effects are listed below:
170              
171             =over
172              
173             =item document format
174              
175             If set to 'complete', MultiMarkdown will render an entire xHTML page, otherwise it will render a document fragment
176              
177             =over
178              
179             =item css
180              
181             Sets a CSS file for the file, if in 'complete' document format.
182              
183             =item title
184              
185             Sets the page title, if in 'complete' document format.
186              
187             =back
188              
189             =item use wikilinks
190              
191             If set to '1' or 'on', causes links that are WikiWords to automatically be processed into links.
192              
193             =item base url
194              
195             This is the base URL for referencing wiki pages. In this is not supplied, all wiki links are relative.
196              
197             =back
198              
199             =head1 METHODS
200              
201             =head2 new
202              
203             A simple constructor, see the SYNTAX and OPTIONS sections for more information.
204              
205             =cut
206              
207             sub new {
208 27     27 1 14977 my ($class, %p) = @_;
209              
210             # Default metadata to 1
211 27 100       138 $p{use_metadata} = 1 unless exists $p{use_metadata};
212             # Squash value to [01]
213 27 100       98 $p{use_metadata} = $p{use_metadata} ? 1 : 0;
214              
215 27   100     179 $p{base_url} ||= ''; # This is the base url to be used for WikiLinks
216              
217 27 50 33     435 $p{tab_width} = 4 unless (defined $p{tab_width} and $p{tab_width} =~ m/^\d+$/);
218              
219 27   50     150 $p{document_format} ||= '';
220              
221 27   50     131 $p{empty_element_suffix} ||= ' />'; # Change to ">" for HTML output
222              
223             #$p{heading_ids} = defined $p{heading_ids} ? $p{heading_ids} : 1;
224              
225             # For use with WikiWords and [[Wiki Links]]
226             # NOTE: You can use \WikiWord to prevent a WikiWord from being treated as a link
227 27 100       69 $p{use_wikilinks} = $p{use_wikilinks} ? 1 : 0;
228              
229 27 100       84 $p{heading_ids} = defined $p{heading_ids} ? $p{heading_ids} : 1;
230 27 50       90 $p{img_ids} = defined $p{img_ids} ? $p{img_ids} : 1;
231              
232 27   50     162 $p{bibliography_title} ||= 'Bibliography'; # FIXME - Test and document, can also be in metadata!
233              
234 27   100     130 $p{self_url} ||= ''; # Used in footnotes to prepend anchors
235              
236 27         68 my $self = { params => \%p };
237 27   33     123 bless $self, ref($class) || $class;
238 27         157 return $self;
239             }
240              
241             =head2 markdown
242              
243             The main function as far as the outside world is concerned. See the SYNOPSIS
244             for details on use.
245              
246             =cut
247              
248             sub markdown {
249 89     89 1 41894 my ( $self, $text, $options ) = @_;
250              
251             # Detect functional mode, and create an instance for this run..
252 89 100       295 unless (ref $self) {
253 2 100       7 if ( $self ne __PACKAGE__ ) {
254 1         4 my $ob = __PACKAGE__->new();
255             # $self is text, $text is options
256 1         4 return $ob->markdown($self, $text);
257             }
258             else {
259 1         21 croak('Calling ' . $self . '->markdown (as a class method) is not supported.');
260             }
261             }
262              
263 87   100     435 $options ||= {};
264              
265 87         140 %$self = (%{ $self->{params} }, %$options, params => $self->{params});
  87         1108  
266              
267 87         311 $self->_CleanUpRunData($options);
268              
269 87         918 return $self->_Markdown($text);
270             }
271              
272             sub _CleanUpRunData {
273 87     87   163 my ($self, $options) = @_;
274             # Clear the global hashes. If we don't clear these, you get conflicts
275             # from other articles when generating a page which contains more than
276             # one article (e.g. an index page that shows the N most recent
277             # articles):
278 87         180 $self->{_crossrefs} = {};
279 87         159 $self->{_footnotes} = {};
280 87         159 $self->{_references} = {};
281 87         176 $self->{_used_footnotes} = []; # Why do we need 2 data structures for footnotes? FIXME
282 87         171 $self->{_used_references} = []; # Ditto for references
283 87         147 $self->{_citation_counter} = 0;
284 87         150 $self->{_metadata} = {};
285 87         150 $self->{_attributes} = {}; # Used for extra attributes on links / images.
286              
287 87         388 $self->SUPER::_CleanUpRunData($options);
288             }
289              
290             sub _Markdown {
291             #
292             # Main function. The order in which other subs are called here is
293             # essential. Link and image substitutions need to happen before
294             # _EscapeSpecialChars(), so that any *'s or _'s in the
295             # and tags get encoded.
296             #
297             # Can't think of any good way to make this inherit from the Markdown version as ordering is so important, so I've left it.
298 87     87   180 my ($self, $text) = @_;
299              
300 87         283 $text = $self->_CleanUpDoc($text);
301              
302             # MMD only. Strip out MetaData
303 87 100 100     54936 $text = $self->_ParseMetaData($text) if ($self->{use_metadata} || $self->{strip_metadata});
304              
305             # Turn block-level HTML blocks into hash entries
306 87         458 $text = $self->_HashHTMLBlocks($text, {interpret_markdown_on_attribute => 1});
307              
308 87         78535 $text = $self->_StripLinkDefinitions($text);
309              
310             # MMD only
311 87         221 $text = $self->_StripMarkdownReferences($text);
312              
313 87         438 $text = $self->_RunBlockGamut($text, {wrap_in_p_tags => 1});
314              
315             # MMD Only
316 87 100       20058 $text = $self->_DoMarkdownCitations($text) unless $self->{disable_bibliography};
317 87 100       350 $text = $self->_DoFootnotes($text) unless $self->{disable_footnotes};
318              
319 87         355 $text = $self->_UnescapeSpecialChars($text);
320              
321             # MMD Only
322             # This must follow _UnescapeSpecialChars
323 87         13394 $text = $self->_UnescapeWikiWords($text);
324 87 100       324 $text = $self->_FixFootnoteParagraphs($text) unless $self->{disable_footnotes}; # TODO: remove. Doesn't make any difference to test suite pass/failure
325 87 100       422 $text .= $self->_PrintFootnotes() unless $self->{disable_footnotes};
326 87 100       300 $text .= $self->_PrintMarkdownBibliography() unless $self->{disable_bibliography};
327              
328 87         415 $text = $self->_ConvertCopyright($text);
329              
330             # MMD Only
331 87 100       29331 if (lc($self->{document_format}) =~ /^complete\s*$/) {
332 4         21 return $self->_xhtmlMetaData() . "\n" . $text . "\n\n";
333             }
334             else {
335 83         263 return $self->_textMetaData() . $text . "\n";
336             }
337              
338             }
339              
340             #
341             # Routines which are overridden for slightly different behaviour in MultiMarkdown
342             #
343              
344             # Delegate to super class, then do wiki links
345             sub _RunSpanGamut {
346 763     763   545467 my ($self, $text) = @_;
347              
348 763         1833 $text = $self->SUPER::_RunSpanGamut($text);
349              
350             # Process WikiWords
351 763 100       222567 if ($self->_UseWikiLinks()) {
352 13         42 $text = $self->_DoWikiLinks($text);
353              
354             # And then reprocess anchors and images
355             # FIXME - This is needed exactly why?
356 13         94 $text = $self->_DoImages($text);
357 13         668 $text = $self->_DoAnchors($text);
358             }
359              
360 763         2376 return $text;
361             }
362              
363             # Don't do Wiki Links in Headers, otherwise delegate to super class
364             # Do tables stright after headers
365             sub _DoHeaders {
366 146     146   5515 my ($self, $text) = @_;
367              
368 146         367 local $self->{use_wikilinks} = 0;
369              
370 146         427 $text = $self->SUPER::_DoHeaders($text);
371              
372             # Do tables to populate the table id's for cross-refs
373             # (but after headers as the tables can contain cross-refs to other things, so we want the header cross-refs)
374 146         3224 $text = $self->_DoTables($text);
375             }
376              
377             sub _DoLists {
378 249     249   16667 my ($self, $text) = @_;
379             $text = $self->_DoDefinitionLists($text)
380 249 50       799 unless $self->{disable_definition_lists};
381 249         959 $self->SUPER::_DoLists($text);
382             }
383              
384             sub _DoDefinitionLists {
385 249     249   464 my ($self, $text) = @_;
386             # Uses the syntax proposed by Michel Fortin in PHP Markdown Extra
387              
388 249         460 my $less_than_tab = $self->{tab_width} -1;
389              
390 249         1018 my $line_start = qr{
391             [ ]{0,$less_than_tab}
392             }mx;
393              
394 249         1592 my $term = qr{
395             $line_start
396             [^:\s][^\n]*\n
397             }sx;
398              
399 249         1524 my $definition = qr{
400             \n?[ ]{0,$less_than_tab}
401             \:[ \t]+(.*?)\n
402             ((?=\n?\:)|\n|\Z) # Lookahead for next definition, two returns,
403             # or the end of the document
404             }sx;
405              
406 249         2256 my $definition_block = qr{
407             ((?:$term)+) # $1 = one or more terms
408             ((?:$definition)+) # $2 = by one or more definitions
409             }sx;
410              
411 249         2186 my $definition_list = qr{
412             (?:$definition_block\n*)+ # One ore more definition blocks
413             }sx;
414              
415 249         113051 $text =~ s{
416             ($definition_list) # $1 = the whole list
417             }{
418 1         6 my $list = $1;
419 1         3 my $result = $1;
420            
421 1         56 $list =~ s{
422             (?:$definition_block)\n*
423             }{
424 2         8 my $terms = $1;
425 2         5 my $defs = $2;
426              
427 2         23 $terms =~ s{
428             [ ]{0,$less_than_tab}
429             (.*)
430             \s*
431             }{
432 4         10 my $term = $1;
433 4         5 my $result = "";
434 4         19 $term =~ s/^\s*(.*?)\s*$/$1/;
435 4 100       16 if ($term !~ /^\s*$/){
436 2         6 $result = "
" . $self->_RunSpanGamut($1) . "
\n";
437             }
438 4         19 $result;
439             }xmge;
440              
441 2         41 $defs =~ s{
442             $definition
443             }{
444 3         30 my $def = $1 . "\n";
445 3         40 $def =~ s/^[ ]{0,$self->{tab_width}}//gm;
446 3         17 "
\n" . $self->_RunBlockGamut($def) . "\n
\n";
447             }xsge;
448              
449 2         43 $terms . $defs . "\n";
450             }xsge;
451              
452 1         6 "
\n" . $list . "
\n\n";
453             }xsge;
454              
455 249         1084 return $text
456             }
457              
458             # Generating headers automatically generates X-refs in MultiMarkdown (always)
459             # Also, by default, you get id attributes added to your headers, you can turn this
460             # part of the MultiMarkdown behaviour off with the heading_ids flag.
461             sub _GenerateHeader {
462 26     26   450 my ($self, $level, $id) = @_;
463              
464 26 100       107 my $label = $self->{heading_ids} ? $self->_Header2Label($id) : '';
465 26         73 my $header = $self->_RunSpanGamut($id);
466              
467 26 100       75 if ($label ne '') {
468 24         92 $self->{_crossrefs}{$label} = "#$label";
469 24         53 $self->{_titles}{$label} = $header;
470 24         55 $label = qq{ id="$label"};
471             }
472              
473 26         383 return "$header\n\n";
474             }
475              
476             # Protect Wiki Links in Code Blocks (if wiki links are turned on), then delegate to super class.
477             sub _EncodeCode {
478 368     368   23752 my ($self, $text) = @_;
479              
480 368 100       667 if ($self->_UseWikiLinks()) {
481 3         32 $text =~ s/([A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*)/\\$1/gx;
482             }
483              
484 368         855 return $self->SUPER::_EncodeCode($text);
485             }
486              
487             # Full function pulled out of Text::Markdown as MultiMarkdown supports supplying extra 'attributes' with links and
488             # images which are then pushed back into the generated HTML, and this needs a different regex. It should be possible
489             # to extract the just the regex from Text::Markdown, and use that here, but I haven't done so yet.
490             # Strip footnote definitions at the same time as stripping link definitions.
491             # Also extract images and then replace them straight back in (code smell!) to be able to cross reference images
492             sub _StripLinkDefinitions {
493             #
494             # Strips link definitions from text, stores the URLs and titles in
495             # hash references.
496             #
497 87     87   223 my ($self, $text) = @_;
498              
499 87 100       349 $text = $self->_StripFootnoteDefinitions($text) unless $self->{disable_footnotes};
500              
501 87         178 my $less_than_tab = $self->{tab_width} - 1;
502              
503             # Link defs are in the form: ^[id]: url "optional title"
504             # FIXME - document attributes here.
505 87         2812 while ($text =~ s{
506             # Pattern altered for MultiMarkdown
507             # in order to not match citations or footnotes
508             ^[ ]{0,$less_than_tab}\[([^#^].*)\]: # id = $1
509             [ \t]*
510             \n? # maybe *one* newline
511             [ \t]*
512             ? # url = $2
513             [ \t]*
514             \n? # maybe one newline
515             [ \t]*
516             (?:
517             (?<=\s) # lookbehind for whitespace
518             ["(]
519             (.+?) # title = $3
520             [")]
521             [ \t]*
522             )? # title is optional
523              
524             # MultiMarkdown addition for attribute support
525             \n?
526             ( # Attributes = $4
527             (?<=\s) # lookbehind for whitespace
528             (([ \t]*\n)?[ \t]*((\S+=\S+)|(\S+=".*?")))*
529             )?
530             [ \t]*
531             # /addition
532             (?:\n+|\Z)
533             }
534 45         175 {}mx) {
535 45 100       853 $self->{_urls}{lc $1} = $self->_EncodeAmpsAndAngles( $2 ); # Link IDs are case-insensitive
536 12         36 if ($3) {
537 12         32 $self->{_titles}{lc $1} = $3;
538             $self->{_titles}{lc $1} =~ s/"/"/g;
539             }
540              
541 45 100       1912 # MultiMarkdown addition "
542 4         105 if ($4) {
543             $self->{_attributes}{lc $1} = $4;
544             }
545             # /addition
546             }
547 87         286  
548             $text = $self->_GenerateImageCrossRefs($text);
549 87         176  
550             return $text;
551             }
552              
553             # Add the extra cross-references to headers that MultiMarkdown supports, and also
554             # the additional link attributes.
555             sub _GenerateAnchor {
556 160     160   36556 # FIXME - Fugly, change to named params?
557             my ($self, $whole_match, $link_text, $link_id, $url, $title, $attributes) = @_;
558              
559 160 100       405 # Allow automatic cross-references to headers
560 96         275 if (defined $link_id) {
561 96 100       285 my $label = $self->_Header2Label($link_id);
562 8   33     31 if (defined $self->{_crossrefs}{$label}) {
563             $url ||= $self->{_crossrefs}{$label};
564 96 100       242 }
565 11   33     43 if ( defined $self->{_titles}{$label} ) {
566             $title ||= $self->{_titles}{$label};
567 96   66     321 }
568             $attributes ||= $self->_DoAttributes($label);
569 160         553 }
570             return $self->SUPER::_GenerateAnchor($whole_match, $link_text, $link_id, $url, $title, $attributes);
571             }
572              
573             # Add the extra cross-references to images that MultiMarkdown supports, and also
574             # the additional attributes.
575             sub _GenerateImage {
576 6     6   829 # FIXME - Fugly, change to named params?
577             my ($self, $whole_match, $alt_text, $link_id, $url, $title, $attributes) = @_;
578 6 50 33     31  
579 6         17 if (defined $alt_text && length $alt_text) {
580 6         30 my $label = $self->_Header2Label($alt_text);
581 6 50       25 $self->{_crossrefs}{$label} = "#$label";
582             $attributes .= $self->{img_ids} ? qq{ id="$label"} : '';
583             }
584 6 100       24  
585             $attributes .= $self->_DoAttributes($link_id) if defined $link_id;
586 6         26  
587             $self->SUPER::_GenerateImage($whole_match, $alt_text, $link_id, $url, $title, $attributes);
588             }
589              
590              
591             #
592             # MultiMarkdown specific routines
593             #
594              
595             # FIXME - This is really really ugly!
596 82     82   169 sub _ParseMetaData {
597 82         134 my ($self, $text) = @_;
598             my $clean_text = "";
599 82         171  
600             my ($inMetaData, $currentKey) = (1, '');
601 82         969  
602 3073 50 100     8224 foreach my $line ( split /\n/, $text ) {
      33        
603 3073 100       4761 $line =~ /^\s*$/ and $inMetaData = 0 and $clean_text .= $line and next;
604 99 100       217 if ($inMetaData) {
605 97 100       292 next unless $self->{use_metadata}; # We can come in here as use_metadata => 0, strip_metadata => 1
606 33         75 if ($line =~ /^([a-zA-Z0-9][0-9a-zA-Z _-]+?):\s*(.*)$/ ) {
607 33         55 $currentKey = $1;
608 33 50       131 $currentKey =~ s/ / /g;
609 33 100       86 $self->{_metadata}{$currentKey} = defined $2 ? $2 : '';
610 2         7 if (lc($currentKey) eq "format") {
611             $self->{document_format} = $self->{_metadata}{$currentKey};
612 33 100       71 }
613 5         18 if (lc($currentKey) eq "base url") {
614             $self->{base_url} = $self->{_metadata}{$currentKey};
615 33 50       76 }
616 0         0 if (lc($currentKey) eq "bibliography title") {
617 0         0 $self->{bibliography_title} = $self->{_metadata}{$currentKey};
618             $self->{bibliography_title} =~ s/\s*$//;
619             }
620             }
621 64 100       166 else {
622             if ($currentKey eq "") {
623 63         169 # No metadata present
624 63         104 $clean_text .= "$line\n";
625 63         143 $inMetaData = 0;
626             next;
627 1 50       5 }
628 1         5 if ($line =~ /^\s*(.+)$/ ) {
629             $self->{_metadata}{$currentKey} .= "\n$1";
630             }
631             }
632             }
633 2974         4779 else {
634             $clean_text .= "$line\n";
635             }
636             }
637              
638 82         379 # Recheck for leading blank lines
639             $clean_text =~ s/^\n+//s;
640 82         369  
641             return $clean_text;
642             }
643              
644             # FIXME - This is really ugly, why do we match stuff and substitute it with the thing we just matched?
645 87     87   178 sub _GenerateImageCrossRefs {
646             my ($self, $text) = @_;
647              
648             #
649             # First, handle reference-style labeled images: ![alt text][id]
650 87         282 #
651             $text =~ s{
652             ( # wrap whole match in $1
653             !\[
654             (.*?) # alt text = $2
655             \]
656              
657             [ ]? # one optional space
658             (?:\n[ ]*)? # one optional newline followed by spaces
659              
660             \[
661             (.*?) # id = $3
662             \]
663              
664             )
665 7         27 }{
666 7         21 my $whole_match = $1;
667 7         15 my $alt_text = $2;
668             my $link_id = lc $3;
669 7 100       25  
670 2         4 if ($link_id eq "") {
671             $link_id = lc $alt_text; # for shortcut links like ![this][].
672             }
673 7         27  
674             $alt_text =~ s/"/"/g;
675 7 100       27  
676 4         11 if (defined $self->{_urls}{$link_id}) {
677 4         13 my $label = $self->_Header2Label($alt_text);
678             $self->{_crossrefs}{$label} = "#$label";
679             }
680 7         118  
681             $whole_match;
682             }xsge;
683              
684             #
685             # Next, handle inline images: ![alt text](url "optional title")
686             # Don't forget: encode * and _
687 87         322  
688             $text =~ s{
689             ( # wrap whole match in $1
690             !\[
691             (.*?) # alt text = $2
692             \]
693             \( # literal paren
694             [ \t]*
695             ? # src url = $3
696             [ \t]*
697             ( # $4
698             (['"]) # quote char = $5
699             (.*?) # title = $6
700             \5 # matching quote
701             [ \t]*
702             )? # title is optional
703             \)
704             )
705 7         21 }{
706 7         22 my $result;
707 7         16 my $whole_match = $1;
708             my $alt_text = $2;
709 7         15  
710 7         27 $alt_text =~ s/"/"/g;
711 7         29 my $label = $self->_Header2Label($alt_text);
712 7         86 $self->{_crossrefs}{$label} = "#$label";
713             $whole_match;
714             }xsge;
715 87         255  
716             return $text;
717             }
718              
719 84     84   172 sub _StripFootnoteDefinitions {
720 84         179 my ($self, $text) = @_;
721             my $less_than_tab = $self->{tab_width} - 1;
722 84         1045  
723             while ($text =~ s{
724             \n\[\^([^\n]+?)\]\:[ \t]*# id = $1
725             \n?
726             (.*?)\n{1,2} # end at new paragraph
727             ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
728             }
729 7         25 {\n}sx)
730 7         22 {
731 7         62 my $id = $1;
732             my $footnote = "$2\n";
733 7         28 $footnote =~ s/^[ ]{0,$self->{tab_width}}//gm;
734              
735             $self->{_footnotes}{$self->_Id2Footnote($id)} = $footnote;
736 84         221 }
737              
738             return $text;
739             }
740 84     84   165  
741             sub _DoFootnotes {
742 84 50       204 my ($self, $text) = @_;
743              
744             return '' unless length $text;
745 84         168  
  84         373  
746 7         30 # First, run routines that get skipped in footnotes
747 7         166 foreach my $label (sort keys %{ $self->{_footnotes} }) {
748 7         927 my $footnote = $self->_RunBlockGamut($self->{_footnotes}{$label}, {wrap_in_p_tags => 1});
749 7         20 $footnote = $self->_UnescapeSpecialChars($footnote);
750             $footnote = $self->_DoMarkdownCitations($footnote);
751             $self->{_footnotes}{$label} = $footnote;
752 84         159 }
753              
754 84         187 my $footnote_counter = 0;
755              
756             $text =~ s{
757 7         20 \[\^(.*?)\] # id = $1
758 7         20 }{
759             my $result = '';
760 7 50       29 my $id = $self->_Id2Footnote($1);
761 7         13  
762 7 50       26 if (defined $self->{_footnotes}{$id} ) {
763 0         0 $footnote_counter++;
764             if ($self->{_footnotes}{$id} =~ /^glossary:/i) {
765             $result = qq{$footnote_counter};
766 7         29 }
767             else {
768 7         10 $result = qq{$footnote_counter};
  7         20  
769             }
770 7         28 push (@{ $self->{_used_footnotes} }, $id);
771             }
772             $result;
773 84         178 }xsge;
774              
775             return $text;
776             }
777              
778 84     84   256 # TODO: remove. Doesn't make any difference to test suite pass/failure
779             sub _FixFootnoteParagraphs {
780 84         279 my ($self, $text) = @_;
781              
782 84         157 $text =~ s(^

)()gm;

783              
784             return $text;
785             }
786 84     84   233  
787 84         136 sub _PrintFootnotes {
788 84         125 my ($self) = @_;
789             my $footnote_counter = 0;
790 84         117 my $result;
  84         232  
791 7         15  
792 7         14 foreach my $id (@{ $self->{_used_footnotes} }) {
793             $footnote_counter++;
794 7         42 my $footnote = $self->{_footnotes}{$id};
795 7         19  
796 7 50       22 $footnote =~ s/(<\/(p(re)?|ol|ul)>)$//;
797             my $footnote_closing_tag = $1;
798 7 50       23 $footnote_closing_tag = '' if !defined $footnote_closing_tag;
799              
800             if ($footnote =~ s/^glossary:\s*//i) {
801 0         0 # Add some formatting for glossary entries
802              
803             $footnote =~ s{
804             ^(.*?) # $1 = term
805             \s*
806             (?:\(([^\(\)]*)\)[^\n]*)? # $2 = optional sort key
807 0         0 \n
808             }{
809 0 0       0 my $glossary = qq{$1};
810 0         0  
811             if ($2) {
812             $glossary.= qq{};
813 0         0 };
814              
815             $glossary . q{:

};

816 0         0 }egsx;
817              
818             $result .= qq{
  • $footnote ↩$footnote_closing_tag
  • \n\n};
    819 7         49 }
    820             else {
    821             $result .= qq{
  • $footnote ↩$footnote_closing_tag
  • \n\n};
    822             }
    823 84 100       263 }
    824 4         18  
    825             if ($footnote_counter > 0) {
    826             $result = qq[\n\n
    \n{empty_element_suffix}\n
      \n\n] . $result . "
    \n
    ";
    827 80         137 }
    828             else {
    829             $result = "";
    830 84         249 }
    831              
    832             return $result;
    833             }
    834 145     145   278  
    835 145         261 sub _Header2Label {
    836 145         465 my ($self, $header) = @_;
    837 145         483 my $label = lc $header;
    838             $label =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters
    839 145         372 while ($label =~ s/^[^A-Za-z]//g)
    840             {}; # Strip illegal leading characters
    841             return $label;
    842             }
    843              
    844 14     14   38 sub _Id2Footnote {
    845 14         31 # Since we prepend "fn:", we can allow leading digits in footnotes
    846 14         29 my ($self, $id) = @_;
    847 14         73 my $footnote = lc $id;
    848             $footnote =~ s/[^A-Za-z0-9:_.-]//g; # Strip illegal characters
    849             return $footnote;
    850             }
    851 4     4   8  
    852             sub _xhtmlMetaData {
    853 4         9 my ($self) = @_;
    854             # FIXME: Should not assume encoding
    855             my $result; # FIXME: This breaks some things in IE 6- = qq{\n};
    856              
    857 4         8 # This screws up xsltproc - make sure to use `-nonet -novalid` if you
    858             # have difficulty
    859 4         9 $result .= qq{\n};
    860              
    861 4         9 $result.= "\n\t\n";
      4         25  
    862 15 100       338  
        100          
        100          
    863 3         11 foreach my $key (sort keys %{$self->{_metadata}} ) {
    864             if (lc($key) eq "title") {
    865             $result.= "\t\t" . encode_entities($self->{_metadata}{$key}) . "\n";
    866 3         14 }
    867             elsif (lc($key) eq "css") {
    868             $result.= qq[\t\t{empty_element_suffix}\n];
    869 1         5 }
    870             elsif( lc($key) eq "xhtml header") {
    871             $result .= qq[\t\t$self->{_metadata}{$key}\n]
    872             }
    873 8         29 else {
    874             $result.= qq[\t\t
    875             . qq[content="] . encode_entities($self->{_metadata}{$key}) . qq["$self->{empty_element_suffix}\n];
    876 4         36 }
    877             }
    878 4         30 $result.= "\t\n";
    879              
    880             return $result;
    881             }
    882 83     83   150  
    883 83         185 sub _textMetaData {
    884             my ($self) = @_;
    885 83 100       241 my $result = "";
    886              
    887 81         122 return $result if $self->{strip_metadata};
      81         306  
    888 16         61  
    889             foreach my $key (sort keys %{$self->{_metadata}} ) {
    890 81         233 $result .= "$key: $self->{_metadata}{$key}\n";
    891             }
    892 81 100       253 $result =~ s/\s*\n/{empty_element_suffix}\n/g;
    893 9         18  
    894             if ($result ne "") {
    895             $result.= "\n";
    896 81         729 }
    897              
    898             return $result;
    899             }
    900 1144     1144   1943  
    901 1144 100       2730 sub _UseWikiLinks {
    902 1132         1650 my ($self) = @_;
      55         152  
      1132         2727  
    903 1132 100       3204 return 1 if $self->{use_wikilinks};
    904 17 50       70 my ($k) = grep { /use wikilinks/i } keys %{$self->{_metadata}};
    905 0         0 return unless $k;
    906             return 1 if $self->{_metadata}{$k};
    907             return;
    908             }
    909 12     12   42  
    910             sub _CreateWikiLink {
    911 12         22 my ($self, $title) = @_;
    912 12         80  
    913 12         21 my $id = $title;
    914 12         21 $id =~ s/ /_/g;
    915 12         23 $id =~ s/__+/_/g;
    916             $id =~ s/^_//g;
    917 12         21 $id =~ s/_$//;
    918              
    919 12         72 $title =~ s/_/ /g;
    920              
    921             return "[$title](" . $self->{base_url} . "$id)";
    922             }
    923              
    924 13     13   26 sub _DoWikiLinks {
    925 13         24  
    926 13         21 my ($self, $text) = @_;
    927             my $WikiWord = '[A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*';
    928 13 50       26 my $FreeLinkPattern = "([-,.()' _0-9A-Za-z\x80-\xff]+)";
    929              
    930 13         142 if ($self->_UseWikiLinks()) {
    931             # FreeLinks
    932             $text =~ s{
    933 1         4 \[\[($FreeLinkPattern)\]\]
    934 1         27 }{
    935             my $label = $1;
    936             $label =~ s{
    937 0         0 ([\s\>])($WikiWord)
    938             }{
    939             $1 ."\\" . $2
    940 1         7 }xsge;
    941              
    942             $self->_CreateWikiLink($label)
    943             }xsge;
    944 13         215  
    945             # WikiWords
    946             $text =~ s{
    947 8         31 ([\s])($WikiWord)
    948             }{
    949             $1 . $self->_CreateWikiLink($2)
    950             }xsge;
    951 13         182  
    952             # Catch WikiWords at beginning of text
    953 3         12 $text =~ s{^($WikiWord)
    954             }{
    955             $self->_CreateWikiLink($1)
    956             }xse;
    957             }
    958 13         42  
    959              
    960             return $text;
    961             }
    962 87     87   180  
    963 87         147 sub _UnescapeWikiWords {
    964             my ($self, $text) = @_;
    965             my $WikiWord = '[A-Z]+[a-z\x80-\xff]+[A-Z][A-Za-z\x80-\xff]*';
    966 87         773  
    967             # Unescape escaped WikiWords
    968 87         262 $text =~ s/(?<=\B)\\($WikiWord)/$1/g;
    969              
    970             return $text;
    971             }
    972 146     146   258  
    973             sub _DoTables {
    974 146 100       370 my ($self, $text) = @_;
    975              
    976 142         238 return $text if $self->{disable_tables};
    977              
    978             my $less_than_tab = $self->{tab_width} - 1;
    979              
    980             # Algorithm inspired by PHP Markdown Extra's
    981             #
    982              
    983 142         726 # Reusable regexp's to match table
    984              
    985             my $line_start = qr{
    986             [ ]{0,$less_than_tab}
    987 142         364 }mx;
    988              
    989             my $table_row = qr{
    990             [^\n]*?\|[^\n]*?\n
    991 142         831 }mx;
    992              
    993             my $first_row = qr{
    994             $line_start
    995             \S+.*?\|.*?\n
    996 142         800 }mx;
    997              
    998             my $table_rows = qr{
    999             (\n?$table_row)
    1000 142         771 }mx;
    1001              
    1002             my $table_caption = qr{
    1003             $line_start
    1004             \[.*?\][ \t]*\n
    1005 142         898 }mx;
    1006              
    1007             my $table_divider = qr{
    1008             $line_start
    1009             [\|\-\:\.][ \-\|\:\.]* \| [ \-\|\:\.]*
    1010 142         2216 }mx;
    1011              
    1012             my $whole_table = qr{
    1013             ($table_caption)? # Optional caption
    1014             ($first_row # First line must start at beginning
    1015             ($table_row)*?)? # Header Rows
    1016             $table_divider # Divider/Alignment definitions
    1017             $table_rows+ # Body Rows
    1018             ($table_caption)? # Optional caption
    1019             }mx;
    1020              
    1021              
    1022 142         11973 # Find whole tables, then break them up and process them
    1023              
    1024             $text =~ s{
    1025             ^($whole_table) # Whole table in $1
    1026 3         13 (\n|\Z) # End of file or 2 blank lines
    1027 3         5 }{
    1028 3         5 my $table = $1;
    1029 3         5 my $result = "\n"; \n"; \n"; {empty_element_suffix}\n]; {empty_element_suffix}\n]; {empty_element_suffix}\n]; {empty_element_suffix}\n]; \n"; \n"; \n"; \n\n"; \n\n\n"; \n"; \n"; \n
    1030             my @alignments;
    1031             my $use_row_header = 0;
    1032              
    1033 3 100       55 # Add Caption, if present
    1034 2 50       8  
    1035             if ($table =~ s/^$line_start\[\s*(.*?)\s*\](\[\s*(.*?)\s*\])?[ \t]*$//m) {
    1036 2         10 if (defined $3) {
    1037 2         7 # add caption id to cross-ref list
    1038             my $table_id = $self->_Header2Label($3);
    1039 2         7 $result .= qq{
    } . $self->_RunSpanGamut($1). "
    1040 2         8  
    1041             $self->{_crossrefs}{$table_id} = "#$table_id";
    1042             $self->{_titles}{$table_id} = "$1";
    1043 0         0 }
    1044             else {
    1045             $result .= "
    " . $self->_RunSpanGamut($1). "
    1046             }
    1047             }
    1048              
    1049             # If a second "caption" is present, treat it as a summary
    1050             # However, this is not valid in XHTML 1.0 Strict
    1051             # But maybe in future
    1052 3 50       31  
    1053             # A summary might be longer than one line
    1054             if ($table =~ s/\n$line_start\[\s*(.*?)\s*\][ \t]*\n/\n/s) {
    1055             # $result .= "" . $self->_RunSpanGamut($1) . "\n";
    1056             }
    1057              
    1058             # Now, divide table into header, alignment, and body
    1059              
    1060 3         10 # First, add leading \n in case there is no header
    1061              
    1062             $table = "\n" . $table;
    1063              
    1064 3         57 # Need to be greedy
    1065              
    1066 3         10 $table =~ s/\n($table_divider)\n(($table_rows)+)//s;
    1067 3         7  
    1068             my $alignment_string = $1;
    1069             my $body = $2;
    1070 3         19  
    1071 8         18 # Process column alignment
    1072 8 100       27 while ($alignment_string =~ /\|?\s*(.+?)\s*(\||\Z)/gs) {
    1073 4 100       11 my $cell = $self->_RunSpanGamut($1);
    1074 2         7 if ($cell =~ /\:$/) {
    1075 2         14 if ($cell =~ /^\:/) {
    1076             $result .= qq[
    1077             push(@alignments,"center");
    1078 2         7 }
    1079 2         18 else {
    1080             $result .= qq[
    1081             push(@alignments,"right");
    1082             }
    1083 4 50       10 }
    1084 0         0 else {
    1085 0         0 if ($cell =~ /^\:/) {
    1086             $result .= qq[
    1087             push(@alignments,"left");
    1088 4 50 33     20 }
    1089 0         0 else {
    1090 0         0 if (($cell =~ /^\./) || ($cell =~ /\.$/)) {
    1091             $result .= qq[
    1092             push(@alignments,"char");
    1093 4         14 }
    1094 4         24 else {
    1095             $result .= "{empty_element_suffix}\n";
    1096             push(@alignments,"");
    1097             }
    1098             }
    1099             }
    1100             }
    1101 3         13  
    1102             # Process headers
    1103 3         7 $table =~ s/^\n+//s;
    1104              
    1105             $result .= "
    1106 3         8  
    1107             # Strip blank lines
    1108 3         10 $table =~ s/\n[ \t]*\n/\n/g;
    1109              
    1110 3         6 foreach my $line (split(/\n/, $table)) {
    1111 3         5 # process each line (row) in table
    1112 3         20 $result .= "
    1113             my $count=0;
    1114 8         19 while ($line =~ /\|?\s*([^\|]+?)\s*(\|+|\Z)/gs) {
    1115 8         21 # process contents of each cell
    1116 8         14 my $cell = $self->_RunSpanGamut($1);
    1117 8 100       22 my $ending = $2;
    1118 1         12 my $colspan = "";
    1119             if ($ending =~ s/^\s*(\|{2,})\s*$/$1/) {
    1120 8         25 $colspan = " colspan=\"" . length($ending) . "\"";
    1121 8 100       16 }
    1122 3 100       14 $result .= "\t$cell\n";
    1123 1         2 if ( $count == 0) {
    1124             if ($cell =~ /^\s*$/) {
    1125             $use_row_header = 1;
    1126 2         5 }
    1127             else {
    1128             $use_row_header = 0;
    1129 8         43 }
    1130             }
    1131 3         6 $count++;
    1132             }
    1133             $result .= "
    1134             }
    1135              
    1136 3         8 # Process body
    1137              
    1138 3         12 $result .= "
    1139              
    1140 13 100       37 foreach my $line (split(/\n/, $body)) {
    1141 2         3 # process each line (row) in table
    1142 2         5 if ($line =~ /^\s*$/) {
    1143             $result .= "
    1144 11         21 next;
    1145 11         14 }
    1146 11         70 $result .= "
    1147             my $count=0;
    1148 22     22   255 while ($line =~ /\|?\s*([^\|]+?)\s*(\|+|\Z)/gs) {
      22         106  
      22         23108  
    1149 27         70 # process contents of each cell
    1150 27         69 no warnings 'uninitialized';
    1151 27         39 my $cell = $self->_RunSpanGamut($1);
    1152 27         41 my $ending = $2;
    1153 27 50 66     76 my $colspan = "";
    1154 0         0 my $cell_type = "td";
    1155             if ($count == 0 && $use_row_header == 1) {
    1156 27 100       72 $cell_type = "th";
    1157 6         17 }
    1158             if ($ending =~ s/^\s*(\|{2,})\s*$/$1/) {
    1159 27 100       106 $colspan = " colspan=\"" . length($ending) . "\"";
    1160 14         44 }
    1161             if ($alignments[$count] !~ /^\s*$/) {
    1162             $result .= "\t<$cell_type$colspan align=\"$alignments[$count]\">$cell\n";
    1163 13         38 }
    1164             else {
    1165 27         117 $result .= "\t<$cell_type$colspan>$cell\n";
    1166             }
    1167 11         22 $count++;
    1168             }
    1169             $result .= "
    1170 3         10 }
    1171 3         283  
    1172             $result .= "
    \n";
    1173             $result
    1174 142         1645 }egmx;
    1175              
    1176             my $table_body = qr{
    1177             ( # wrap whole match in $2
    1178              
    1179             (.*?\|.*?)\n # wrap headers in $3
    1180              
    1181             [ ]{0,$less_than_tab}
    1182             ($table_divider) # alignment in $4
    1183              
    1184             ( # wrap cells in $5
    1185             $table_rows
    1186             )
    1187             )
    1188 142         936 }mx;
    1189              
    1190             return $text;
    1191             }
    1192 100     100   205  
    1193 100         166 sub _DoAttributes {
    1194             my ($self, $id) = @_;
    1195 100 100       224 my $result = "";
    1196 6         31  
    1197 3         17 if (defined $self->{_attributes}{$id}) {
    1198             while ($self->{_attributes}{$id} =~ s/(\S+)="(.*?)"//) {
    1199 6         28 $result .= qq{ $1="$2"};
    1200 9         44 }
    1201             while ($self->{_attributes}{$id} =~ /(\S+)=(\S+)/g) {
    1202             $result .= qq{ $1="$2"};
    1203             }
    1204 100         296 }
    1205              
    1206             return $result;
    1207             }
    1208 87     87   165  
    1209 87         163 sub _StripMarkdownReferences {
    1210             my ($self, $text) = @_;
    1211 87         913 my $less_than_tab = $self->{tab_width} - 1;
    1212              
    1213             while ($text =~ s{
    1214             \n\[\#(.+?)\]:[ \t]* # id = $1
    1215             \n?
    1216             (.*?)\n{1,2} # end at new paragraph
    1217             ((?=\n[ ]{0,$less_than_tab}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
    1218 5         15 }
    1219 5         17 {\n}sx)
    1220             {
    1221 5         44 my $id = $1;
    1222             my $reference = "$2\n";
    1223 5         24  
    1224             $reference =~ s/^[ ]{0,$self->{tab_width}}//gm;
    1225 5         126  
    1226             $reference = $self->_RunBlockGamut($reference, {wrap_in_p_tags => 0});
    1227              
    1228 87         216 $self->{_references}{$id} = $reference;
    1229             }
    1230              
    1231             return $text;
    1232 91     91   204 }
    1233              
    1234 91         239 sub _DoMarkdownCitations {
    1235             my ($self, $text) = @_;
    1236              
    1237             $text =~ s{
    1238             \[([^\[]*?)\] # citation text = $1
    1239             [ ]? # one optional space
    1240 6         11 (?:\n[ ]*)? # one optional newline followed by spaces
    1241 6         13 \[\#(.*?)\] # id = $2
    1242 6         10 }{
    1243 6         9 my $result;
    1244             my $anchor_text = $1;
    1245 6 50       17 my $id = $2;
    1246 6         9 my $count;
    1247              
    1248             if (defined $self->{_references}{$id} ) {
    1249 6         9 my $citation_counter=0;
      6         14  
    1250 12         16  
    1251 12 100       30 # See if citation has been used before
    1252             foreach my $old_id (@{ $self->{_used_references} }) {
    1253             $citation_counter++;
    1254 6 100       15 $count = $citation_counter if ($old_id eq $id);
    1255 3         4 }
    1256 3         7  
      3         5  
    1257             if (! defined $count) {
    1258             $count = ++$self->{_citation_counter};
    1259 6         15 push (@{ $self->{_used_references} }, $id);
    1260             }
    1261 6 100       13  
    1262 5         12 $result = qq[ ($count];
    1263              
    1264             if ($anchor_text ne "") {
    1265 6         10 $result .= qq[, $anchor_text];
    1266             }
    1267              
    1268             $result .= ")";
    1269 0         0 }
    1270             else {
    1271 0 0       0 # No reference exists
    1272 0         0 $result = qq[ ($id];
    1273              
    1274             if ($anchor_text ne "") {
    1275 0         0 $result .= qq[, $anchor_text];
    1276             }
    1277              
    1278 6 100       13 $result .= ")";
    1279 1         5 }
    1280              
    1281 6         30 if ($self->_Header2Label($anchor_text) eq "notcited"){
    1282             $result = qq[];
    1283             }
    1284 91         235 $result;
    1285             }xsge;
    1286              
    1287             return $text;
    1288 84     84   167 }
    1289 84         132  
    1290 84         175 sub _PrintMarkdownBibliography {
    1291             my ($self) = @_;
    1292 84         139 my $citation_counter = 0;
      84         172  
    1293 3         9 my $result;
    1294 3         14  
    1295             foreach my $id (@{ $self->{_used_references} }) {
    1296 84         163 $citation_counter++;
    1297             $result .= qq|

    [$citation_counter] $self->{_references}{$id}

    \n\n|;
    1298 84 100       178 }
    1299 1         6 $result .= "";
    1300              
    1301             if ($citation_counter > 0) {
    1302 83         150 $result = qq[\n\n
    \n{empty_element_suffix}\n

    $self->{bibliography_title}

    \n\n] . $result;
    1303             }
    1304             else {
    1305 84         182 $result = "";
    1306             }
    1307              
    1308             return $result;
    1309             }
    1310              
    1311             1;
    1312              
    1313             __END__