File Coverage

blib/lib/Text/Smart.pm
Criterion Covered Total %
statement 106 129 82.1
branch 19 28 67.8
condition 7 12 58.3
subroutine 11 21 52.3
pod 12 12 100.0
total 155 202 76.7


line stmt bran cond sub pod time code
1             # -*- perl -*-
2             #
3             # Text::Smart by Daniel Berrange
4             #
5             # Copyright (C) 2000-2004 Daniel P. Berrange
6             #
7             # This program is free software; you can redistribute it and/or modify
8             # it under the terms of the GNU General Public License as published by
9             # the Free Software Foundation; either version 2 of the License, or
10             # (at your option) any later version.
11             #
12             # This program is distributed in the hope that it will be useful,
13             # but WITHOUT ANY WARRANTY; without even the implied warranty of
14             # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15             # GNU General Public License for more details.
16             #
17             # You should have received a copy of the GNU General Public License
18             # along with this program; if not, write to the Free Software
19             # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20             #
21             # $Id: Smart.pm,v 1.3 2004/12/31 16:00:45 dan Exp $
22              
23             =pod
24              
25             =head1 NAME
26              
27             Text::Smart - Processor for 'smart text' markup
28              
29             =head1 SYNOPSIS
30              
31             use Text::Smart;
32              
33             my $markup = Text::Smart->new(entities => \%entities);
34              
35             my $text = $markup->process($text, %opts);
36              
37             my $text = $markup->process_divider($text);
38             my $text = $markup->process_itemize($text, %opts);
39             my $text = $markup->process_enumeration($text, %opts);
40             my $text = $markup->process_paragraph($text, %opts);
41             my $text = $markup->process_smart($text, %opts);
42              
43             # Virtual methods for subclasses to implement
44             my $text = $markup->generate_divider();
45             my $text = $markup->generate_itemize(@items);
46             my $text = $markup->generate_enumeration(@items);
47             my $text = $markup->generate_paragraph($text);
48             my $text = $markup->generate_bold($text);
49             my $text = $markup->generate_italic($text)
50             my $text = $markup->generate_monospace($text);
51             my $text = $markup->generate_link($text, $url);
52             my $text = $markup->escape($text);
53              
54             =head1 DESCRIPTION
55              
56             This module provides an interface for converting
57             smarttext markup into an arbitrary text based markup
58             language, such as HTML, Latex, or Troff.
59              
60             =head2 SMARTTEXT MARKUP
61              
62             Smarttext markup can be split into two categories,
63             block level and inline. Block level elements are
64             separated by one or more completely blank lines.
65             Inline elements encompass one or more words within
66             a block. Valid inline markup is:
67              
68             *foo* - Puts the word 'foo' in bold face
69             /foo/ - Puts the word 'foo' in italic face
70             =foo= - Puts the word 'foo' in fixed width face
71             @foo(bar) - Makes the word 'foo' a link to the url 'bar'
72              
73             There are six pre-defined entities
74              
75             (C) - Insert copyright symbol
76             (TM) - Insert trademark symbol
77             (R) - Insert registered symbol
78              
79             1/2 - insert a fraction
80             1/4 - insert a fraction
81             3/4 - insert a fraction
82              
83             There are six levels of heading available
84              
85             &title(Main document heading)
86             &subtitle(Secondary document heading)
87             §ion(Section heading)
88             &subsection(Secondary section heading)
89             &subsubsection(Tertiary section heading)
90             ¶graph(Paragraph heading)
91              
92             There are three special blocks. Horizontal dividing bars
93             can be formed using
94              
95             ---
96             ___
97              
98             Numbered lists using
99              
100             + item one
101             + item two
102             + item three
103              
104             Bulleted lists using
105              
106             * item one
107             * item two
108             * item three
109              
110             Anything not fitting these forms is treated as a
111             standard paragraph.
112              
113             =head2 OPTIONS
114              
115             All the C family of methods accept a number
116             of options which control which pieces of markup are
117             permitted in the text. The following options are recognised:
118              
119             no_links
120             no_symbols
121             no_lists
122             no_rules
123             no_inline
124              
125             To use this options pass them as a named parameter:
126              
127             $markup->process($text, no_links => 1, no_lists => 1);
128              
129             =head2 SUBCLASSING
130              
131             This module provides the basic parsing routines and
132             framework for outputting text, however, it must be
133             subclassed to generate the markup language specific
134             tags. The primary subclass is L which
135             is used to generate HTML markup.
136              
137             =head1 METHODS
138              
139             The 'generate_XXX' methods are virtual and need to be implemented
140             by subclasses.
141              
142             =over 4
143              
144             =cut
145              
146             package Text::Smart;
147              
148 2     2   35391 use strict;
  2         5  
  2         72  
149 2     2   11 use warnings;
  2         4  
  2         4806  
150              
151             our $VERSION = '1.0.2';
152              
153             =item my $proc = Text::Smart->new();
154              
155             Create a new smart text processor. This constructor would
156             not normally be called by application code, since this module
157             must be sub-classed to be useful. The primary subclass is for
158             generating HTML, L.
159              
160             =cut
161              
162             sub new {
163 2     2 1 23 my $proto = shift;
164 2   33     111 my $class = ref($proto) || $proto;
165 2         10 my $self = {};
166 2         6 my %params = @_;
167              
168 2         8 bless $self, $class;
169              
170 2         7 return $self;
171             }
172              
173             =item my $markup = $proc->process($text)
174              
175             Converts a piece of smart text, passed in as the parameter into the
176             target markup language of this processor. The markedup text is returned
177              
178             =cut
179              
180             sub process {
181 2     2 1 28 my $self = shift;
182 2         6 my $text = shift;
183 2         6 my %params = @_;
184              
185 2 50       94 my @blocks = split /\r?\n(\r?\n)+/, (ref($text) ? ${$text} : $text);
  0         0  
186              
187 2         10 foreach (@blocks) {
188 50 100 66     566 if (/^\s*(___+|---+)\s*$/ && !$params{no_rules}) {
    100 66        
    100 66        
    100          
    100          
189 2         16 $_ = $self->_process_divider($_);
190             } elsif (/^\*\s/ && !$params{no_lists}) {
191 2         19 $_ = $self->_process_itemize($_, @_);
192             } elsif (/^\+\s/ && !$params{no_lists}) {
193 2         17 $_ = $self->_process_enumeration($_, @_);
194             } elsif (/^&\w+\(.*\)/) {
195 12         35 $_ = $self->_process_heading($_, @_);
196             } elsif (/\w/) {
197 8         54 $_ = $self->_process_paragraph($_, @_);
198             }
199             }
200              
201 2         35 return join("\n", @blocks);
202             }
203              
204              
205             sub _process_divider {
206 2     2   4 my $self = shift;
207 2         4 local $_ = shift;
208              
209 2         663 return $self->generate_divider();
210             }
211              
212              
213             sub _process_itemize {
214 2     2   5 my $self = shift;
215 2         4 local $_ = shift;
216              
217 2         17 my @items = split /^\*\s+/m;
218 2 50       23 shift @items if $items[0] eq '';
219              
220 2         8 return $self->generate_itemize(map { $self->_process_smart($_, @_) } @items);
  6         18  
221             }
222              
223              
224             sub _process_enumeration {
225 2     2   3 my $self = shift;
226 2         5 local $_ = shift;
227              
228 2         16 my @items = split /^\+\s*/m;
229 2 50       8 shift @items if $items[0] eq '';
230              
231 2         6 return $self->generate_enumeration(map { $self->_process_smart($_, @_) } @items);
  4         11  
232             }
233              
234              
235             sub _process_heading {
236 12     12   16 my $self = shift;
237 12         14 local $_ = shift;
238              
239 12         35 $_ =~ /^&(\w+)\((.*)\)/;
240              
241 12         25 return $self->generate_heading($self->_process_smart($2, @_), $1);
242             }
243              
244             sub _process_paragraph {
245 8     8   11 my $self = shift;
246 8         16 local $_ = shift;
247              
248 8         27 return $self->generate_paragraph($self->_process_smart($_, @_));
249             }
250              
251              
252             sub _process_smart {
253 30     30   39 my $self = shift;
254 30         79 local $_ = $self->escape(shift);
255 30         90 my %params = @_;
256              
257 30         49 my $links = {};
258              
259             # We're going to use the octal characters \001 and \002 for
260             # escaping stuff, so we'd better make sure there aren't any
261             # in the text.
262 30         46 s/\001//g;
263 30         35 s/\002//g;
264 30         616 s/\003//g;
265              
266 30 50       82 unless ($params{no_links}) {
267             # We've got to protect the url of links before we go further,
268             # however we can't actually generate the link yet because
269             # that interferes with the monospace stuff below....
270 30         35 s|@@|\001|gx;
271 30         38 s|@([^\(@]+)\(([^\)]+)\)|'@' . $1 . '(' . $self->_obscure($2, $links) . ')'|gex;
  1         9  
272 30         37 s|\001|@@|gx;
273             }
274              
275 30 50       63 unless ($params{no_symbols}) {
276             # We transform a few common symbols
277             # We don't substitute them straight in because the
278             # substituted text might interfere with stuff that
279             # follows...
280 30         41 s|\b1/4\b|"\003fraction14\003"|gex;
  1         5  
281 30         41 s|\b1/2\b|"\003fraction12\003"|gex;
  1         4  
282 30         35 s|\b3/4\b|"\003fraction34\003"|gex;
  1         4  
283              
284 30         37 s|\(C\)|"\003copyright\003"|gex;
  1         3  
285 30         67 s|\(R\)|"\003registered\003"|gex;
  1         3  
286 30         37 s|\(TM\)|"\003trademark\003"|gex;
  1         3  
287             }
288              
289 30 50       58 unless ($params{no_links}) {
290             # We protect hyperlinks so that the '/' or '@' doesn't get
291             # mistaken for a block of italics / link
292 30         38 s|([a-z]+://[^\s,\(\)><]*)|'@' . $self->_obscure($1, $links) . '(' . $self->_obscure($1, $links) . ')'|gex;
  0         0  
293 30         35 s|(mailto:[^\s,\(\)><]*)|'@' . $self->_obscure($1, $links) . '(' . $self->_obscure($1, $links) . ')'|gex;
  0         0  
294             }
295              
296 30 50       62 unless ($params{no_inline}) {
297             # Next lets process italics /italic/
298             # NB. this must be first, otherwise closing tags
299             # interfere with the pattern matching
300 30         39 s|//|\001|gx;
301 30         2339 s|(?generate_italic($1)|gex;
  1         9  
302 30         34 s|\001|/|gx;
303              
304             # Lets process bold text *bold*
305 30         37 s|\*\*|\001|gx;
306 30         39 s|(?generate_bold($1)|gex;
  1         6  
307 30         35 s|\001|\*|gx;
308              
309             # Now we're onto the monospace stuff =monospace=
310 30         35 s|==|\001|gx;
311 30         36 s|(?generate_monospace($1)|gex;
  1         7  
312 30         35 s|\001|=|gx;
313             }
314              
315 30 50       190 unless ($params{no_links}) {
316             # Links are next on the list @text(url)
317 30         36 s|@@|\001|gx;
318 30         38 s|@([^\(@]+)\(([^\)]+)\)|$self->generate_link($2, $1)|gex;
  1         6  
319 30         59 s|\001|@|gx;
320              
321             # Finally we can unobscure the hyperlinks
322 30         36 s|\002([^\002]+)\002|$links->{$1}|gex;
  1         4  
323             }
324              
325 30 50       55 unless ($params{no_symbols}) {
326             # And those entities
327 30         40 s|\003([^\003]+)\003|$self->generate_entity($1)|gex;
  6         17  
328             }
329              
330 30         145 return $_;
331             }
332              
333              
334             sub _obscure {
335 1     1   3 my $self = shift;
336 1         15 my $link = shift;
337 1         2 my $map = shift;
338              
339 1         2 my @keys = keys %{$map};
  1         3  
340 1         3 my $id = $#keys + 1;
341              
342 1         3 $map->{$id} = $link;
343              
344 1         6 return "\002$id\002";
345             }
346              
347             =item my $markup = $proc->generate_divider()
348              
349             Called to generate a horizontal section divider. The generated text must
350             be returned in string format. This method must be implemented by subclasses.
351              
352             =cut
353              
354             sub generate_divider {
355 0     0 1   my $self = shift;
356              
357 0           die "class " . ref($self) . " did not implement the generate_divider method";
358             }
359              
360             =item my $markup = $proc->generate_itemize(@items)
361              
362             Called to generate an itemized list of bullet points. The (already marked up) text
363             for each item is passed as a list of parameters. The generated text must be returned
364             in string format. This method must be implemented by subclasses.
365              
366             =cut
367              
368             sub generate_itemize {
369 0     0 1   my $self = shift;
370              
371 0           die "class " . ref($self) . " did not implement the generate_itemize method";
372             }
373              
374             =item my $markup = $proc->generate_enumeration(@items)
375              
376             Called to generate an itemized list of numbered points. The (already marked up) text
377             for each item is passed as a list of parameters. The generated text must be returned
378             in string format. This method must be implemented by subclasses.
379              
380             =cut
381              
382             sub generate_enumeration {
383 0     0 1   my $self = shift;
384              
385 0           die "class " . ref($self) . " did not implement the generate_enumeration method";
386             }
387              
388             =item my $markup = $proc->generate_paragraph($text)
389              
390             Called to generate a paragraph of text. The (already marked up) text for the body
391             of the paragraph is passed in as the only parameter. The generated text must be
392             returned in string format. This method must be implemented by subclasses.
393              
394             =cut
395              
396             sub generate_paragraph {
397 0     0 1   my $self = shift;
398              
399 0           die "class " . ref($self) . " did not implement the generate_paragraph method";
400             }
401              
402             =item my $markup = $proc->generate_bold($text)
403              
404             Called to generate bold text. The plain text is passed in as the parameter, and
405             the marked up text should be returned in string format. This method must be
406             implemented by subclasses.
407              
408             =cut
409              
410             sub generate_bold {
411 0     0 1   my $self = shift;
412              
413 0           die "class " . ref($self) . " did not implement the generate_bold method";
414             }
415              
416              
417             =item my $markup = $proc->generate_italic($text)
418              
419             Called to generate italic text. The plain text is passed in as the parameter, and
420             the marked up text should be returned in string format. This method must be
421             implemented by subclasses.
422              
423             =cut
424              
425             sub generate_italic {
426 0     0 1   my $self = shift;
427              
428 0           die "class " . ref($self) . " did not implement the generate_italic method";
429             }
430              
431              
432             =item my $markup = $proc->generate_monospace($text)
433              
434             Called to generate fixed-width text. The plain text is passed in as the parameter, and
435             the marked up text should be returned in string format. This method must be
436             implemented by subclasses.
437              
438             =cut
439              
440             sub generate_monospace {
441 0     0 1   my $self = shift;
442              
443 0           die "class " . ref($self) . " did not implement the generate_monospace method";
444             }
445              
446              
447              
448             =item my $markup = $proc->generate_link($url, $text)
449              
450             Called to generate a hyperlink. The destination of the link is the first parameter,
451             and the text being linked is the second parameter. The marked up text must be returned
452             in string format. This method must be implemented by subclasses.
453              
454             =cut
455              
456              
457             sub generate_link {
458 0     0 1   my $self = shift;
459              
460 0           die "class " . ref($self) . " did not implement the generate_link method";
461             }
462              
463             =item my $markup = $proc->generate_entity($name);
464              
465             Called to generated a special named entity. There are 6 named entities which
466             need to be supported:
467              
468             =over 4
469              
470             =item fraction12
471              
472             The fraction 1/2
473              
474             =item fraction14
475              
476             The fraction 1/4
477              
478             =item fraction 34
479              
480             The fraction 3/4
481              
482             =item copyright
483              
484             The copyright symbol
485              
486             =item trademark
487              
488             The trademark symbol
489              
490             =item registered
491              
492             The rights registered symbol
493              
494             =back
495              
496             The markup corresponding to the specified entity must be returned in string
497             format.
498              
499             =cut
500              
501             sub generate_entity {
502 0     0 1   my $self = shift;
503              
504 0           die "class " . ref($self) . " did not implement the generate_entity method";
505             }
506              
507             =item my $text = $proc->escape($text)
508              
509             Called to escape any characters which have special meaning in the destination
510             markup language. For example, in HTML, this would escape angle brackets and
511             the ampersand symbol. The escaped text must be returned in string format.
512              
513             =cut
514              
515             sub escape {
516 0     0 1   my $self = shift;
517              
518 0           die "class " . ref($self) . " did not implement the escape method";
519             }
520              
521             1 # So that the require or use succeeds.
522              
523             __END__