File Coverage

blib/lib/HTML/WikiConverter/PbWiki.pm
Criterion Covered Total %
statement 9 65 13.8
branch 0 26 0.0
condition 0 31 0.0
subroutine 3 14 21.4
pod 0 2 0.0
total 12 138 8.7


line stmt bran cond sub pod time code
1             package HTML::WikiConverter::PbWiki;
2              
3 1     1   28848 use warnings;
  1         2  
  1         32  
4 1     1   6 use strict;
  1         2  
  1         38  
5              
6 1     1   5 use base 'HTML::WikiConverter';
  1         6  
  1         2047  
7              
8             our $VERSION = '0.01';
9              
10             my $PB_IMG_DIR = "f/"; #hard-coded URI for images used by PbWiki.com. Subject to change.
11              
12             =head1 NAME
13              
14             HTML::WikiConverter::PbWiki - Convert HTML to PbWiki markup
15              
16             =head1 SYNOPSIS
17              
18             use HTML::WikiConverter;
19             my $wc = new HTML::WikiConverter( dialect => 'PbWiki' );
20             print $wc->html2wiki( $html );
21              
22             - or -
23              
24             html2wiki --dialect PbWiki --base-uri=http://yoursite.pbwiki.com/ index.html
25              
26             =head1 DESCRIPTION
27              
28             This module contains rules for converting HTML into PbWiki markup, the wiki dialect used by pbwiki.com.
29             See L for additional usage details.
30              
31             =cut
32              
33             sub rules {
34 0     0 0   my %rules = (
35             hr => { replace => "\n---\n" },
36             br => { replace => "\n" },
37             font => { preserve => 1, attributes => [ qw/ font size color face / ] },
38             center => { preserve => 1},
39              
40             h1 => { start => '! ', block => 1, trim => 'both', line_format => 'single' },
41             h2 => { start => '!! ', block => 1, trim => 'both', line_format => 'single' },
42             h3 => { start => '!!! ', block => 1, trim => 'both', line_format => 'single' },
43             h4 => { start => '!!!! ', block => 1, trim => 'both', line_format => 'single' },
44             h5 => { start => '!!!!! ', block => 1, trim => 'both', line_format => 'single' },
45             h6 => { start => '!!!!!! ', block => 1, trim => 'both', line_format => 'single' },
46              
47             pre => { line_prefix => ' ', block => 1 },
48             p => { block => 1, trim => 'both', line_format => 'multi' },
49              
50             b => { start => "**", end => "**" },
51             strong => { alias => 'b' },
52             i => { start => "''", end => "''" },
53             em => { alias => 'i' },
54             u => { start => '__', end => '__'},
55             strike => { start => ' -', end => '- '},
56             s => { alias => 'strike' },
57              
58             ul => { line_format => 'multi', block => 1 },
59             ol => { alias => 'ul' },
60              
61             li => { start => \&_li_start, trim => 'leading' },
62             dt => { alias => 'li' },
63             dd => { alias => 'li' },
64              
65             a => { replace => \&_link },
66             img => { replace => \&_image },
67              
68             table => { start => \&_table_start, block => 1, line_format => 'single' },
69             tr => { start => "", line_format => 'single' },
70             td => { start => \&_td_start, end => \&_td_end, trim => 'both', line_format => 'single' },
71             th => { alias => 'td' }
72             );
73              
74 0           return \%rules;
75             }
76              
77             sub _table_start {
78 0     0     my( $self, $node, $rules ) = @_;
79 0           my @attrs = (); #qw/ border cellpadding cellspacing width bgcolor align /;
80 0           return '| '.$self->get_attr_str( $node, @attrs );
81             }
82              
83             sub _td_start {
84 0     0     my( $self, $node, $rules ) = @_;
85 0 0         my $prefix = $node->tag eq 'th' ? '!' : '';
86              
87 0   0       my $align = $node->attr('align') || 'left';
88 0 0 0       $prefix .= ' ' if $align eq 'center' or $align eq 'right';
89              
90 0           return $prefix;
91             }
92              
93             sub _td_end {
94 0     0     my( $self, $node, $rules ) = @_;
95 0   0       my $colspan = $node->attr('colspan') || 1;
96 0           my $suffix = ( '|' ) x $colspan;
97              
98 0   0       my $align = $node->attr('align') || 'left';
99 0 0 0       $suffix = ' '.$suffix if $align eq 'center' or $align eq 'left';
100              
101 0           return $suffix;
102             }
103              
104             sub _blockquote_start {
105 0     0     my( $self, $node, $rules ) = @_;
106 0           my @parent_bqs = $node->look_up( _tag => 'blockquote' );
107 0           my $depth = @parent_bqs;
108            
109 0           my $start = ( '-' ) x $depth;
110 0           return "\n".$start.'>';
111             }
112              
113             sub _li_start {
114 0     0     my( $self, $node, $rules ) = @_;
115 0           my @parent_lists = $node->look_up( _tag => qr/ul|ol|dl/ );
116 0           my $depth = @parent_lists;
117              
118 0           my $bullet = '';
119 0 0         $bullet = '*' if $node->parent->tag eq 'ul';
120 0 0         $bullet = '#' if $node->parent->tag eq 'ol';
121              
122 0           my $prefix = ( $bullet ) x $depth;
123 0           return "\n".$prefix.' ';
124             }
125              
126             sub _link {
127 0     0     my( $self, $node, $rules ) = @_;
128 0 0         return $self->_anchor($node, $rules) if $node->attr('name');
129              
130 0   0       my $url = $node->attr('href') || '';
131 0   0       my $text = $self->get_elem_contents($node) || '';
132              
133             #remove '.html' and any subdirs
134 0           $url =~ s/.*?\/?(\w+\.\w+)$/$1/;
135 0           $url =~ s/\.html?//;
136             # $url =~ s/[\.\/]//g;
137              
138 0 0         return "[$text]" if uc($text) eq uc($url);
139 0           return "[$url | $text]";
140             }
141              
142             sub _anchor {
143 0     0     my( $self, $node, $rules ) = @_;
144             #keeping this out until PbWiki implements normal anchors.
145             }
146              
147             sub _image {
148 0     0     my( $self, $node, $rules ) = @_;
149              
150 0   0       my $str = $node->attr('src') || '';
151 0           return "[".$str."]";
152             }
153              
154             #PbWiki requires absolute image URIs, but by default links will be relative.
155             #If there is interest absolute links could also be added.
156             sub preprocess_node {
157 0     0 0   my( $self, $node ) = @_;
158              
159             #TODO: This is a really bad kludge. It will be placed in the attribute() method in a PbWiki 0.53+ compatible version. This was why the attribute() method was added to begin with.
160 0 0         if (!$self->base_uri) {die "The PbWiki dialect requires a base uri to create image links. Please provide one using --base_uri=something.pbwiki.com.\n";}
  0            
161             #TODO: prepend base_uri with 'http://' and append '.pbwiki.com' if they did not, so you can just specify your wiki as 'wikiname' on the commandline.
162              
163 0   0       my $tag = $node->tag || ''; #gives warning if $nodes->tag is null and we try to compare it to a string
164 0 0 0       $self->_move_aname($node) if $tag eq 'a' and $node->attr('name');
165 0 0         $self->caption2para($node) if $tag eq 'caption';
166              
167 0 0 0       if ($tag eq 'a' and $node->attr('href')) {
168 0           $node->attr( href => URI->new($node->attr('href'))->rel($self->base_uri)->as_string );
169             }
170              
171 0 0 0       if ($tag eq 'img' and $node->attr('src')) {
172 0   0       my $str = $node->attr('src') || '';
173 0           $str =~ s/.*?\/?(\w+\.\w+)$/$1/; #strip of any directory info to get just the image name
174 0           $str = $self->base_uri() . $PB_IMG_DIR . $str;
175 0           $node->attr( src => $str);
176             }
177             }
178              
179             #Note: feature removed until PbWiki supports anchor tags other than !Sections.
180             sub _move_aname {
181 0     0     my( $self, $node ) = @_;
182              
183             # Keep 'a href's around
184 0 0         $node->replace_with_content->delete unless $node->attr('href');
185             }
186              
187             =head1 AUTHOR
188              
189             Dave Schaefer, C<< >>.
190             Many thanks to David J. Iberri, C<< >> for help with and advice on writing this dialect.
191              
192              
193             =head1 BUGS
194              
195             Please report any bugs or feature requests to
196             C, or through the web
197             interface at
198             L.
199             David J. Iberri will be notified, and then you'll automatically be notified of
200             progress on your bug as changes are made.
201              
202             =head1 SUPPORT
203              
204             You can find documentation for this module with the perldoc command.
205              
206             perldoc HTML::WikiConverter::PbWiki
207              
208             You can also look for information at:
209              
210             =over 4
211              
212             =item * AnnoCPAN: Annotated CPAN documentation
213              
214             L
215              
216             =item * CPAN Ratings
217              
218             L
219              
220             =item * RT: CPAN's request tracker
221            
222             L
223              
224             =item * Search CPAN
225              
226             L
227              
228             =back
229              
230             =head1 COPYRIGHT & LICENSE
231              
232             Copyright 2006 Dave Schaefer, all rights reserved.
233              
234             This program is free software; you can redistribute it and/or modify
235             it under the same terms as Perl itself.
236              
237             =cut
238              
239             1;