File Coverage

blib/lib/Pod/HTML2Pod.pm
Criterion Covered Total %
statement 307 540 56.8
branch 77 256 30.0
condition 14 92 15.2
subroutine 40 42 95.2
pod 1 30 3.3
total 439 960 45.7


line stmt bran cond sub pod time code
1              
2             require 5;
3             # Time-stamp: "2004-12-29 18:41:19 AST"
4              
5             package Pod::HTML2Pod;
6 2     2   27892 use strict;
  2         4  
  2         72  
7 2     2   2387 use integer; # haul aaaaaaaaaass!
  2         22  
  2         12  
8 2     2   2746 use UNIVERSAL ();
  2         31  
  2         47  
9 2     2   15 use Carp ();
  2         3  
  2         46  
10 2     2   2510 use HTML::TreeBuilder 3.01 ();
  2         125915  
  2         62  
11 2     2   25 use HTML::Element 3.05 ();
  2         27  
  2         33  
12 2     2   9 use HTML::Tagset (); # presumably used by HTML::TreeBuilder anyhow
  2         6  
  2         28  
13 2     2   11 use HTML::Entities (); # presumably used by HTML::Parser anyhow
  2         5  
  2         44  
14 2         39414 use vars qw($Debug $VERSION %Phrasal %Char2ent
15 2     2   9 $nbsp $E_slash $E_vbar $counter);
  2         5  
16              
17             $VERSION = '4.05';
18             $Debug = 0 unless defined $Debug;
19              
20             =head1 NAME
21              
22             Pod::HTML2Pod -- translate HTML into POD
23              
24             =head1 SYNOPSIS
25              
26             # Use the program 'html2pod' that comes in this dist, or:
27             use Pod::HTML2Pod;
28             print Pod::HTML2Pod::convert(
29             'file' => 'my_stuff.html', # input file
30             'a_href' => 1, # try converting links
31             );
32              
33             =head1 DESCRIPTION
34              
35             Larry Wall once said (1999-08-27, on the C list, I
36             do believe): "The whole point of pod is to get people to document stuff
37             they wouldn't document in any other form."
38              
39             To that end, I wrote this module so that people who are unpracticed
40             with POD but in a hurry to simply document their programs or modules,
41             could write their documentation in simple HTML, and convert that to
42             POD. That's what this module does.
43              
44             Specifically, this module bends over backwards to try to turn even
45             vaguely plausable HTML into POD -- and when in doubt, it simply ignores
46             things that it doesn't know about, or can't render.
47              
48             =head1 FUNCTIONS
49              
50             This module provides one documented function, which it does not export:
51              
52             =over
53              
54             =item Pod::HTML2Pod::convert( ...options... )
55              
56             =back
57              
58             This returns a single scalar value containing the converted POD text,
59             with some comments after the end.
60              
61             This function takes options:
62              
63             =over
64              
65             =item 'file' => FILENAME,
66              
67             Specifies that the HTML code is to be read from the filename given.
68              
69             =item 'handle' => *HANDLE,
70              
71             Specifies that the HTML code is to be read from the open filehandle
72             given (e.g., C<$fh_obj>, C<*HANDLE>, C<*HANDLE{IO}>, etc.) If you
73             specify this, but fail to specify an actual handle object, inscrutible
74             errors may result.
75              
76             =item 'content' => STRING,
77              
78             Specifies that the HTML code is in the string given. (Alternately,
79             pass a reference to the scalar: C<'content' =E \$stuff>.)
80              
81             =item 'tree' => OBJ,
82              
83             Specifies that the HTML document is contained in the given
84             HTML::TreeBuilder object (or HTML::Element object, at least).
85              
86             =item 'a_name' => BOOLEAN,
87              
88             Specifies whether you want to try converting Ca name="..."E>
89             elements. By default this is off -- i.e., such elements are ignored.
90              
91             =item 'a_href' => BOOLEAN,
92              
93             Specifies whether you want to try converting Ca href="..."E>
94             elements. By default this is off -- i.e., such elements are ignored.
95             If on, bear in mind that relative URLs cannot be properly converted to
96             POD -- any relative URLs will be complained about in comments after
97             the end of the document. Normal absolute URLs will be treated as best
98             they can be. Note that URLs beginning "pod:..." will be turned into
99             POD links to whatever follows; that is, "pod:Getopt::Std" is turned
100             into CGetopt::StdE>
101              
102             =item 'debug' => INTEGER,
103              
104             Puts Pod::HTML2Pod into verbose debug mode for the duration of
105             processing this this HTML document. INTEGER can be 0 for no debug
106             output, 1 for a moderate amount that will cause the HTML syntax tree
107             to be be dumped at the start of the conversion, and 2 for that plus a
108             dump of the intermediate POD doctree, plus a few more inscrutible
109             diagnostic messages. Looking at the trees dumped might be helpful in
110             making sense of error messages that refer to a particular node in the
111             parse tree.
112              
113             =item
114              
115             =back
116              
117             =head1 GUIDELINES
118              
119             Don't write crappy HTML and expect this module to understand it.
120              
121             Don't take the output of C and feed it to this, just because
122             you think it'd be neat to try it. You'll just learn really unpleasant
123             things about C -- and that's fine if that means you'll use
124             it to improve C, but it's rather the long way around.
125              
126             However, I use this module to convert simple HTML into POD,
127             bearing in mind these simple truths:
128              
129             POD can't do tables, images, forms, imagemaps, layers, CSS, embedded
130             Java applets or any other kind of object, FONT, or BLINK. So don't
131             try to do any of these things.
132              
133             Use Ch1E> and Ch2E> for headings.
134              
135             If you want to have a block of literal example code, put it in a
136             CpreE>.
137              
138             Keep things simple.
139              
140             Remember: Just because it comes I of Pod::HTML2Pod doesn't mean
141             it's happy normal pod. You can do lots of things in HTML that will
142             produce POD that is strange but technically legal (like having huge
143             and complex content in a Ch1E>/C<=head1>) but that will make
144             perldoc scream bloody murder about nroff macros stretched past their
145             limit.
146              
147             Try to avoid using a WYSIWYG HTML editor, as they often produce scary
148             source. Ditto for taking selecting "Save as... HTML" in your word
149             processor. You can always try it, but look at the HTML to survey the
150             damage before you try converting it to POD.
151              
152             Always look at the POD that's been output by HTML2Pod -- never just
153             blindly include it.
154              
155             Consider starting from this template:
156              
157            
158            
159             Things::Stuff
160            
161            
162            
163            

NAME

164            
165             Things::Stuff -- does some things with stuff
166            
167            

SYNOPSIS

168            
169            
 
170             use HTML::Stuff;
171             do some more stuff;
172             la la la la la;
173             oogah;
174            
175            
176            

DESCRIPTION

177            
178             This module does things with stuff. It exports these functions:
179            
180            
181            
thingify( ... )
182            
This function takes stuff, and returns their value as things.
183            
184            
destuffulate( ... )
185            
This function returns the things, from stuff.
186            

It will throw a fatal exception if applied to things.

187            
So don't do that.
188            
189            
enthinction( ... )
190            
This is where I run out of ways to make up silly sentences
191             involving "thing" and "stuff". Mostly.
192            
193            
194            
195            

Caveats and WYA's

196            
197             Things to be wary of:
198            
199            
200            
  • The things.
  • 201            
  • And the stuff
  • 202            

    Don't forget about that stuff. Gotta keep an eye on that.

    203            
    204            
    205            

    BUGS

    206            
    207             Stuff is hard.
    208            
    209            

    SEE ALSO

    210            
    211             Class::Classless,
    212             strict,
    213            
    214             >Lingua::EN::Numbers::Ordinate,
    215             perlvar,
    216            
    217            
    219            
    220            

    COPYRIGHT

    221            
    222             Copyright 2000, Joey Jo-Jo Jr. Shabadoo.
    223            
    224            
    225            

    This library is free software; you can redistribute it and/or modify

    226             it under the same terms as Perl itself.
    227            
    228            

    AUTHOR

    229             Joey Jo-Jo Jr. Shabadoo, jojojo@shabadoo.int
    230            
    231            
    232              
    233             =head1 BUG REPORTS
    234              
    235             If you do find a case where this converter misinterprets what you
    236             consider straightforward HTML (which you should really really have run
    237             thru an HTML syntax checker, by the way!), report it to me as a bug, at
    238             C.
    239              
    240             Be sure to include the entire document that causes the error -- then
    241             specify exactly what you consider the error to be.
    242              
    243             =head1 BUGS AND CAVEATS
    244              
    245             * Doesn't try to turn "smart quotes" characters into simple " and '.
    246             Maybe should?
    247              
    248             * Fails to turn
    249              
    250             foo thing bar baz quux
    251              
    252             into
    253              
    254             foo S quux
    255              
    256             I.e., currently just turns C< >'s into normal spaces.
    257              
    258             * Numeric entities (CnumE>) are used when necessary -- but these
    259             are not understood by some older POD converters.
    260              
    261             * No HTML that you provide will turn into C...E>
    262              
    263             * Currently maps
    264              
    265             bar
    266              
    267             to
    268              
    269             Xbar
    270              
    271             but is this correct?
    272              
    273             =head1 SEE ALSO
    274              
    275             L, L, L
    276              
    277             And HTML Tidy, at C
    278              
    279             =head1 COPYRIGHT
    280              
    281             Copyright (c) 2000 Sean M. Burke. All rights reserved.
    282              
    283             This library is free software; you can redistribute it and/or modify
    284             it under the same terms as Perl itself.
    285              
    286             =head1 AUTHOR
    287              
    288             Sean M. Burke C
    289              
    290             =cut
    291              
    292             # TODO: test whether anchors and references to them actually work
    293             # in extremis? (see what recent pod2html versions do to them?)
    294              
    295             #--------------------------------------------------------------------------
    296              
    297             sub convert {
    298 1 50   1 1 607 Carp::croak(__PACKAGE__ . '::convert needs parameters!')unless @_;
    299 1 50       19 Carp::croak(
    300             "odd number of elements in options to " . __PACKAGE__ . "::convert")
    301             if @_ % 2;
    302              
    303 1         6 my %o = @_;
    304 1         3 local($Debug) = $Debug;
    305 1 50       5 if(exists $o{'debug'}) { $Debug = $o{'debug'} }
      0         0  
    306            
    307 1         11 my $tree = HTML::TreeBuilder->new();
    308            
    309 1         318 $tree->ignore_ignorable_whitespace(1);
    310            
    311 1         11 my $comments = [ __PACKAGE__ . ' conversion notes:' ];
    312            
    313 1 50       4 if(exists $o{'tree'}) {
    314 0         0 $tree->delete; # never mind that one
    315 0         0 $tree = $o{'tree'};
    316 0 0       0 die "but the 'tree' value is undef" unless defined $tree;
    317 0 0       0 die "but the 'tree' value isn't an object" unless ref $tree;
    318 0 0       0 die "but the 'tree' value object's class isn't based on HTML::Element"
    319             unless $tree->isa('HTML::Element');
    320 0         0 $tree = $tree->clone;
    321              
    322             } else {
    323              
    324 1 50       5 if(exists $o{'file'}) {
    325 0 0       0 die "File $o{'file'} doesn't exist" unless -e $o{'file'};
    326 0         0 local(*IN);
    327 0 0       0 open(IN, "<$o{'file'}") or die "Can't open $o{'file'}: $!";
    328 0         0 $o{'handle'} = *IN{IO};
    329 0         0 ++$o{'_close_after'};
    330 0 0       0 print "Input from $o{'file'} ($o{'handle'})\n" if $Debug;
    331 0         0 push @$comments, "#From file $o{'file'}";
    332             }
    333              
    334 1 50       4 if(exists $o{'handle'}) {
    335 0         0 local $/;
    336 0         0 my $fh = $o{'handle'};
    337 0         0 my $x;
    338 0         0 $x = <$fh>;
    339 0 0       0 close($fh) if $o{'_close_after'};
    340 0         0 $o{'content'} = \$x;
    341 0 0       0 print "Input from handle ($o{'handle'})\n" if $Debug;
    342             }
    343              
    344 1 50       5 if(exists $o{'content'}) {
    345 1         3 my($content_r, $is_copy);
    346 1 50       8 if(!defined $o{'content'}) { # undef content?
        50          
    347 0         0 die "content is undef";
    348             } elsif(ref $o{'content'}) { # scalar ref
    349 0 0       0 die "content only accepts scalars or scalar refs"
    350             unless ref $o{'content'} eq 'SCALAR';
    351 0         0 $content_r = $o{'content'};
    352 0         0 $is_copy = 0;
    353             } else { # simple scalar
    354 1         2 $content_r = \$o{'content'};
    355 1         3 $is_copy = 1;
    356             }
    357              
    358             # Nativize newlines, if possible and if need be.
    359             # Otherwise PREs will be hard to reckon.
    360 1 50       15 if("\n" ne "\cm" and "\n" ne "\cm\cj" and "\n" ne "\cj") {
    361             print "I don't recognize what \"\\n\" means on this system!" if $Debug;
    362 0         0 } elsif($$content_r =~ m/(\cm\cj|\cm|\cj)/) {
    363 0         0 my $nl = $1;
    364 0 0       0 if($nl eq "\n") {
    365             # no-op
    366 0 0       0 print "# Already in native newline format\n" if $Debug;
    367             } else {
    368 0 0       0 unless($is_copy) {
    369 0         0 my $x = $$content_r;
    370 0         0 $content_r = \$x; # copy
    371 0         0 $is_copy = 1;
    372             }
    373 0 0       0 if($nl eq "\cm") {
        0          
        0          
    374 0         0 $$content_r =~ tr/\cm/\n/;
    375 0 0       0 print "# Nativizing newlines from \\cm to \\n\n" if $Debug;
    376             } elsif($nl eq "\cj") {
    377 0         0 $$content_r =~ tr/\cj/\n/;
    378 0 0       0 print "# Nativizing newlines from \\cj to \\n\n" if $Debug;
    379             } elsif($nl eq "\cm\cj") {
    380 0         0 $$content_r =~ tr/\cj//d;
    381 0         0 $$content_r =~ tr/\cm/\n/ unless "\cm" eq "\n";
    382 0 0       0 print "# Nativizing newlines from \\cm\\cj to \\n\n" if $Debug;
    383             }
    384             }
    385             }
    386              
    387 1         5 push @$comments,
    388             '# ' . length($$content_r) . ' bytes of input';
    389 1         31 $tree->parse($$content_r);
    390 1         3086 $tree->eof;
    391 1         127 delete $o{'content'};
    392             } else {
    393 0         0 die "No input source specified?";
    394             }
    395             }
    396              
    397             {
    398             # The BODY is all we need. Discard the rest.
    399 1   50     3 my $body = $tree->find_by_tag_name('body') || die "No BODY in tree?";
      1         31  
    400 1         49 $body->detach;
    401 1         19 $tree->delete;
    402 1         38 $tree = $body;
    403             }
    404              
    405 1   50     116 push @$comments, scalar(localtime) . ' ' . ($ENV{'USER'} || '');
    406 1         7 $tree->attr('_pod_comments', $comments);
    407            
    408             # More options:
    409 1 50       16 if($o{'a_name'}) {
    410 0         0 $tree->attr('_a_name', 1);
    411 0         0 push @$comments, " Will try to render ";
    412             } else {
    413 1         3 push @$comments,
    414             " No a_name switch not specified, so will not try to render ";
    415             }
    416 1 50       3 if($o{'a_href'}) {
    417 0         0 $tree->attr('_a_href', 1);
    418 0         0 push @$comments, " Will try to render ";
    419             } else {
    420 1         3 push @$comments,
    421             " No a_href switch not specified, so will not try to render ";
    422             }
    423            
    424 1         4 twist_tree($tree);
    425              
    426 1         4 my $rendering_r = tree_as_pod($tree);
    427 1         5 $tree->delete;
    428 1         81 return $$rendering_r;
    429             }
    430              
    431             ###########################################################################
    432             #
    433             # The code below this point is not happy nice readable undocumented code.
    434             # It is angry cryptic code, of the sort that you will find little use in
    435             # reading.
    436             #
    437             # When I first thought of writing this module, several years ago, I had
    438             # noble dreams that I could write some sort of universal markup-language
    439             # mixmaster, which would only need be fed some information about the
    440             # source language and the target language, and a few simple facts about
    441             # what constructs are equivalent (that HTML "h1" is POD "head1", for
    442             # example), and then magic would happen, and documents would be converted.
    443             #
    444             # Well, I've not yet found that mixmaster, so I've had to write some
    445             # very spooky crusty strange code. It seems to work rather well when fed
    446             # simple HTML, and seems to degrade gracefully when fed too-complex HTML.
    447             #
    448             # The code can be used as-is, but it's not conceivably adaptable to other
    449             # tasks, or even easily maintainable, regrettably. However, as HTML or
    450             # POD are not likely to mutate significantly any time soon, I think
    451             # substantial maintenance will not be needed -- just minor tweaking or
    452             # bugfixes on my part.
    453             #
    454             ###########################################################################
    455             # SO STOP READING NOW, IF YOU VALUE YOUR SANITY
    456             ###########################################################################
    457             #
    458             # Stay away!
    459             # STAY AWAY!
    460             # Stay away!
    461             # You might end up like me!
    462             #
    463             # It's the pain
    464             # that keeps us alive,
    465             # but that beauty is all that we need to survive.
    466             #
    467             # That damned beauty is all that we need to survive.
    468             #
    469             # -- David Byrne, "They Are In Love"
    470             #
    471             ###########################################################################
    472              
    473             # Initialization code:
    474              
    475             # TODO: replace this with a hardwired table?
    476             %Phrasal = %HTML::Tagset::isPhraseMarkup;
    477             delete @Phrasal{'br', 'hr'};
    478             for (qw(~literal ~texticle)) { $Phrasal{$_} = 1 }
    479             $counter = 0 unless defined $counter;
    480              
    481             $Debug = 2 unless defined $Debug;
    482              
    483             # Fill out Char2ent:
    484             {
    485             die "\%HTML::Entities::char2entity is empty?"
    486             unless keys %HTML::Entities::char2entity;
    487            
    488             my($c,$e);
    489             while(($c,$e) = each(%HTML::Entities::char2entity)) {
    490             if($e =~ m{^&#(\d+);$}s) {
    491             $Char2ent{$c} = "E<$1>";
    492             #print "num $e => E<$1>\n";
    493             # { => E<123>
    494             } elsif($e =~ m{^&([^;]+);$}s) {
    495             $Char2ent{$c} = "E<$1>";
    496             #print "eng $e => E<$1>\n";
    497             # é => E
    498             } else {
    499             warn "Unknown thingy in %HTML::Entities::char2entity: $e"
    500             # if $^W;
    501             }
    502             }
    503            
    504             # Points of difference between HTML entities and POD entities:
    505            
    506             $Char2ent{"\xA0"} = "E<160>"; # there is no E
    507            
    508             $Char2ent{"\xAB"} = "E";
    509             $Char2ent{"\xBB"} = "E";
    510             # Altho new POD processors also know E and E
    511            
    512             # Old POD processors don't know these two -- so leave numeric
    513             # $Char2ent{'/'} = 'E';
    514             # $Char2ent{'|'} = 'E';
    515             }
    516              
    517             # Set up some initial values we'll need later.
    518             unless(defined $nbsp) {
    519             my $nb = ' ';
    520             HTML::Entities::decode_entities($nb);
    521             if(!defined $nb) {
    522             die "  decodes to undef?";
    523             } elsif($nb eq '') {
    524             die "  decodes to empty-string?";
    525             } elsif($nb eq ' ') {
    526             die "  doesn't decode?";
    527             } elsif($nb eq ' ') {
    528             $nbsp = undef;
    529             } else {
    530             $nbsp = $nb;
    531             }
    532             }
    533              
    534             unless(defined $E_slash) {
    535             my $x = '/';
    536             encode_entities_harder($x);
    537             if(!defined $x or !length $x) {
    538             die "'/' encodes to nothing??";
    539             } elsif($x eq '/') {
    540             # no-op
    541             } elsif($x =~ m{^E<[^>]+>$}s) {
    542             $E_slash = $x;
    543             } else {
    544             die "'/' encodes as $x?!";
    545             }
    546             }
    547              
    548             unless(defined $E_vbar) {
    549             my $x = '|';
    550             encode_entities_harder($x);
    551             if(!defined $x or !length $x) {
    552             die "'|' encodes to nothing??";
    553             } elsif($x eq '|') {
    554             # no-op
    555             } elsif($x =~ m{^E<[^>]+>$}s) {
    556             $E_vbar = $x;
    557             } else {
    558             die "'|' encodes as $x?!";
    559             }
    560             }
    561              
    562             # Last chance to save your sanity: stop reading now...
    563              
    564             #--------------------------------------------------------------------------
    565              
    566             # TODO: make all P's go byebye once we've texticulated?
    567              
    568             sub twist_tree {
    569 1     1 0 3 my $tree = $_[0];
    570              
    571 1         5 html_node_name($tree);
    572              
    573 1         5 delete_unknowns($tree);
    574              
    575 1         4 special_splice_div($tree);
    576              
    577 1 50       14 print("Input tree:\n"), $tree->dump, sleep(0) if $Debug;
    578              
    579 1         9 prune_by_tag_name( $tree,
    580             [qw~ script style ~],
    581             [qw~ map style isindex select textarea del input embed bgsound basefont ~],
    582             );
    583              
    584 1         13 splice_by_tag_name($tree,
    585             [qw~
    586             big small acronym sub sup multicol
    587             applet param object
    588             table tr caption col thead tbody tfoot colgroup
    589             noscript center font bdo fieldset ins
    590             form label legend button link layer object
    591             span abbr blink strike wbr
    592             frame frameset ilayer layer nolayer
    593             address nobr
    594             ~],
    595             );
    596              
    597 1         17 remap_tags($tree, {qw~
    598             td p
    599             th p
    600             em i
    601             strong b
    602             cite i
    603             code code
    604             tt code
    605             kbd code
    606             samp code
    607             var i
    608             dfn b
    609             listing pre
    610             plaintext pre
    611             xmp pre
    612             dd p
    613             ~});
    614             # CODE for C<>
    615             # I for I<>
    616             # B for B<>
    617              
    618             # TODO: Warn of cases where heading has too-complex text in it?
    619              
    620 1         6 p_unnest($tree);
    621              
    622 1         32 pre_render($tree);
    623 1         4 q_render($tree);
    624              
    625 1         3 images_render($tree);
    626 1         4 hr_render($tree);
    627 1         3 br_render($tree);
    628 1         4 lists_render($tree);
    629             #wrangle_body_children($tree);
    630              
    631 1         4 literalize_text_under($tree);
    632              
    633 1         4 winge_about_phrasal_paradoxes($tree);
    634              
    635 1         4 texticulate($tree);
    636 1         3 promote_some_secondary_children($tree);
    637 1         5 goodify_p_elements($tree);
    638              
    639 1         3 render_headings($tree); # busts up the headings
    640              
    641 1         4 a_tweak($tree);
    642             #bust_up($tree, qw~h1 h2 h3 h4 h5 h6 p~);
    643              
    644 1         4 pod_node_name($tree);
    645 1 50       8 $tree->dump, sleep(0) if $Debug > 1;
    646 1         2 return;
    647             }
    648              
    649             #==========================================================================
    650             # Subs below here are in no particular order. Ahwell.
    651              
    652             sub a_tweak {
    653            
    654             #Scratch:
    655 1     1 0 2 my($a_name, $parent, $grandparent, $gptag, @cl, $text);
    656            
    657 1         4 foreach my $a ($_[0]->find_by_tag_name('a')) {
    658             # The configuration we're after looks like this:
    659             #

    @0.0

    660             # <~texticle -pod-id="~texticle_1" id="``G55"> @0.0.0
    661             # @0.0.0.0
    662             # NAME @0.0.0.0.0
    663 0         0 $a_name = $a->attr('name');
    664 0 0       0 next unless defined $a_name;
    665            
    666 0   0     0 $parent = $a->parent || next;
    667 0 0       0 next unless $parent->tag eq '~texticle';
    668 0   0     0 $grandparent = $parent->parent || next;
    669 0         0 $gptag = $grandparent->tag;
    670 0 0 0     0 next unless $gptag eq 'h1' or $gptag eq 'h2' or $gptag eq 'item';
          0        
    671 0 0 0     0 next unless $parent->content_list == 1
    672             and $grandparent->content_list == 1; # only child of an only child
    673 0         0 @cl = $a->content_list; # with one child, a texticle
    674 0 0 0     0 next unless @cl == 1 and ref $cl[0] and $cl[0]->tag eq '~literal';
          0        
    675 0         0 $text = $cl[0]->attr('text');
    676 0 0       0 next unless defined $text;
    677 0         0 $text =~ s/^\s+//s;
    678 0         0 $text =~ s/\s+$//s;
    679 0 0       0 if($a_name eq $text) {
    680 0         0 $a->replace_with_content;
    681 0 0       0 print "a_tweak applies to ", $a->attr('id'), "\n" if $Debug > 1
    682             } else {
    683 0 0       0 print "a_tweak can't apply to ",
    684             $a->attr('id'), ": [$a_name] ne [$text]\n"
    685             if $Debug > 1;
    686             # hack can't apply
    687             }
    688             }
    689            
    690 1         41 return;
    691             }
    692              
    693             sub p_unnest {
    694 1     1 0 2 my $tree = $_[0];
    695             # Now, p's can't nest in HTML, but once we've spliced out and remapped
    696             # things, we can end up with p's containing p's in our parse tree:
    697             #
    Foo
    Bar
    baz
    698             # =

    Foo

    Bar

    Baz

    699 1         4 foreach my $p (reverse $tree->find_by_tag_name('p')) {
    700 0 0       0 if($p->parent->tag eq 'p') {
    701 0         0 my @c = $p->detach_content;
    702 0         0 $p->replace_with(
    703             HTML::Element->new( 'br',
    704             'id', '``G' . ++$counter),
    705             @c,
    706             HTML::Element->new( 'br',
    707             'id', '``G' . ++$counter),
    708             );
    709             }
    710             }
    711             }
    712              
    713             #==========================================================================
    714              
    715             sub delete_unknowns {
    716 1     1 0 3 my $tree = $_[0];
    717 1         3 my $map_r = $tree->tagname_map;
    718 1         104 delete @$map_r{keys %HTML::Tagset::isKnown};
    719 1         8 my($tag, $elements);
    720 1         6 while(($tag,$elements) = each %$map_r) {
    721 0         0 commentate($tree, join ", ",
    722             "# Unknown \"$tag\" elements deleted: ",
    723             map $_->attr('id'), @$elements
    724             );
    725 0         0 foreach my $e (@$elements) { $e->replace_with_content }
      0         0  
    726             }
    727 1         3 return;
    728             }
    729              
    730             #==========================================================================
    731             sub special_splice_div {
    732 1     1 0 6 foreach my $div ($_[0]->find_by_tag_name('div', 'iframe')) {
    733 0         0 $div->replace_with(
    734             HTML::Element->new( 'br',
    735             'id', '``G' . ++$counter),
    736             $div->content_list(),
    737             HTML::Element->new( 'br',
    738             'id', '``G' . ++$counter),
    739             );
    740             }
    741 1         36 return;
    742             }
    743              
    744             #==========================================================================
    745              
    746             sub winge_about_phrasal_paradoxes {
    747 1     1 0 3 my $tree = $_[0];
    748 1         1 my @non_phrasal_children;
    749 1         12 foreach my $p (reverse $tree->find_by_tag_name(keys %Phrasal)) {
    750 1         238 @non_phrasal_children = ();
    751 1         5 foreach my $c ($p->content_list) {
    752 0 0 0     0 push @non_phrasal_children, $c
    753             if ref $c and not $Phrasal{$c->tag};
    754             }
    755 1 50       56 if(@non_phrasal_children) {
    756 0         0 my $tag = $p->tag;
    757 0         0 commentate( $tree,
    758             join '',
    759             " Deleting phrasal \"$tag\" element (",
    760             $p->attr('id'),
    761             ") because it has super-phrasal elements (",
    762             join(", ",
    763             map $_->attr('id'), @non_phrasal_children
    764             ), ") as children.",
    765             )
    766             ;
    767 0         0 $p->replace_with_content;
    768             }
    769             }
    770 1         5 return;
    771             }
    772              
    773             #==========================================================================
    774              
    775             sub commentate {
    776 0     0 0 0 my $tree = shift;
    777 0         0 push @{ $tree->attr('_pod_comments') }, @_;
      0         0  
    778 0         0 return;
    779             }
    780              
    781             #==========================================================================
    782              
    783             sub html_node_name {
    784 1     1 0 8 my $map_r = $_[0]->tagname_map;
    785              
    786 1         28 my($name, $nodes);
    787 1         11 while(($name, $nodes) = each %$map_r) {
    788 2         34 my $counter = 0;
    789 2         5 foreach my $node (@$nodes) {
    790 2         3 ++$counter;
    791 2   33     14 $node->attr('id',
    792             $node->attr('id') || ( '`' . $name . '_' . $counter )
    793             )
    794             ;
    795             }
    796             }
    797              
    798 1         27 return;
    799             }
    800              
    801             sub pod_node_name {
    802 1     1 0 4 my $map_r = $_[0]->tagname_map;
    803              
    804 1         34 my($name, $nodes);
    805 1         6 while(($name, $nodes) = each %$map_r) {
    806 4         46 my $counter = 0;
    807 4         7 foreach my $node (@$nodes) {
    808 4         4 ++$counter;
    809 4         21 $node->attr('-pod-id',
    810             $name . '_' . $counter
    811             )
    812             ;
    813             }
    814             }
    815              
    816 1         16 return;
    817             }
    818              
    819             #==========================================================================
    820              
    821             sub render_headings {
    822 1     1 0 2 my $tree = $_[0];
    823 1         4 my $map_r = $tree->tagname_map;
    824 1         47 my @levels = sort grep m/^h[1-9]+$/s, keys %$map_r;
    825 1         3 my @headings;
    826              
    827 1 50       4 if(@levels == 0) { # no headings!?!
    828             # TODO: insert something?
    829             } else {
    830 1 50       4 print "# Highest heading level: $levels[0] Making that =head1\n"
    831             if $Debug;
    832 1         2 foreach my $h (@{$map_r->{shift @levels}}) {
      1         3  
    833 1         1 push @headings, $h;
    834 1         4 $h->attr('was-tag', $h->tag);
    835 1         26 $h->attr('_tag', 'h1');
    836             }
    837             # And, for any sub-primary levels...
    838 1 0 33     15 print "# Lower levels: @levels. Making those =head2\n"
    839             if @levels and $Debug;
    840 1         3 foreach my $h (map @{$map_r->{$_}}, @levels) {
      0         0  
    841 0         0 push @headings, $h;
    842 0         0 $h->attr('was-tag', $h->tag);
    843 0         0 $h->attr('_tag', 'h2');
    844             }
    845             }
    846              
    847 1         3 foreach my $h (@headings) {
    848 1 50       9 if($h->parent->is_inside('h1', 'h2')) {
    849             # Don't put headings inside other headings. It's just stupid.
    850 0         0 $h->replace_with_content;
    851 0         0 undef($h);
    852             }
    853             }
    854              
    855 1         33 foreach my $h (grep defined($_), @headings) {
    856 1         4 my @c = $h->content_list;
    857 1 50       10 if(!@c) {
        50          
    858 0         0 $h->delete;
    859             } elsif($c[0]->tag ne '~texticle') {
    860 0         0 $h->replace_with_content;
    861             # Don't have things other than texticles in headings
    862             } else {
    863 1 50       10 if(@c > 1) {
    864             # promote all but the first element
    865 0         0 $h->detach_content;
    866 0         0 $h->push_content(shift @c);
    867 0         0 $h->postinsert(@c);
    868             # SHOULD HAVE HAPPENED ANYWAY.
    869             }
    870             # else @c is just one element, a texticle -- which is ideal.
    871 1 50       3 commentate($tree,
    872             "# Icky: heading " . $h->attr('id')
    873             . " not immediately under body."
    874             ) unless $h->parent eq $tree;
    875             }
    876             }
    877              
    878 1         13 return;
    879             }
    880              
    881             #--------------------------------------------------------------------------
    882              
    883             sub goodify_p_elements {
    884 1     1 0 5 foreach my $x ($_[0], $_[0]->find_by_tag_name('over', 'item')) {
    885 1         44 my $dirty;
    886 1         4 my @children = $x->content_list;
    887            
    888 1         9 for(my $i = 0; $i < @children; ++$i) {
    889 1 50       3 if($children[$i]->tag eq 'p') {
    890 0         0 my $p = $children[$i];
    891 0         0 my @p_content = $p->detach_content;
    892 0         0 $p->delete;
    893 0         0 $dirty = 1;
    894              
    895             # Replace the p in the list with its content, and update $i:
    896 0         0 splice @children, $i, 1, @p_content;
    897 0         0 $i += scalar(@p_content) - 1;
    898             # Properly,
    899             # Leaves $i alone if @p_content == 1.
    900             # Decrements $i if @p_content == 0.
    901             # Adds to $i appropriately for other sizes of @p_content.
    902             }
    903             }
    904            
    905 1 50       12 if($dirty) {
    906 0         0 $x->detach_content;
    907 0         0 $x->push_content(@children);
    908             }
    909             }
    910              
    911 1         2 my @c;
    912             # /Try/ to delete all p's
    913 1         4 foreach my $p ($_[0]->find_by_tag_name('p')) {
    914 0         0 @c = $p->content_list;
    915 0 0       0 if(!@c) {
        0          
    916 0         0 $p->delete; # always right?
    917              
    918 0         0 } elsif(@c == grep {; $_->tag eq '~texticle'} @c) {
    919             #all texticles!
    920 0         0 $p->replace_with_content;
    921             } else {
    922 0         0 print
    923             "# Odd: content of p (",
    924             $p->attr('id'),
    925             ") is not all texticles: [",
    926             join(' ', map $_->tag, @c), "]\n"
    927             ;
    928             # Shouldn't happen, I think.
    929             }
    930             }
    931              
    932 1         40 return;
    933             }
    934              
    935             #--------------------------------------------------------------------------
    936              
    937             sub promote_some_secondary_children {
    938 1     1 0 9 foreach my $x (reverse($_[0]->find_by_tag_name('item', 'h1' .. 'h6'))) {
    939 1         66 my @c = $x->content_list;
    940 1 50       20 if(@c > 1) {
    941             # Take all children after the first, and move them up to
    942             # being right sisters of this node.
    943 0 0       0 print
    944             "# Promote_some_secondary_children applies to ",
    945             $x->attr('id'),
    946             ": (",
    947             join(", ", map $_->attr('id'), @c), ")\n" if $Debug;
    948 0         0 $x->detach_content;
    949 0         0 $x->push_content(shift @c);
    950 0         0 $x->postinsert(@c);
    951             #print "Done\n" if $Debug;
    952             }
    953             }
    954             #print "Returning\n" if $Debug;
    955 1         23 return;
    956             }
    957              
    958             sub literalize_text_under {
    959             # Traverse tree, turning text segments into ~literal pseudoelements
    960 2     2 0 4 my $node = $_[0];
    961 2         4 my(@children, $dirty);
    962 2         9 foreach my $c (@children = $node->content_list) {
    963 2 100       16 if(ref $c) {
    964 1         14 literalize_text_under($c);
    965             } else {
    966 1         2 $dirty = 1;
    967 1         15 $c = HTML::Element->new('~literal', 'text' => $c,
    968             'id', '``G' . ++$counter);
    969             }
    970             }
    971 2 100       49 if($dirty) {
    972 1         5 $node->detach_content;
    973 1         13 $node->push_content(@children);
    974             }
    975 2         18 return;
    976             }
    977              
    978             #--------------------------------------------------------------------------
    979              
    980             sub texticulate {
    981             # group ~literals and phrasals into texticles
    982             # -- maximally high-and-merged phrasal/text groups
    983 3     3 0 5 my $node = $_[0];
    984 3         4 my $dirty;
    985 3         9 my(@children) = $node->content_list;
    986              
    987             #foreach my $c (@children) {
    988             # texticulate($c);
    989             #}
    990              
    991             #print "Applying to $node = ", $node->tag, "\n";
    992              
    993 3 100       19 if(! $Phrasal{$node->tag}) {
    994             # Only non-phrasals can have texticles as children!
    995 2         14 my $last_tag;
    996 2         8 for(my $i = 0; $i < @children; $i++) {
    997 2         9 texticulate($children[$i]); # RECURSE!
    998 2 100       18 next unless $Phrasal{$children[$i]->tag};
    999            
    1000 1 50 33     18 if($i == 0
        0          
    1001             or
    1002             !$Phrasal{
    1003             $last_tag = $children[$i - 1]->tag
    1004             }
    1005             ) {
    1006             # start a new texticle group
    1007 1         2 $dirty = 1;
    1008 1         2 my $old = $children[$i];
    1009 1         7 $children[$i] = HTML::Element->new('~texticle',
    1010             'id', '``G' . ++$counter);
    1011 1         59 $children[$i]->push_content($old); # and demote the phrasal to under it
    1012             } elsif($last_tag eq '~texticle') {
    1013             # move this under preceding texticle
    1014 0         0 $dirty = 1;
    1015 0         0 $children[$i - 1]->push_content( splice @children, $i, 1 );
    1016 0         0 --$i;
    1017             } else {
    1018 0         0 die "SPORK 1231233312!";
    1019             }
    1020             }
    1021            
    1022             #if(0) {
    1023             # foreach my $c (@children) {
    1024             # # Now fold the texticular content up
    1025             # if($c->tag eq '~texticle') {
    1026             # $c->attr('~folded' => [$c->detach_content]);
    1027             # }
    1028             # }
    1029             #}
    1030             }
    1031              
    1032             # Now delete all br's!
    1033             # (Would it be better to delete BRs only adjacent to a texticle?)
    1034 3         49 for(my $i = 0; $i < @children; $i++) {
    1035 2 50       7 if($children[$i]->tag eq 'br') {
    1036 0         0 splice @children, $i, 1;
    1037 0         0 --$i;
    1038 0         0 $dirty = 1;
    1039             }
    1040             }
    1041             # So, the only purpose/effect of BRs is that they serve as barriers
    1042             # to unifying adjacent phrasal elements under a common texticle.
    1043             # Once we've unified things, we just delete them from the tree.
    1044              
    1045 3 100       23 if($dirty) {
    1046 1         3 $node->detach_content;
    1047 1         8 $node->push_content(@children);
    1048             }
    1049             }
    1050              
    1051             #==========================================================================
    1052              
    1053             sub remap_tags {
    1054 1     1 0 3 my($tree, $hr) = @_;
    1055 1 50 33     11 die unless $hr and ref($hr) eq 'HASH';
    1056 1         133 my($recursor, $tag);
    1057             $recursor = sub {
    1058 2     2   10 foreach my $c ($_[0]->content_list) {
    1059 2 100       67 if(ref $c) {
    1060 1 50 33     4 if(($tag = $c->tag) and defined $tag and exists $hr->{$tag}) {
          33        
    1061 0         0 $c->attr('_tag', $hr->{$tag});
    1062             }
    1063 1         19 $recursor->($c); # recurse!
    1064             }
    1065             }
    1066 2         31 return;
    1067 1         10 };
    1068            
    1069 1         3 $recursor->($tree); # Run the recursion.
    1070            
    1071 1         2 undef $recursor; # So the lambda's refcount can hit 0, and can GC.
    1072 1         6 return;
    1073             }
    1074              
    1075             #--------------------------------------------------------------------------
    1076              
    1077             sub wrangle_body_children {
    1078 0     0 0 0 my $tree = $_[0];
    1079 0         0 my @children = $tree->content_list;
    1080 0         0 my $dirty = 0;
    1081              
    1082 0         0 my $c;
    1083 0         0 $tree->normalize_content; # NB: doesn't recurse
    1084              
    1085 0         0 for(my $i = 0; $i < @children; ++$i) {
    1086 0         0 my $c = $children[$i];
    1087 0 0       0 if(!ref($c)) {
    1088             # put under a new p
    1089 0         0 $dirty = 1;
    1090             (
    1091 0         0 $children[$i] = HTML::Element->new('p', 'superimplicit' => 1,
    1092             'id', '``G' . ++$counter
    1093             )
    1094             )->push_content($c);
    1095             #} elsif($c->tag eq 'hr') {
    1096             # # do anything special?
    1097             }
    1098             }
    1099              
    1100 0 0       0 if($dirty) {
    1101 0         0 $tree->detach_content;
    1102 0         0 $tree->push_content(@children);
    1103             }
    1104              
    1105 0         0 return;
    1106             }
    1107              
    1108             #--------------------------------------------------------------------------
    1109              
    1110             sub lists_render { # Recursive.
    1111 2     2 0 19 my $node = $_[0];
    1112 2         4 my $tag;
    1113 2 50 33     6 if(($tag = $node->tag) eq 'ul' or $tag eq 'menu') {
        50          
        50          
        50          
    1114 0         0 $node->attr('was-tag', $tag);
    1115 0         0 $node->attr('_tag', 'over');
    1116 0         0 foreach my $c ($node->content_list) {
    1117 0 0 0     0 next unless ref($c) and $c->tag eq 'li';
    1118 0         0 $c->attr('_tag', 'item');
    1119 0         0 $c->unshift_content('* ');
    1120             # TODO: support bullet types other than this?
    1121             }
    1122              
    1123             } elsif($tag eq 'ol') {
    1124 0         0 $node->attr('was-tag', $tag);
    1125 0         0 $node->attr('_tag', 'over');
    1126 0         0 my $x = 0;
    1127 0         0 foreach my $c ($node->content_list) {
    1128 0 0 0     0 next unless ref($c) and $c->tag eq 'li';
    1129 0         0 $c->attr('_tag', 'item');
    1130 0         0 $c->unshift_content(++$x . '. ');
    1131             # TODO: support number styles other than this?
    1132             }
    1133              
    1134             } elsif($tag eq 'dl') {
    1135 0         0 $node->attr('was-tag', $tag);
    1136 0         0 $node->attr('_tag', 'over');
    1137 0         0 my $tag;
    1138 0         0 foreach my $c ($node->content_list) {
    1139 0 0       0 next unless ref($c);
    1140 0 0       0 if(($tag = $c->tag) eq 'dt') {
        0          
    1141 0         0 $c->attr('was-tag', $tag);
    1142 0         0 $c->attr('_tag', 'item');
    1143             } elsif($tag eq 'dd') {
    1144 0         0 $c->attr('was-tag', $tag);
    1145 0         0 $c->attr('_tag', 'item');
    1146             # Altho really, earlier on, we will have turned all dd's into p's!
    1147             # This code is here just in case we decide that that wasn't
    1148             # such a hot idea.
    1149             # Instead of turning dd's into items, consider replacing with
    1150             # content, with a br on each side? Or too late for that?
    1151             }
    1152             # else just moooove along
    1153             }
    1154              
    1155             } elsif($tag eq 'blockquote') { # not really a list, but hey.
    1156 0         0 $node->attr('was-tag', $tag);
    1157 0         0 $node->attr('_tag', 'over');
    1158             }
    1159              
    1160             # In any case, recurse...
    1161 2         36 foreach my $c ($node->content_list) {
    1162 2 100       19 lists_render($c) if ref $c;
    1163             }
    1164             }
    1165              
    1166             #--------------------------------------------------------------------------
    1167              
    1168             sub br_render {
    1169             # render BRs.
    1170              
    1171             # TODO: anything necessary?
    1172              
    1173 1     1 0 2 return;
    1174             }
    1175              
    1176              
    1177             sub hr_render {
    1178 1     1 0 2 my $tree = $_[0];
    1179 1         2 my $alt;
    1180 1         4 foreach my $hr ($tree->find_by_tag_name('hr')) {
    1181 0 0       0 if($hr->parent->tag eq 'body') {
    1182             # Special sauce. SPECIAL SAUCE!
    1183 0         0 $hr->attr('_tag', 'p');
    1184 0         0 $hr->attr('was-tag', 'hr');
    1185 0         0 $hr->push_content('----');
    1186             } else {
    1187 0         0 $hr->replace_with(
    1188             $hr->new('br', 'was-tag' => 'hr', 'id' => '``G' . ++$counter),
    1189             '----',
    1190             $hr->new('br', 'was-tag' => 'hr', 'id' => '``G' . ++$counter),
    1191             );
    1192             }
    1193             }
    1194 1         29 return;
    1195             }
    1196              
    1197              
    1198             sub pre_render {
    1199 1     1 0 2 my $tree = $_[0];
    1200 1         4 foreach my $p ($tree->find_by_tag_name('pre')) {
    1201             # Delete left or right ignorable WS nodes...
    1202             {
    1203 0         0 my $left = $p->left;
      0         0  
    1204             #print "Left of $p is $left\n";
    1205 0 0 0     0 if(defined $left and !ref $left and $left =~ m<^\s*$>s) {
          0        
    1206             # all nil or WS.
    1207             #print "Delendum left at", $p->attr('id') || $p->address, "!\n";
    1208 0         0 $p->parent->splice_content($p->pindex - 1, 1); # delete preceding WS.
    1209             }
    1210             }
    1211             {
    1212 0         0 my $right = $p->right;
      0         0  
    1213 0 0 0     0 if(defined $right and !ref $right and $right =~ m<^\s*$>s) {
          0        
    1214             # all nil or WS.
    1215             #print "Delendum right at", $p->attr('id') || $p->address, "!\n";
    1216 0         0 $p->parent->splice_content($p->pindex + 1, 1); # delete following WS.
    1217             }
    1218             }
    1219              
    1220             # Now acually render, simply...
    1221 0         0 my $text_content = $p->as_text;
    1222 0 0       0 unless($text_content =~ m/\S+/) {
    1223 0         0 $p->delete;
    1224 0         0 next;
    1225             }
    1226            
    1227 0         0 $text_content =~ s/^\n+//s; # Kill leading newlines
    1228 0         0 $text_content =~ s/\n+$//s; # Kill trailing newlines
    1229              
    1230 0         0 my $left = $p->left;
    1231 0 0 0     0 if($left and ref($left) and $left->tag eq 'pre') {
          0        
    1232             # prepend to the immediately preceding pre's content
    1233 0         0 ${
    1234 0         0 $left->attr('~pre_content_r')
    1235             } .= "\n" . $text_content;
    1236 0         0 $p->delete;
    1237             } else {
    1238 0         0 $p->delete_content;
    1239 0         0 $p->attr('~pre_content_r', \$text_content);
    1240             #print "Pre content [[",$text_content,"]]\n";
    1241             }
    1242             }
    1243 1         26 return;
    1244             }
    1245              
    1246             sub q_render {
    1247 1     1 0 2 my $tree = $_[0];
    1248 1         4 foreach my $q ($tree->find_by_tag_name('q')) {
    1249 0         0 $q->push_content('"');
    1250 0         0 $q->unshift_content('"');
    1251 0         0 $q->replace_with_content;
    1252             }
    1253 1         25 return;
    1254             }
    1255              
    1256             sub images_render {
    1257 1     1 0 13 my $tree = $_[0];
    1258 1         15 foreach my $img ($tree->find_by_tag_name('img')) {
    1259 0         0 my $alt;
    1260 0 0       0 if(defined($alt = $img->attr('alt'))) {
    1261 0         0 $img->replace_with($alt);
    1262             } else {
    1263 0 0       0 $img->replace_with(
    1264             $Debug ?
    1265             ('[IMAGE' . $img->attr('id') . ']') :
    1266             '[IMAGE]'
    1267             );
    1268             #?? $img->delete;
    1269             }
    1270             }
    1271 1         27 return;
    1272             }
    1273              
    1274             #--------------------------------------------------------------------------
    1275              
    1276             sub prune_by_tag_name {
    1277 1     1 0 2 my($tree, @o) = @_;
    1278 1         3 foreach my $o (@o) {
    1279 2 50       82 foreach my $x ($tree->find_by_tag_name(ref $o ? @$o : $o)) {
    1280 0         0 $x->delete;
    1281             }
    1282             }
    1283 1         102 return;
    1284             }
    1285              
    1286             sub splice_by_tag_name {
    1287 1     1 0 3 my($tree, @o) = @_;
    1288 1         3 foreach my $o (@o) {
    1289 1 50       7 foreach my $x ($tree->find_by_tag_name(ref $o ? @$o : $o)) {
    1290 0         0 $x->replace_with_content;
    1291             }
    1292             }
    1293 1         144 return;
    1294             }
    1295              
    1296             #--------------------------------------------------------------------------
    1297             sub tree_as_pod {
    1298 1     1 0 2 my $tree = $_[0];
    1299              
    1300 1         2 my @lines;
    1301 1         3 my $comments = $tree->attr('_pod_comments');
    1302              
    1303 1         11 my $bender;
    1304              
    1305             $bender = sub {
    1306 3     3   6 my(@post, $node);
    1307 3         9 my $tag = ($node = $_[0])->tag;
    1308              
    1309 3 100       35 if($tag eq 'body') {
        50          
        50          
        50          
        100          
        50          
        50          
    1310             # no-op
    1311             } elsif($tag eq 'pre') {
    1312 0         0 push @lines, ${$node->attr('~pre_content_r')};
      0         0  
    1313 0 0       0 $lines[-1] =~ s/^/ /gm if $lines[-1] =~ m/^\S/m;
    1314             # bump everything over if there's any lines that start with
    1315             # anything non-spaceys
    1316 0         0 while($lines[-1] =~ s/\n\n/\n \n/) { }
    1317             # have there be no zero-length lines.
    1318             } elsif($tag eq 'over') {
    1319 0         0 push @lines, "=over";
    1320 0         0 push @post, "=back";
    1321             } elsif($tag eq 'item') {
    1322 0         0 push @lines, "=item";
    1323             } elsif($tag eq 'h1') {
    1324 1         13 push @lines, "=head1";
    1325             } elsif($tag eq 'h2') {
    1326 0         0 push @lines, "=head2";
    1327             } elsif($tag eq '~texticle') {
    1328 1         4 my $text = render_texticle($tree,$node);
    1329 1         4 $text =~ s/^\s+//s;
    1330 1         3 $text =~ s/\s+$//s;
    1331 1         2 $text =~ s/^=/E<61>/s;
    1332             # So that this can't be mistaken for a directive -- on the
    1333             # off chance that text content starts with a '='
    1334            
    1335             #$text = "{$text}";
    1336            
    1337 1 50 33     25 if(
          33        
          33        
    1338             @lines and
    1339             $lines[-1] =~ m/^=(\w{1,10})$/s and
    1340             ( $1 eq 'item' or $1 eq 'head1' or $1 eq 'head2' )
    1341             ) {
    1342             # Merge this text with the directive:
    1343 1         3 $text = pop(@lines) . ' ' . $text;
    1344             }
    1345            
    1346 1         5 push @lines, wrap72_dammit($text);
    1347 1         4 $lines[-1] =~ s/\s+$//s; # Make REALLY sure there's no tailing WS
    1348 1 50       4 pop @lines unless length $lines[-1]; # Sanity check.
    1349            
    1350 1         4 return;
    1351             # Don't recurse under texticles (because nothing should be there!)
    1352             } else {
    1353 0 0       0 print "unrenderable element \"$tag\" in phrasal-pass\n" if $Debug;
    1354             }
    1355            
    1356 2         6 foreach my $c ($node->content_list) {
    1357 2         23 $bender->($c);
    1358             }
    1359            
    1360 2         4 push @lines, @post;
    1361 2         5 return;
    1362 1         7 };
    1363 1         3 $bender->($tree);
    1364 1         1 undef $bender;
    1365              
    1366 1 50 33     24 unshift @lines, "=pod" unless @lines and $lines[0] =~ m<^=>s;
    1367              
    1368 1         3 push @lines, "=cut\n\n"; # get extra double-newline at end
    1369              
    1370 1         4 my $pod = join "\n\n", @lines;
    1371              
    1372 1 50 33     11 if($comments and @$comments) {
    1373 1         2 foreach my $c (@$comments) {
    1374 5         11 $c =~ tr<\cm\cj>< >s;
    1375 5 100       22 $c = "#" . $c unless $c =~ m<^\s*#>s;
    1376             }
    1377 1         5 $pod .= join "\n", @$comments, '';
    1378             }
    1379              
    1380 1 50       20 sleep(0), print("#Start pod\n\n$pod\n"), sleep(0) if $Debug > 1;
    1381 1         5 return \$pod;
    1382             }
    1383              
    1384             #--------------------------------------------------------------------------
    1385             sub render_texticle {
    1386 1     1 0 3 my($tree, $t) = @_;
    1387 1         2 my $text = '';
    1388 1         2 my $bender;
    1389            
    1390 1         3 my $a_name = $tree->attr('_a_name');
    1391 1         17 my $a_href = $tree->attr('_a_href');
    1392            
    1393 1         9 my $under_l_count = 0;
    1394             $bender = sub {
    1395 2     2   7 my $tag = (my $node = $_[0])->tag;
    1396 2         11 my $post = '>';
    1397 2         3 my $decr_under_l_count_post = 0;
    1398 2 100       7 if($tag eq '~texticle') {
        50          
        0          
        0          
        0          
        0          
    1399             # no-op -- just a container
    1400 1         2 $post = '';
    1401             } elsif($tag eq '~literal') {
    1402 1         4 my $content = $node->attr('text');
    1403             #print "Text from ~literal : ", $node->attr('text'), "\n";
    1404 1 50       45 $content =~ s/\Q$nbsp/ /og if defined $nbsp;
    1405             # Kill nbsps. Why?
    1406             # First off, most of them are lame editor artifacts.
    1407             # Second off, actually treating them correctly (with S<...>)
    1408             # would be a real pain.
    1409              
    1410 1 50       4 if($under_l_count) {
    1411 0         0 encode_entities_harder($content);
    1412             } else {
    1413 1         3 encode_entities($content);
    1414             }
    1415             #if(defined $E_slash) {
    1416             # # Delete at least most of the optional E's
    1417             # while( $content =~ s{^([^<>]*)\Q$E_slash\E}{$1/}so ) {}
    1418             # while( $content =~ s{\Q$E_slash\E([^<>]*)$}{/$1}so ) {}
    1419             #}
    1420             #if(defined $E_vbar) {
    1421             # # Delete at least most of the optional E's
    1422             # while( $content =~ s{^([^<>]*)\Q$E_vbar\E}{$1|}so ) {}
    1423             # while( $content =~ s{\Q$E_vbar\E([^<>]*)$}{|$1}so ) {}
    1424             #}
    1425 1 50       4 print "\$text is undef?" unless defined $content;
    1426 1         2 $text .= $content;
    1427 1         2 $post = '';
    1428             } elsif($tag eq 'code') {
    1429 0         0 $text .= 'C<';
    1430             } elsif($tag eq 'i') {
    1431 0         0 $text .= 'I<';
    1432             } elsif($tag eq 'b') {
    1433 0         0 $text .= 'B<';
    1434             } elsif($tag eq 'a') {
    1435 0         0 my($name, $href);
    1436 0 0       0 $name = $a_name ? $node->attr('name') : undef;
    1437 0 0       0 $href = $a_href ? $node->attr('href') : undef;
    1438 0         0 $post = '';
    1439              
    1440 0 0 0     0 if(defined $name and length $name) {
    1441 0         0 $text .= 'X<' . $name . '>';
    1442             }
    1443              
    1444 0 0 0     0 if(defined $href and length $href) {
    1445 0         0 encode_entities($href);
    1446             #print "{Link text:{$href}}\n";
    1447 0 0       0 if($href =~ s/^#//s) {
        0          
        0          
    1448             # internal relative href
    1449 0         0 $text .= 'L<';
    1450 0         0 $post .= "|/$href>";
    1451 0         0 ++$under_l_count;
    1452 0         0 $decr_under_l_count_post = 1;
    1453             } elsif($href =~ s/^pod://s) {
    1454             # Pass that thru.
    1455             # A back door for making straightforward pod links.
    1456 0         0 $text .= 'L<';
    1457 0         0 $post .= "|$href>";
    1458 0         0 ++$under_l_count;
    1459 0         0 $decr_under_l_count_post = 1;
    1460             } elsif($href =~ m<^[-+.a-z0-9A-Z]+\:[^:]>s) {
    1461             # It matches RFC 1738's idea of an absolute URL.
    1462             # Pass it thru: the podulator should detect that it's a URL
    1463             # and handle appropriately.
    1464 0         0 $post .= " ($href)";
    1465             } else {
    1466             # a relative link??
    1467 0         0 $href = $href;
    1468 0         0 commentate($t->root, "# Untranslatable link: \"$href\"");
    1469             }
    1470             }
    1471             } else {
    1472 0         0 print "Unrenderable sub-phrasal element $tag: ignoring\n";
    1473 0         0 $post = '';
    1474             }
    1475              
    1476             # Recurse!
    1477 2         8 foreach my $c ($node->content_list) {
    1478 1         24 $bender->($c);
    1479             }
    1480            
    1481             # Now, post-order things:
    1482            
    1483 2         9 $text .= $post;
    1484 2 50       6 $under_l_count-- if $decr_under_l_count_post;
    1485 2         5 return;
    1486 1         8 };
    1487 1         3 $bender->($t);
    1488 1         2 undef $bender;
    1489              
    1490 1         21 $text =~ s/\s+/ /g;
    1491              
    1492             # A weensy bit of cleanup:
    1493 1         3 $text =~ s/ ?> ?$/>/s;
    1494 1         3 $text =~ s/^((?:\w<)+) ([^>])/$1$2/;
    1495              
    1496             #print "{$text}\n";
    1497              
    1498 1         3 return $text;
    1499             }
    1500              
    1501             #--------------------------------------------------------------------------
    1502             sub COLMAX () {72}
    1503              
    1504             sub wrap72_dammit {
    1505             # All because Text::Wrap::wrap DIES when it hits an unwrappably
    1506             # large text chunk, DAMMIT.
    1507              
    1508             # So this is a stupid wrapper: knows nothing about tabs or anything.
    1509 1     1 0 3 my $text = '';
    1510 1         2 my $col = 0;
    1511 1         12 foreach my $w (split /\s+/, $_[0]) {
    1512 2 50       7 next unless length $w;
    1513 2 50       8 if(length($w) >= COLMAX) {
        50          
    1514             # Unwrappably large chunk.
    1515 0 0       0 if($col) {
    1516 0         0 $text .= "\n$w\n";
    1517             } else {
    1518 0         0 $text .= "$w\n";
    1519             }
    1520 0         0 $col = 0;
    1521             } elsif ((1 + $col + length $w) < COLMAX) {
    1522             # The word will fit on /this/ line
    1523 2 100       5 if($col) {
    1524 1         4 $text .= " $w";
    1525 1         3 $col += 1 + length $w;
    1526             } else {
    1527 1         3 $text .= $w ;
    1528 1         2 $col += length $w;
    1529             }
    1530             } else {
    1531             # Start a new line
    1532 0 0       0 if($col) {
    1533 0         0 $text .= "\n$w";
    1534             } else {
    1535 0         0 $text .= $w; # never applies?
    1536             }
    1537 0         0 $col = length $w;
    1538             }
    1539             }
    1540 1         4 $text =~ s/\n+$//s; # nix and trailing newlines
    1541              
    1542 1         3 return $text;
    1543             }
    1544              
    1545              
    1546             #==========================================================================
    1547             # Adapted from Gisle Aas's HTML::Entities::encode_entities:
    1548              
    1549             sub encode_entities {
    1550 1     1 0 4 $_[0] =~ s/([^\n\t !-;=?-~])/$Char2ent{$1}/g;
    1551             # Encode control chars, high bit chars and '<' and '>'
    1552 1         3 return;
    1553             }
    1554              
    1555             sub encode_entities_harder {
    1556 4     4 0 31 $_[0] =~ s/([^\n\t !\#\$%\'-.0-=?-{}~])/$Char2ent{$1}/g;
    1557             # Encode control chars, high bit chars and '<', '&', '>', '"',
    1558             # '|', '/'
    1559 4         8 return;
    1560             }
    1561              
    1562             #--------------------------------------------------------------------------
    1563              
    1564             __END__