File Coverage

blib/lib/Pod/PXML.pm
Criterion Covered Total %
statement 19 278 6.8
branch 1 172 0.5
condition 0 42 0.0
subroutine 7 19 36.8
pod 2 7 28.5
total 29 518 5.6


line stmt bran cond sub pod time code
1              
2             #Time-stamp: "2004-12-29 18:34:27 AST"
3             #TODO: for xml2pod:
4             # Make utf8/Latin-1 an option (default utf8?)
5             # Make E<>ification an option (default to all)
6             # Option for whether to delete highbit things in codeblocks (default: no?)
7             #TODO: for pod2xml:
8             # Option: choice of XML encoding (Latin-1 or UTF-8)
9             # Option: whether to represent things as literals, or as numeric entities.
10             # (and whether to use decimal entities, or hex??)
11              
12             require 5;
13             package Pod::PXML;
14 2     2   14321 use strict;
  2         5  
  2         91  
15 2         386 use vars qw($VERSION $XMLNS %Char2podent %Char2xmlent
16             $LATIN_1 $XML_VALIDATE $LINK_TEXT_INFER $FUSE_ADJACENT_PRES
17             $HIGH_BIT_OK
18 2     2   10 );
  2         3  
19             $XMLNS = 'http://www.perl.com/CPAN/authors/id/S/SB/SBURKE/pxml_0.01.dtd';
20             $VERSION = '0.12';
21             # I'm going to try to keep the major version numbers in the DTD and the
22             # module in synch. I dunno about the fractional part, tho.
23             $LATIN_1 = 1;
24             $XML_VALIDATE = 1;
25             $HIGH_BIT_OK = 0;
26              
27             $LINK_TEXT_INFER = 0;
28              
29             $FUSE_ADJACENT_PRES = 1;
30             # Whether to make " foo\n\n bar" as a single PRE,
31             # as if it were from " foo\n \n bar\n\n"
32             # TODO: set to 1
33              
34 2 50   2   72 BEGIN { *DEBUG = sub () {0} unless defined &DEBUG }
35              
36             my $nil = [];
37              
38 2     2   20 use Carp;
  2         3  
  2         138  
39 2     2   2023 use utf8;
  2         20  
  2         11  
40              
41             # POD entities are just HTML entities plus verbar and sol
42             #------------------------------------------------------------------------
43              
44             # Fill out Char2podent, Char2xmlent.
45             {
46 2     2   1689 use HTML::Entities ();
  2         13813  
  2         7573  
47             die "\%HTML::Entities::char2entity is empty?"
48             unless keys %HTML::Entities::char2entity;
49            
50             my($c,$e);
51             while(($c,$e) = each(%HTML::Entities::char2entity)) {
52             if($e =~ m{^&#(\d+);$}s) {
53             $Char2podent{ord $c} = "E<$1>";
54             #print "num $e => E<$1>\n";
55             # { => E<123>
56             # $Char2xmlent{ord $c} = $e;
57             } elsif($e =~ m{^&([^;]+);$}s) {
58             $Char2podent{ord $c} = "E<$1>";
59             #print "eng $e => E<$1>\n";
60             # é => E
61             # $Char2xmlent{ord $c} = $e;
62             } else {
63             warn "Unknown thingy in %HTML::Entities::char2entity: $c => $e"
64             # if $^W;
65             }
66             }
67            
68             # Points of difference between HTML entities and POD entities:
69            
70             $Char2podent{ord "\xA0"} = "E<160>"; # there is no E
71            
72             $Char2podent{ord "\xAB"} = "E";
73             $Char2podent{ord "\xBB"} = "E";
74             # Altho new POD processors also know E and E
75            
76             # Old POD processors don't know these two -- so leave numeric
77             # $Char2podent{ord '/'} = 'E';
78             # $Char2podent{ord '|'} = 'E';
79            
80             # And a few that we have to make completely sure are present.
81             $Char2xmlent{ord '"'} = '"' ;
82             $Char2xmlent{ord '<'} = '<' ;
83             $Char2xmlent{ord '>'} = '>' ;
84             $Char2podent{ord '<'} = 'E' ;
85             $Char2podent{ord '>'} = 'E' ;
86             }
87              
88             #print STDERR "Sanity: 214 is ", $Char2podent{214}, "\n";
89              
90             #------------------------------------------------------------------------
91              
92             sub pod2xml ($) {
93 0     0 1   require Pod::Tree;
94            
95 0           my $content = $_[0];
96              
97 0           my $tree = Pod::Tree->new;
98 0 0         if(ref($content) eq 'SCALAR') {
99 0           $tree->load_string($$content);
100             } else {
101 0           $tree->load_file($content);
102             }
103 0 0         unless($tree->loaded) { croak("Couldn't load pod") }
  0            
104 0           return _pod_tree_as_xml($tree);
105             }
106              
107             #------------------------------------------------------------------------
108             # Real work:
109              
110             sub _pod_tree_as_xml {
111 0     0     my $root = $_[0]->get_root;
112 0           DEBUG > 2 and print "TREE DUMP: <<\n", $_[0]->dump, ">>\n\n";
113            
114 0 0         return "\n\n" unless $root;
115 0           my $out = '';
116              
117 0           my $trav;
118             my $x; # scratch
119             $trav = sub {
120 0     0     my $it = $_[0];
121 0           my $type = $it->get_type;
122 0           my $post = '';
123 0           DEBUG and print "Hitting $type\n";
124 0 0         if($type eq 'root') {
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
125 0           $out .= join "\n",
126             qq{},
127             qq{
128             qq{ "$XMLNS">},
129             qq{},
130             "",
131             '',
132             '',
133             ;
134              
135 0           $post = "\n"; # harmless newline, I figure.
136            
137             } elsif($type eq 'for') {
138 0           $out .= "get_arg) . "\">";
139 0           $out .= xml_escape_maybe_cdata($it->get_text);
140 0           $out .= "\n\n";
141 0           return;
142            
143             } elsif($type eq 'sequence') {
144 0           $type = lc($it->get_letter);
145 0           DEBUG and print "Sequence type \"$type\"\n";
146 0 0         if($type eq 'e') {
    0          
147             # An unresolved entity.
148 0           $x = $it->get_children;
149 0 0 0       if($x and @$x ==1 and $x->[0]->get_type eq 'text') {
      0        
150 0           $x = $x->[0]->get_text;
151 0 0         die "Impossible entity name \"$x\"" if $x =~ m/[ \t<>]/s;
152             # minimal sanity
153 0           $out .= '&' . $x . ';';
154             } else {
155             # $out .= '&WHAT;';
156 0           die "Aberrant E<..> content \"", $it->get_deep_text, "\"";
157             }
158 0           return;
159             } elsif($type eq 'l') {
160             # At time of writing, Pod::Tree is less than sterling in its
161             # treatment of L<...> sequences.
162              
163             #use Data::Dumper;
164             #print "LINK DUMP: {{\n", Dumper($it), "}}\n";
165            
166             # Some special treatment...
167 0   0       my $target = $it->get_target || die 'targetless link?';
168            
169 0           my($page, $section);
170 0           $out .= "
171 0           $page = xml_attr_escape( $target->get_page );
172 0 0         $out .= " page=\"$page\"" if length $page;
173 0           $section = xml_attr_escape( $target->get_section );
174 0 0         $out .= " section=\"$section\"" if length $section;
175 0           $out .= ">";
176            
177             #if(!$LINK_TEXT_INFER and not(($x = $target->get_children) and @$x)) {
178 0 0 0       unless(($x = $target->get_children) and @$x) {
179             # There was no gloss (i.e., the bit after the "|").
180 0 0         if(! $LINK_TEXT_INFER) {
181             # subvert the normal processing of children of this sequence.
182 0           $out .= "";
183 0           return;
184             } else {
185             # Infer the text instead.
186 0           my $ch;
187 0 0 0       if(($ch = $it->get_children) and @$ch == 1
      0        
188             and $ch->[0]->get_type eq 'text'
189             ) {
190             # So this /is/ just some text bit that Pod::Tree implicated.
191              
192             # To replicate Pod::Text's inscrutible weirdness as
193             # best we can, for sake of continuity if not actual
194             # good sense or clarity.
195              
196             # The moral of the story is to always have L !!!
197              
198 0           $x = '';
199 0 0         if (!length $section) {
    0          
200 0 0         $x = "the $page manpage" if length $page;
201             } elsif ($section =~ m/^[:\w]+(?:\(\))?/) {
202 0           $x .= "the $section entry";
203 0 0         $x .= (length $page) ? " in the $page manpage"
204             : " elsewhere in this document";
205             } else {
206 0           $section =~ s/^\"\s*//;
207 0           $section =~ s/\s*\"$//;
208 0           $x .= 'the section on "' . $section . '"';
209 0 0         $x .= " in the $page manpage" if length $page;
210             }
211 0           $out .= "$x";
212 0           return; # subvert the usual processing.
213             }
214             # Else it's complicated and scary. Fall thru.
215             }
216             }
217 0           $post = '';
218            
219             } else {
220             # Unknown sequence. Ahwell, pass thru.
221 0           $out .= "<$type>";
222 0           $post = "";
223             }
224             } elsif($type eq 'list') {
225 0           $x = xml_attr_escape($it->get_arg);
226 0 0         $out .= length($x) ? "\n\n" : "\n\n";
227              
228             # used to have:
229             # sprintf "\n\n",
230             # xml_attr_escape($it->get_list_type),
231             # xml_attr_escape($it->get_arg) ;
232              
233 0           $post = "\n\n";
234              
235             } elsif($type eq 'ordinary') {
236 0           $out .= "

";

237 0           $post = "

\n\n";
238              
239             } elsif($type eq 'command') {
240 0           $x = $it->get_command();
241 0 0         if($x =~ m/^head[1234]$/is) {
242 0           $x = lc($x);
243 0           $out .= "<$x>";
244 0           $post = "\n\n";
245             } else {
246 0           die "Unknown POD command \"$x\"";
247             }
248            
249             } elsif($type eq 'item') {
250             # Needs special recursion!
251 0           $out .= '';
252             # used to have: sprintf '',
253             # xml_attr_escape($it->get_item_type);
254              
255             # Recurse for the item's children:
256 0 0         foreach my $c (@{ $it->get_children || $nil }) { $trav->($c) }
  0            
  0            
257 0           $out .= "\n\n";
258              
259             # Then recurse for the bastards further down...
260            
261             } elsif($type eq 'verbatim') {
262 0 0 0       ( $FUSE_ADJACENT_PRES and $out =~ s/<\/pre>\n\n$//s )
263             or $out .= "
"; 
264             # possibly combine adjacent verbatims into a single 'pre'
265 0           $out .= xml_escape_maybe_cdata("\n" . $it->get_text . "\n");
266 0 0         $out =~ s/]]>$/s;
267             # combining adjacent CDATA sections is nice, and always harmless
268 0           $out .= "\n\n";
269 0           return;
270            
271             } elsif($type eq 'text') {
272 0           $out .= xml_escape($it->get_text);
273 0           return;
274            
275             } else {
276 0           $out .= "\n\n";
277 0           return;
278             }
279              
280 0 0         foreach my $c (@{ # Recurse...
  0 0          
281             (($type eq 'item') ? $it->get_siblings() : $it->get_children())
282             || $nil
283 0           }) { $trav->($c) }
284              
285 0           $out .= $post;
286 0           return;
287 0           };
288 0           $trav->($root);
289 0           undef $trav; # break cyclicity
290 0           print "\n\n" if DEBUG;
291              
292 0           sanitize_newlines($out);
293              
294 0           return $out;
295             }
296              
297             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
298              
299             sub xml_escape_maybe_cdata { # not destructive
300 0     0 0   my $x;
301 0 0         $x = '' unless defined($x = $_[0]);
302 0 0 0       if($x =~ m/[&<>]/ and not $x =~ m/[^\x00-\x80]/) {
303             # CDATA only if uses those [&<>], and does not use anything highbit.
304 0           $x =~ s/]]>/]]>]]>
305 0           $x = "";
306             } else {
307             # Otherwise escape things.
308 0           $x =~ s/&/&/g;
309 0           $x =~ s/
310 0           $x =~ s/>/>/g;
311            
312             #$x =~ s/([^\x00-\x7E])/$Char2xmlent{ord $1} or "&#".ord($1).";"/eg;
313 0 0         $x =~ s/([^\x00-\x7E])/"&#".ord($1).";"/eg unless $HIGH_BIT_OK;
  0            
314            
315             # Why care about highbittyness? Even tho we're declaring this content
316             # to be in UTF8, might as well entitify what we can.
317             }
318 0           return $x;
319             }
320              
321             sub xml_escape { # not destructive
322 0     0 0   my $x;
323 0 0         return '' unless defined($x = $_[0]);
324 0 0         if($HIGH_BIT_OK) {
325 0 0         $x =~ s/([&<>])/$Char2xmlent{ord $1} or "&#".ord($1).";"/eg;
  0            
326             # Encode '&', and '<' and '>'
327             } else {
328 0 0         $x =~ s/([^\cm\cj\f\t !-%'-;=?-~])/$Char2xmlent{ord $1} or "&#".ord($1).";"/eg;
  0            
329             # Encode control chars, high bit chars, '&', and '<' and '>'
330             }
331 0           return $x;
332             }
333              
334             sub xml_attr_escape { # not destructive
335 0     0 0   my $x;
336 0 0         return '' unless defined($x = $_[0]);
337              
338 0 0         if($HIGH_BIT_OK) {
339 0 0         $x =~ s/([&<>"])/$Char2xmlent{ord $1} or "&#".ord($1).";"/eg;
  0            
340             # Encode '&', '"', and '<' and '>'
341             } else {
342 0 0         $x =~ s/([^\cm\cj\f\t !\#-\%'-;=?-~])/$Char2xmlent{ord $1} or "&#".ord($1).";"/eg;
  0            
343             # Encode control chars, high bit chars, '"', '&', and '<' and '>'
344             }
345 0           return $x;
346             }
347              
348             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
349             sub sanitize_newlines { # DESTRUCTIVE
350 0     0 0   if("\n" eq "\cm") {
351             $_[0] =~ s/\cm?\cj/\n/g; # turn \cj and \cm\cj into \n
352             } elsif("\n" eq "\cj") {
353 0           $_[0] =~ s/\cm\cj/\n/g; # turn \cm and \cm\cj into \n
354             } else {
355             $_[0] =~ s/(?:(?:\cm?\cj)|\cm)/\n/g;
356             # turn \cm\cj, \cj, or \cm into \n
357             }
358 0           return;
359             }
360              
361             ###########################################################################
362             ###########################################################################
363              
364 2     2   22 use vars qw(%Acceptable_children);
  2         5  
  2         5855  
365             {
366             # This just recapitulates what's in the DTD:
367             my $style = {map{;$_,1} qw(b i c x f s link)};
368             my $pstyle = {'#PCDATA',1, %$style};
369             my $pcdata = {'#PCDATA',1};
370             %Acceptable_children = (
371             'pod' => {map{;$_,1} qw(head1 head2 head3 head4 p pre list for)},
372             map(($_=>$pstyle), qw(head1 head2 head3 head4 p)),
373             'pre' => $pcdata,
374             'list' => {map{;$_,1} qw(item p pre list for)},
375             'item' => $pstyle,
376             'for' => $pcdata,
377             map(($_=>$pstyle), qw(link b i c f x s)),
378             );
379             }
380              
381             sub xml2pod ($) {
382 0     0 1   my $content = $_[0];
383 0           require XML::Parser;
384            
385 0           my $out;
386 0           my($gi, %attr, $text, $cm_set); # scratch
387            
388 0           my(@stack);
389 0           my @paragraph_stack;
390             # pop/pushed only by paragraph-containing elements, and link
391 0           my @for_stack; # kept by 'for' elements
392 0           my @link_stack; # kept by 'link' elements
393             my $xml = XML::Parser->new( 'Handlers' => {
394              
395             ##
396             ##
397             ## On the way in...
398            
399             'Start' => sub {
400 0     0     (undef, $gi, %attr) = @_;
401 0           push @stack, $gi;
402 0           DEBUG > 1 and print ' ', join('.', @stack), "+\n";
403            
404 0 0         if($XML_VALIDATE) {
405 0 0         if(@stack < 2) {
    0          
406 0 0         unless($gi eq 'pod') {
407             # I think XML::Parser would catch this, but anyway.
408 0           die "Can't have a childless \"$gi\" element, in $content";
409             }
410             } elsif(defined($cm_set = $Acceptable_children{$stack[-2]})) {
411 0 0         die "Can't have a \"$gi\" in a \"$stack[-2]\", in $content (stack @stack)"
412             unless $cm_set->{$gi};
413             } else {
414 0           die "Unknown element \"$gi\"";
415             }
416             # TODO: attribute validation!
417             }
418            
419 0 0 0       if($gi =~ m/^[bicxfs]$/s) {
    0          
    0          
    0          
    0          
    0          
    0          
    0          
420 0           $paragraph_stack[-1] .= "\U$gi<";
421             } elsif($gi eq 'p' or $gi eq 'pre') {
422 0           push @paragraph_stack, '';
423             } elsif($gi eq 'for') {
424 0   0       $text = $attr{'target'} || '????';
425 0           push @for_stack, $text;
426 0           push @paragraph_stack, '';
427             } elsif($gi eq 'list') {
428 0           $text = $attr{'indent'};
429 0 0 0       $out .= (defined($text) && length($text))
430             ? "=over $text\n\n" : "=over\n\n";
431             } elsif($gi eq 'item') {
432 0           $out .= '=item ';
433 0           push @paragraph_stack, '';
434             } elsif($gi =~ m/^head[1234]$/s) {
435 0           push @paragraph_stack, '=' . $gi . ' ';
436             } elsif($gi eq 'link') { # a hack
437 0           push @link_stack, [$attr{'page'}, $attr{'section'}];
438 0           push @paragraph_stack, '';
439             } elsif($gi eq 'pod') {
440 0   0       my $text = $attr{'xmlns'} || $XMLNS;
441 0 0         die "pod has a foreign namespace: \"$text\" instead of \"$XMLNS\""
442             unless $text eq $XMLNS;
443             } else {
444 0           DEBUG and print "Opening unknown element \"$gi\"\n";
445             }
446 0           return;
447             },
448              
449             ##
450             ##
451             ## And on the way out...
452              
453             'End' => sub {
454 0     0     $gi = $_[1];
455 0           DEBUG > 1 and print ' ', join('.', @stack), "-\n";
456 0 0         die "INSANE! Stack mismatch! $text ne $gi"
457             unless $gi eq ($text = pop @stack);
458              
459 0 0         if($gi =~ m/^[bicxfs]$/s) {
    0          
    0          
    0          
    0          
    0          
    0          
    0          
    0          
460 0           $paragraph_stack[-1] .= ">";
461             } elsif($gi eq 'p') {
462             # A paragraph must start with non-WS, non-=, and must contain
463             # no \n\n's until its very end.
464            
465 0           $text = pop @paragraph_stack;
466 0           $text =~ s/^(\s)/Z<>$1/s; # make sure we're NOT indented
467 0           $text =~ s/^=/Z<>=/s; # make sure we're NOT =-initial
468 0           $text =~ s/\n+$//s; # nix terminal newlines!
469 0           $text =~ s/\n(?=\n)/\n /g; # separate double-newlines
470 0 0         unless(length $text) {
471 0           DEBUG and print "Odd, null p-paragraph\n";
472 0           return;
473             }
474            
475             # These don't beautify /everything/ beautifiable, but they try.
476 0           while($text =~ s/([^a-zA-Z<])E/$1
  0            
477             # Turn E's that obviously don't need escaping, back into <'s
478 0           while($text =~ s/^([^<]*)E/$1>/) {1}
  0            
479             # Turn obviously harmless E's back into ">"'s.
480            
481 0           $text .= "\n\n";
482 0           $out .= $text;
483             } elsif($gi eq 'pre') {
484             # A verbatim paragraph must start with WS, and must contain
485             # no \n\n's until its very end.
486              
487 0           $text = pop @paragraph_stack;
488 0           $text =~ s/^\n+//s; # nix leading strictly-blank lines
489 0           $text =~ s/^(\S)/ \n$1/s; # make sure we ARE indented
490             # that means we don't have to make sure we don't start with a '='
491 0           $text =~ s/\n+$//s; # nix terminal newlines!
492 0           $text =~ s/\n(?=\n)/\n /g; # separate double-newlines
493             #$text =~ tr/\0-\xFF//CU if $LATIN_1; # since we can't E<..> things
494 0 0         unless(length $text) {
495 0           DEBUG and print "Odd, null pre-paragraph\n";
496 0           return;
497             }
498 0           $text .= "\n\n";
499 0           $out .= $text;
500              
501             } elsif($gi eq 'for') {
502 0           my $kind = pop @for_stack;
503 0           $text = "\n\n=begin $kind\n\n" . pop @paragraph_stack;
504 0           $text =~ s/\n+$//s; # nix terminal newlines!
505 0           $text =~ s/\n(?=\n)/\n /g; # separate double-newlines
506 0           $text .= "\n\n=end $kind\n\n";
507 0           $out .= $text;
508              
509             } elsif($gi eq 'list') {
510 0           $out .= "=back\n\n";
511              
512             } elsif($gi eq 'item') {
513 0           $text = pop @paragraph_stack;
514 0           $text =~ s/^\s*//s; # kill leading space
515 0           $text =~ s/\n+$//s; # nix terminal newlines!
516 0           $text =~ s/\n(?=\n)/\n /g; # separate double-newlines
517 0           $text .= "\n\n";
518              
519             # These don't beautify /everything/ beautifiable, but they try.
520 0           while($text =~ s/([^a-zA-Z<])E/$1
  0            
521             # Turn E's that obviously don't need escaping, back into <'s
522 0           while($text =~ s/^([^<]*)E/$1>/) {1}
  0            
523             # Turn obviously harmless E's back into ">"'s.
524              
525 0           $out .= $text;
526              
527             } elsif($gi =~ m/^head[1234]$/s) {
528 0           $text = pop @paragraph_stack;
529 0           $text =~ s/^(\s)/Z<>$1/s; # make sure we're NOT (visibly) indented
530 0           $text =~ s/\n+$//s; # nix terminal newlines!
531 0           $text =~ s/\n(?=\n)/\n /g; # nix any double-newlines
532 0           $text .= "\n\n";
533              
534             # These don't beautify /everything/ beautifiable, but they try.
535 0           while($text =~ s/([^a-zA-Z<])E/$1
  0            
536             # Turn E's that obviously don't need escaping, back into <'s
537 0           while($text =~ s/^([^<]*)E/$1>/) {1}
  0            
538             # Turn obviously harmless E's back into ">"'s.
539              
540 0           $out .= $text;
541            
542             } elsif($gi eq 'link') { # a hack
543 0           $text = pop @paragraph_stack;
544             # "Text cannot contain the characters '/' and '|'"
545 0           $text =~ s/\|/E<124>/g; # AKA verbar
546 0           $text =~ s{/}{E<47>}g; # AKA sol
547 0           $text =~ s/\n(?=\n)/\n /g;
548             # nix any double-newlines, just for good measure
549 0 0         $text .= '|' if length $text;
550            
551 0           my($xref, $section) = @{pop @link_stack};
  0            
552 0 0         $xref = '' unless defined $xref; # "" means 'in this document'
553 0 0         $section = '' unless defined $section;
554              
555 0           $xref = pod_escape($xref);
556 0           $xref =~ s{/}{E<47>}g;
557 0 0         $section = pod_escape("/\"$section\"") if length $section;
558            
559 0 0 0       $section = '/"???"' unless length $xref or length $section;
560             # signals aberrant input!
561            
562 0           $paragraph_stack[-1] .= "L<$text$xref$section>";
563              
564             } elsif($gi eq 'pod') {
565             # no-op
566             } else {
567 0           DEBUG and print "Closing unknown element \"$gi\"\n";
568             }
569 0           return;
570             },
571            
572             ##
573             ##
574             ## Character data! MATANGA!!!
575             'Char' => sub {
576 0     0     shift;
577 0 0 0       return unless defined $_[0] and length $_[0]; # sanity
578              
579 0 0         if(!@stack) {
580 0 0         die "Non-WS text on empty stack: \"$_[0]\""
581             unless $_[0] =~ m/^\s+$/s;
582             } else {
583 0 0 0       if(($Acceptable_children{$stack[-1]}
584             || die "Putting text under unknown element \"$stack[-1]\""
585             )->{'#PCDATA'}) {
586             # This is the only case where we can add:
587 0 0         die "\@paragraph_stack is empty? (stack: @stack)"
588             unless @paragraph_stack;
589 0 0         if($stack[-1] eq 'pre') {
590 0           $paragraph_stack[-1] .= $_[0];
591             } else {
592 0           $paragraph_stack[-1] .= pod_escape($_[0]);
593             }
594             } else {
595             # doesn't allow PCDATA
596 0 0         die "Can't have non-WS text in a \"$stack[-1]\""
597             unless $_[0] =~ m/^\s+$/s;
598             # Else it's just ignorable whitespace.
599             }
600             }
601              
602 0           return;
603             },
604            
605             # 'Comment' => sub { },
606             # 'Proc' => sub { },
607             # 'Attlist' => sub { },
608             # 'Element' => sub { },
609             # 'Doctype' => sub { },
610 0           });
611              
612             # Now actually process...
613 0           $out = "";
614 0 0         if(ref($content) eq 'SCALAR') {
615 0           $xml->parse($$content);
616             } else {
617 0           $xml->parsefile($content);
618             }
619            
620 0           $out =~ s/^([^=])/=pod\n\n$1/;
621             # make sure that we start with a =-thingie, one way or another.
622            
623 0           $out .= "=cut\n\n";
624              
625 0           sanitize_newlines($out);
626 0           return $out;
627             }
628              
629             # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
630              
631             {
632             my %e = ('<' => 'E', '>' => 'E' );
633             sub pod_escape {
634             #print STDERR "IN: <$_[0]>\n";
635 0     0 0   my $it = $_[0];
636 0 0         $it =~ s/([^\cm\cj\f\t !-;=?-~])/$Char2podent{ord $1} or "E<".ord($1).">"/eg;
  0            
637             # Encode control chars, high bit chars and '<' and '>'
638             #print STDERR "OUT: <$_[0]>\n\n";
639 0           return $it;
640             }
641             }
642              
643             ###########################################################################
644             ###########################################################################
645             1;
646