line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
|
2
|
|
|
|
|
|
|
#Time-stamp: "2004-12-29 18:34:27 AST" |
3
|
|
|
|
|
|
|
#TODO: for xml2pod: |
4
|
|
|
|
|
|
|
# Make utf8/Latin-1 an option (default utf8?) |
5
|
|
|
|
|
|
|
# Make E<>ification an option (default to all) |
6
|
|
|
|
|
|
|
# Option for whether to delete highbit things in codeblocks (default: no?) |
7
|
|
|
|
|
|
|
#TODO: for pod2xml: |
8
|
|
|
|
|
|
|
# Option: choice of XML encoding (Latin-1 or UTF-8) |
9
|
|
|
|
|
|
|
# Option: whether to represent things as literals, or as numeric entities. |
10
|
|
|
|
|
|
|
# (and whether to use decimal entities, or hex??) |
11
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
require 5; |
13
|
|
|
|
|
|
|
package Pod::PXML; |
14
|
2
|
|
|
2
|
|
14321
|
use strict; |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
91
|
|
15
|
2
|
|
|
|
|
386
|
use vars qw($VERSION $XMLNS %Char2podent %Char2xmlent |
16
|
|
|
|
|
|
|
$LATIN_1 $XML_VALIDATE $LINK_TEXT_INFER $FUSE_ADJACENT_PRES |
17
|
|
|
|
|
|
|
$HIGH_BIT_OK |
18
|
2
|
|
|
2
|
|
10
|
); |
|
2
|
|
|
|
|
3
|
|
19
|
|
|
|
|
|
|
$XMLNS = 'http://www.perl.com/CPAN/authors/id/S/SB/SBURKE/pxml_0.01.dtd'; |
20
|
|
|
|
|
|
|
$VERSION = '0.12'; |
21
|
|
|
|
|
|
|
# I'm going to try to keep the major version numbers in the DTD and the |
22
|
|
|
|
|
|
|
# module in synch. I dunno about the fractional part, tho. |
23
|
|
|
|
|
|
|
$LATIN_1 = 1; |
24
|
|
|
|
|
|
|
$XML_VALIDATE = 1; |
25
|
|
|
|
|
|
|
$HIGH_BIT_OK = 0; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
$LINK_TEXT_INFER = 0; |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
$FUSE_ADJACENT_PRES = 1; |
30
|
|
|
|
|
|
|
# Whether to make " foo\n\n bar" as a single PRE, |
31
|
|
|
|
|
|
|
# as if it were from " foo\n \n bar\n\n" |
32
|
|
|
|
|
|
|
# TODO: set to 1 |
33
|
|
|
|
|
|
|
|
34
|
2
|
50
|
|
2
|
|
72
|
BEGIN { *DEBUG = sub () {0} unless defined &DEBUG } |
35
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
my $nil = []; |
37
|
|
|
|
|
|
|
|
38
|
2
|
|
|
2
|
|
20
|
use Carp; |
|
2
|
|
|
|
|
3
|
|
|
2
|
|
|
|
|
138
|
|
39
|
2
|
|
|
2
|
|
2023
|
use utf8; |
|
2
|
|
|
|
|
20
|
|
|
2
|
|
|
|
|
11
|
|
40
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
# POD entities are just HTML entities plus verbar and sol |
42
|
|
|
|
|
|
|
#------------------------------------------------------------------------ |
43
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# Fill out Char2podent, Char2xmlent. |
45
|
|
|
|
|
|
|
{ |
46
|
2
|
|
|
2
|
|
1689
|
use HTML::Entities (); |
|
2
|
|
|
|
|
13813
|
|
|
2
|
|
|
|
|
7573
|
|
47
|
|
|
|
|
|
|
die "\%HTML::Entities::char2entity is empty?" |
48
|
|
|
|
|
|
|
unless keys %HTML::Entities::char2entity; |
49
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
my($c,$e); |
51
|
|
|
|
|
|
|
while(($c,$e) = each(%HTML::Entities::char2entity)) { |
52
|
|
|
|
|
|
|
if($e =~ m{^(\d+);$}s) { |
53
|
|
|
|
|
|
|
$Char2podent{ord $c} = "E<$1>"; |
54
|
|
|
|
|
|
|
#print "num $e => E<$1>\n"; |
55
|
|
|
|
|
|
|
# { => E<123> |
56
|
|
|
|
|
|
|
# $Char2xmlent{ord $c} = $e; |
57
|
|
|
|
|
|
|
} elsif($e =~ m{^&([^;]+);$}s) { |
58
|
|
|
|
|
|
|
$Char2podent{ord $c} = "E<$1>"; |
59
|
|
|
|
|
|
|
#print "eng $e => E<$1>\n"; |
60
|
|
|
|
|
|
|
# é => E |
61
|
|
|
|
|
|
|
# $Char2xmlent{ord $c} = $e; |
62
|
|
|
|
|
|
|
} else { |
63
|
|
|
|
|
|
|
warn "Unknown thingy in %HTML::Entities::char2entity: $c => $e" |
64
|
|
|
|
|
|
|
# if $^W; |
65
|
|
|
|
|
|
|
} |
66
|
|
|
|
|
|
|
} |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
# Points of difference between HTML entities and POD entities: |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
$Char2podent{ord "\xA0"} = "E<160>"; # there is no E |
71
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
$Char2podent{ord "\xAB"} = "E"; |
73
|
|
|
|
|
|
|
$Char2podent{ord "\xBB"} = "E"; |
74
|
|
|
|
|
|
|
# Altho new POD processors also know E and E |
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# Old POD processors don't know these two -- so leave numeric |
77
|
|
|
|
|
|
|
# $Char2podent{ord '/'} = 'E'; |
78
|
|
|
|
|
|
|
# $Char2podent{ord '|'} = 'E'; |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
# And a few that we have to make completely sure are present. |
81
|
|
|
|
|
|
|
$Char2xmlent{ord '"'} = '"' ; |
82
|
|
|
|
|
|
|
$Char2xmlent{ord '<'} = '<' ; |
83
|
|
|
|
|
|
|
$Char2xmlent{ord '>'} = '>' ; |
84
|
|
|
|
|
|
|
$Char2podent{ord '<'} = 'E' ; |
85
|
|
|
|
|
|
|
$Char2podent{ord '>'} = 'E' ; |
86
|
|
|
|
|
|
|
} |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
#print STDERR "Sanity: 214 is ", $Char2podent{214}, "\n"; |
89
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
#------------------------------------------------------------------------ |
91
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
sub pod2xml ($) { |
93
|
0
|
|
|
0
|
1
|
|
require Pod::Tree; |
94
|
|
|
|
|
|
|
|
95
|
0
|
|
|
|
|
|
my $content = $_[0]; |
96
|
|
|
|
|
|
|
|
97
|
0
|
|
|
|
|
|
my $tree = Pod::Tree->new; |
98
|
0
|
0
|
|
|
|
|
if(ref($content) eq 'SCALAR') { |
99
|
0
|
|
|
|
|
|
$tree->load_string($$content); |
100
|
|
|
|
|
|
|
} else { |
101
|
0
|
|
|
|
|
|
$tree->load_file($content); |
102
|
|
|
|
|
|
|
} |
103
|
0
|
0
|
|
|
|
|
unless($tree->loaded) { croak("Couldn't load pod") } |
|
0
|
|
|
|
|
|
|
104
|
0
|
|
|
|
|
|
return _pod_tree_as_xml($tree); |
105
|
|
|
|
|
|
|
} |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
#------------------------------------------------------------------------ |
108
|
|
|
|
|
|
|
# Real work: |
109
|
|
|
|
|
|
|
|
110
|
|
|
|
|
|
|
sub _pod_tree_as_xml { |
111
|
0
|
|
|
0
|
|
|
my $root = $_[0]->get_root; |
112
|
0
|
|
|
|
|
|
DEBUG > 2 and print "TREE DUMP: <<\n", $_[0]->dump, ">>\n\n"; |
113
|
|
|
|
|
|
|
|
114
|
0
|
0
|
|
|
|
|
return "\n\n" unless $root; |
115
|
0
|
|
|
|
|
|
my $out = ''; |
116
|
|
|
|
|
|
|
|
117
|
0
|
|
|
|
|
|
my $trav; |
118
|
|
|
|
|
|
|
my $x; # scratch |
119
|
|
|
|
|
|
|
$trav = sub { |
120
|
0
|
|
|
0
|
|
|
my $it = $_[0]; |
121
|
0
|
|
|
|
|
|
my $type = $it->get_type; |
122
|
0
|
|
|
|
|
|
my $post = ''; |
123
|
0
|
|
|
|
|
|
DEBUG and print "Hitting $type\n"; |
124
|
0
|
0
|
|
|
|
|
if($type eq 'root') { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
125
|
0
|
|
|
|
|
|
$out .= join "\n", |
126
|
|
|
|
|
|
|
qq{}, |
127
|
|
|
|
|
|
|
qq{
|
128
|
|
|
|
|
|
|
qq{ "$XMLNS">}, |
129
|
|
|
|
|
|
|
qq{}, |
130
|
|
|
|
|
|
|
"", |
131
|
|
|
|
|
|
|
'', |
132
|
|
|
|
|
|
|
'', |
133
|
|
|
|
|
|
|
; |
134
|
|
|
|
|
|
|
|
135
|
0
|
|
|
|
|
|
$post = "\n"; # harmless newline, I figure. |
136
|
|
|
|
|
|
|
|
137
|
|
|
|
|
|
|
} elsif($type eq 'for') { |
138
|
0
|
|
|
|
|
|
$out .= "get_arg) . "\">"; |
139
|
0
|
|
|
|
|
|
$out .= xml_escape_maybe_cdata($it->get_text); |
140
|
0
|
|
|
|
|
|
$out .= "\n\n"; |
141
|
0
|
|
|
|
|
|
return; |
142
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
} elsif($type eq 'sequence') { |
144
|
0
|
|
|
|
|
|
$type = lc($it->get_letter); |
145
|
0
|
|
|
|
|
|
DEBUG and print "Sequence type \"$type\"\n"; |
146
|
0
|
0
|
|
|
|
|
if($type eq 'e') { |
|
|
0
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# An unresolved entity. |
148
|
0
|
|
|
|
|
|
$x = $it->get_children; |
149
|
0
|
0
|
0
|
|
|
|
if($x and @$x ==1 and $x->[0]->get_type eq 'text') { |
|
|
|
0
|
|
|
|
|
150
|
0
|
|
|
|
|
|
$x = $x->[0]->get_text; |
151
|
0
|
0
|
|
|
|
|
die "Impossible entity name \"$x\"" if $x =~ m/[ \t<>]/s; |
152
|
|
|
|
|
|
|
# minimal sanity |
153
|
0
|
|
|
|
|
|
$out .= '&' . $x . ';'; |
154
|
|
|
|
|
|
|
} else { |
155
|
|
|
|
|
|
|
# $out .= '&WHAT;'; |
156
|
0
|
|
|
|
|
|
die "Aberrant E<..> content \"", $it->get_deep_text, "\""; |
157
|
|
|
|
|
|
|
} |
158
|
0
|
|
|
|
|
|
return; |
159
|
|
|
|
|
|
|
} elsif($type eq 'l') { |
160
|
|
|
|
|
|
|
# At time of writing, Pod::Tree is less than sterling in its |
161
|
|
|
|
|
|
|
# treatment of L<...> sequences. |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
#use Data::Dumper; |
164
|
|
|
|
|
|
|
#print "LINK DUMP: {{\n", Dumper($it), "}}\n"; |
165
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
# Some special treatment... |
167
|
0
|
|
0
|
|
|
|
my $target = $it->get_target || die 'targetless link?'; |
168
|
|
|
|
|
|
|
|
169
|
0
|
|
|
|
|
|
my($page, $section); |
170
|
0
|
|
|
|
|
|
$out .= "
|
171
|
0
|
|
|
|
|
|
$page = xml_attr_escape( $target->get_page ); |
172
|
0
|
0
|
|
|
|
|
$out .= " page=\"$page\"" if length $page; |
173
|
0
|
|
|
|
|
|
$section = xml_attr_escape( $target->get_section ); |
174
|
0
|
0
|
|
|
|
|
$out .= " section=\"$section\"" if length $section; |
175
|
0
|
|
|
|
|
|
$out .= ">"; |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
#if(!$LINK_TEXT_INFER and not(($x = $target->get_children) and @$x)) { |
178
|
0
|
0
|
0
|
|
|
|
unless(($x = $target->get_children) and @$x) { |
179
|
|
|
|
|
|
|
# There was no gloss (i.e., the bit after the "|"). |
180
|
0
|
0
|
|
|
|
|
if(! $LINK_TEXT_INFER) { |
181
|
|
|
|
|
|
|
# subvert the normal processing of children of this sequence. |
182
|
0
|
|
|
|
|
|
$out .= ""; |
183
|
0
|
|
|
|
|
|
return; |
184
|
|
|
|
|
|
|
} else { |
185
|
|
|
|
|
|
|
# Infer the text instead. |
186
|
0
|
|
|
|
|
|
my $ch; |
187
|
0
|
0
|
0
|
|
|
|
if(($ch = $it->get_children) and @$ch == 1 |
|
|
|
0
|
|
|
|
|
188
|
|
|
|
|
|
|
and $ch->[0]->get_type eq 'text' |
189
|
|
|
|
|
|
|
) { |
190
|
|
|
|
|
|
|
# So this /is/ just some text bit that Pod::Tree implicated. |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
# To replicate Pod::Text's inscrutible weirdness as |
193
|
|
|
|
|
|
|
# best we can, for sake of continuity if not actual |
194
|
|
|
|
|
|
|
# good sense or clarity. |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
# The moral of the story is to always have L !!! |
197
|
|
|
|
|
|
|
|
198
|
0
|
|
|
|
|
|
$x = ''; |
199
|
0
|
0
|
|
|
|
|
if (!length $section) { |
|
|
0
|
|
|
|
|
|
200
|
0
|
0
|
|
|
|
|
$x = "the $page manpage" if length $page; |
201
|
|
|
|
|
|
|
} elsif ($section =~ m/^[:\w]+(?:\(\))?/) { |
202
|
0
|
|
|
|
|
|
$x .= "the $section entry"; |
203
|
0
|
0
|
|
|
|
|
$x .= (length $page) ? " in the $page manpage" |
204
|
|
|
|
|
|
|
: " elsewhere in this document"; |
205
|
|
|
|
|
|
|
} else { |
206
|
0
|
|
|
|
|
|
$section =~ s/^\"\s*//; |
207
|
0
|
|
|
|
|
|
$section =~ s/\s*\"$//; |
208
|
0
|
|
|
|
|
|
$x .= 'the section on "' . $section . '"'; |
209
|
0
|
0
|
|
|
|
|
$x .= " in the $page manpage" if length $page; |
210
|
|
|
|
|
|
|
} |
211
|
0
|
|
|
|
|
|
$out .= "$x"; |
212
|
0
|
|
|
|
|
|
return; # subvert the usual processing. |
213
|
|
|
|
|
|
|
} |
214
|
|
|
|
|
|
|
# Else it's complicated and scary. Fall thru. |
215
|
|
|
|
|
|
|
} |
216
|
|
|
|
|
|
|
} |
217
|
0
|
|
|
|
|
|
$post = ''; |
218
|
|
|
|
|
|
|
|
219
|
|
|
|
|
|
|
} else { |
220
|
|
|
|
|
|
|
# Unknown sequence. Ahwell, pass thru. |
221
|
0
|
|
|
|
|
|
$out .= "<$type>"; |
222
|
0
|
|
|
|
|
|
$post = "$type>"; |
223
|
|
|
|
|
|
|
} |
224
|
|
|
|
|
|
|
} elsif($type eq 'list') { |
225
|
0
|
|
|
|
|
|
$x = xml_attr_escape($it->get_arg); |
226
|
0
|
0
|
|
|
|
|
$out .= length($x) ? "\n\n" : "\n\n"; |
227
|
|
|
|
|
|
|
|
228
|
|
|
|
|
|
|
# used to have: |
229
|
|
|
|
|
|
|
# sprintf "\n\n", |
230
|
|
|
|
|
|
|
# xml_attr_escape($it->get_list_type), |
231
|
|
|
|
|
|
|
# xml_attr_escape($it->get_arg) ; |
232
|
|
|
|
|
|
|
|
233
|
0
|
|
|
|
|
|
$post = "\n\n"; |
234
|
|
|
|
|
|
|
|
235
|
|
|
|
|
|
|
} elsif($type eq 'ordinary') { |
236
|
0
|
|
|
|
|
|
$out .= " "; |
237
|
0
|
|
|
|
|
|
$post = "\n\n"; |
238
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
} elsif($type eq 'command') { |
240
|
0
|
|
|
|
|
|
$x = $it->get_command(); |
241
|
0
|
0
|
|
|
|
|
if($x =~ m/^head[1234]$/is) { |
242
|
0
|
|
|
|
|
|
$x = lc($x); |
243
|
0
|
|
|
|
|
|
$out .= "<$x>"; |
244
|
0
|
|
|
|
|
|
$post = "$x>\n\n"; |
245
|
|
|
|
|
|
|
} else { |
246
|
0
|
|
|
|
|
|
die "Unknown POD command \"$x\""; |
247
|
|
|
|
|
|
|
} |
248
|
|
|
|
|
|
|
|
249
|
|
|
|
|
|
|
} elsif($type eq 'item') { |
250
|
|
|
|
|
|
|
# Needs special recursion! |
251
|
0
|
|
|
|
|
|
$out .= '- ';
|
252
|
|
|
|
|
|
|
# used to have: sprintf '- ',
|
253
|
|
|
|
|
|
|
# xml_attr_escape($it->get_item_type); |
254
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# Recurse for the item's children: |
256
|
0
|
0
|
|
|
|
|
foreach my $c (@{ $it->get_children || $nil }) { $trav->($c) } |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
257
|
0
|
|
|
|
|
|
$out .= "\n\n"; |
258
|
|
|
|
|
|
|
|
259
|
|
|
|
|
|
|
# Then recurse for the bastards further down... |
260
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
} elsif($type eq 'verbatim') { |
262
|
0
|
0
|
0
|
|
|
|
( $FUSE_ADJACENT_PRES and $out =~ s/<\/pre>\n\n$//s ) |
263
|
|
|
|
|
|
|
or $out .= ""; |
264
|
|
|
|
|
|
|
# possibly combine adjacent verbatims into a single 'pre' |
265
|
0
|
|
|
|
|
|
$out .= xml_escape_maybe_cdata("\n" . $it->get_text . "\n"); |
266
|
0
|
0
|
|
|
|
|
$out =~ s/]]>$/s; |
267
|
|
|
|
|
|
|
# combining adjacent CDATA sections is nice, and always harmless |
268
|
0
|
|
|
|
|
|
$out .= "\n\n"; |
269
|
0
|
|
|
|
|
|
return; |
270
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
} elsif($type eq 'text') { |
272
|
0
|
|
|
|
|
|
$out .= xml_escape($it->get_text); |
273
|
0
|
|
|
|
|
|
return; |
274
|
|
|
|
|
|
|
|
275
|
|
|
|
|
|
|
} else { |
276
|
0
|
|
|
|
|
|
$out .= "\n\n"; |
277
|
0
|
|
|
|
|
|
return; |
278
|
|
|
|
|
|
|
} |
279
|
|
|
|
|
|
|
|
280
|
0
|
0
|
|
|
|
|
foreach my $c (@{ # Recurse... |
|
0
|
0
|
|
|
|
|
|
281
|
|
|
|
|
|
|
(($type eq 'item') ? $it->get_siblings() : $it->get_children()) |
282
|
|
|
|
|
|
|
|| $nil |
283
|
0
|
|
|
|
|
|
}) { $trav->($c) } |
284
|
|
|
|
|
|
|
|
285
|
0
|
|
|
|
|
|
$out .= $post; |
286
|
0
|
|
|
|
|
|
return; |
287
|
0
|
|
|
|
|
|
}; |
288
|
0
|
|
|
|
|
|
$trav->($root); |
289
|
0
|
|
|
|
|
|
undef $trav; # break cyclicity |
290
|
0
|
|
|
|
|
|
print "\n\n" if DEBUG; |
291
|
|
|
|
|
|
|
|
292
|
0
|
|
|
|
|
|
sanitize_newlines($out); |
293
|
|
|
|
|
|
|
|
294
|
0
|
|
|
|
|
|
return $out; |
295
|
|
|
|
|
|
|
} |
296
|
|
|
|
|
|
|
|
297
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
298
|
|
|
|
|
|
|
|
299
|
|
|
|
|
|
|
sub xml_escape_maybe_cdata { # not destructive |
300
|
0
|
|
|
0
|
0
|
|
my $x; |
301
|
0
|
0
|
|
|
|
|
$x = '' unless defined($x = $_[0]); |
302
|
0
|
0
|
0
|
|
|
|
if($x =~ m/[&<>]/ and not $x =~ m/[^\x00-\x80]/) { |
303
|
|
|
|
|
|
|
# CDATA only if uses those [&<>], and does not use anything highbit. |
304
|
0
|
|
|
|
|
|
$x =~ s/]]>/]]>]]>
|
305
|
0
|
|
|
|
|
|
$x = ""; |
306
|
|
|
|
|
|
|
} else { |
307
|
|
|
|
|
|
|
# Otherwise escape things. |
308
|
0
|
|
|
|
|
|
$x =~ s/&/&/g; |
309
|
0
|
|
|
|
|
|
$x =~ s/</g; |
310
|
0
|
|
|
|
|
|
$x =~ s/>/>/g; |
311
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
#$x =~ s/([^\x00-\x7E])/$Char2xmlent{ord $1} or "".ord($1).";"/eg; |
313
|
0
|
0
|
|
|
|
|
$x =~ s/([^\x00-\x7E])/"".ord($1).";"/eg unless $HIGH_BIT_OK; |
|
0
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
|
315
|
|
|
|
|
|
|
# Why care about highbittyness? Even tho we're declaring this content |
316
|
|
|
|
|
|
|
# to be in UTF8, might as well entitify what we can. |
317
|
|
|
|
|
|
|
} |
318
|
0
|
|
|
|
|
|
return $x; |
319
|
|
|
|
|
|
|
} |
320
|
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
sub xml_escape { # not destructive |
322
|
0
|
|
|
0
|
0
|
|
my $x; |
323
|
0
|
0
|
|
|
|
|
return '' unless defined($x = $_[0]); |
324
|
0
|
0
|
|
|
|
|
if($HIGH_BIT_OK) { |
325
|
0
|
0
|
|
|
|
|
$x =~ s/([&<>])/$Char2xmlent{ord $1} or "".ord($1).";"/eg; |
|
0
|
|
|
|
|
|
|
326
|
|
|
|
|
|
|
# Encode '&', and '<' and '>' |
327
|
|
|
|
|
|
|
} else { |
328
|
0
|
0
|
|
|
|
|
$x =~ s/([^\cm\cj\f\t !-%'-;=?-~])/$Char2xmlent{ord $1} or "".ord($1).";"/eg; |
|
0
|
|
|
|
|
|
|
329
|
|
|
|
|
|
|
# Encode control chars, high bit chars, '&', and '<' and '>' |
330
|
|
|
|
|
|
|
} |
331
|
0
|
|
|
|
|
|
return $x; |
332
|
|
|
|
|
|
|
} |
333
|
|
|
|
|
|
|
|
334
|
|
|
|
|
|
|
sub xml_attr_escape { # not destructive |
335
|
0
|
|
|
0
|
0
|
|
my $x; |
336
|
0
|
0
|
|
|
|
|
return '' unless defined($x = $_[0]); |
337
|
|
|
|
|
|
|
|
338
|
0
|
0
|
|
|
|
|
if($HIGH_BIT_OK) { |
339
|
0
|
0
|
|
|
|
|
$x =~ s/([&<>"])/$Char2xmlent{ord $1} or "".ord($1).";"/eg; |
|
0
|
|
|
|
|
|
|
340
|
|
|
|
|
|
|
# Encode '&', '"', and '<' and '>' |
341
|
|
|
|
|
|
|
} else { |
342
|
0
|
0
|
|
|
|
|
$x =~ s/([^\cm\cj\f\t !\#-\%'-;=?-~])/$Char2xmlent{ord $1} or "".ord($1).";"/eg; |
|
0
|
|
|
|
|
|
|
343
|
|
|
|
|
|
|
# Encode control chars, high bit chars, '"', '&', and '<' and '>' |
344
|
|
|
|
|
|
|
} |
345
|
0
|
|
|
|
|
|
return $x; |
346
|
|
|
|
|
|
|
} |
347
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
349
|
|
|
|
|
|
|
sub sanitize_newlines { # DESTRUCTIVE |
350
|
0
|
|
|
0
|
0
|
|
if("\n" eq "\cm") { |
351
|
|
|
|
|
|
|
$_[0] =~ s/\cm?\cj/\n/g; # turn \cj and \cm\cj into \n |
352
|
|
|
|
|
|
|
} elsif("\n" eq "\cj") { |
353
|
0
|
|
|
|
|
|
$_[0] =~ s/\cm\cj/\n/g; # turn \cm and \cm\cj into \n |
354
|
|
|
|
|
|
|
} else { |
355
|
|
|
|
|
|
|
$_[0] =~ s/(?:(?:\cm?\cj)|\cm)/\n/g; |
356
|
|
|
|
|
|
|
# turn \cm\cj, \cj, or \cm into \n |
357
|
|
|
|
|
|
|
} |
358
|
0
|
|
|
|
|
|
return; |
359
|
|
|
|
|
|
|
} |
360
|
|
|
|
|
|
|
|
361
|
|
|
|
|
|
|
########################################################################### |
362
|
|
|
|
|
|
|
########################################################################### |
363
|
|
|
|
|
|
|
|
364
|
2
|
|
|
2
|
|
22
|
use vars qw(%Acceptable_children); |
|
2
|
|
|
|
|
5
|
|
|
2
|
|
|
|
|
5855
|
|
365
|
|
|
|
|
|
|
{ |
366
|
|
|
|
|
|
|
# This just recapitulates what's in the DTD: |
367
|
|
|
|
|
|
|
my $style = {map{;$_,1} qw(b i c x f s link)}; |
368
|
|
|
|
|
|
|
my $pstyle = {'#PCDATA',1, %$style}; |
369
|
|
|
|
|
|
|
my $pcdata = {'#PCDATA',1}; |
370
|
|
|
|
|
|
|
%Acceptable_children = ( |
371
|
|
|
|
|
|
|
'pod' => {map{;$_,1} qw(head1 head2 head3 head4 p pre list for)}, |
372
|
|
|
|
|
|
|
map(($_=>$pstyle), qw(head1 head2 head3 head4 p)), |
373
|
|
|
|
|
|
|
'pre' => $pcdata, |
374
|
|
|
|
|
|
|
'list' => {map{;$_,1} qw(item p pre list for)}, |
375
|
|
|
|
|
|
|
'item' => $pstyle, |
376
|
|
|
|
|
|
|
'for' => $pcdata, |
377
|
|
|
|
|
|
|
map(($_=>$pstyle), qw(link b i c f x s)), |
378
|
|
|
|
|
|
|
); |
379
|
|
|
|
|
|
|
} |
380
|
|
|
|
|
|
|
|
381
|
|
|
|
|
|
|
sub xml2pod ($) { |
382
|
0
|
|
|
0
|
1
|
|
my $content = $_[0]; |
383
|
0
|
|
|
|
|
|
require XML::Parser; |
384
|
|
|
|
|
|
|
|
385
|
0
|
|
|
|
|
|
my $out; |
386
|
0
|
|
|
|
|
|
my($gi, %attr, $text, $cm_set); # scratch |
387
|
|
|
|
|
|
|
|
388
|
0
|
|
|
|
|
|
my(@stack); |
389
|
0
|
|
|
|
|
|
my @paragraph_stack; |
390
|
|
|
|
|
|
|
# pop/pushed only by paragraph-containing elements, and link |
391
|
0
|
|
|
|
|
|
my @for_stack; # kept by 'for' elements |
392
|
0
|
|
|
|
|
|
my @link_stack; # kept by 'link' elements |
393
|
|
|
|
|
|
|
my $xml = XML::Parser->new( 'Handlers' => { |
394
|
|
|
|
|
|
|
|
395
|
|
|
|
|
|
|
## |
396
|
|
|
|
|
|
|
## |
397
|
|
|
|
|
|
|
## On the way in... |
398
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
'Start' => sub { |
400
|
0
|
|
|
0
|
|
|
(undef, $gi, %attr) = @_; |
401
|
0
|
|
|
|
|
|
push @stack, $gi; |
402
|
0
|
|
|
|
|
|
DEBUG > 1 and print ' ', join('.', @stack), "+\n"; |
403
|
|
|
|
|
|
|
|
404
|
0
|
0
|
|
|
|
|
if($XML_VALIDATE) { |
405
|
0
|
0
|
|
|
|
|
if(@stack < 2) { |
|
|
0
|
|
|
|
|
|
406
|
0
|
0
|
|
|
|
|
unless($gi eq 'pod') { |
407
|
|
|
|
|
|
|
# I think XML::Parser would catch this, but anyway. |
408
|
0
|
|
|
|
|
|
die "Can't have a childless \"$gi\" element, in $content"; |
409
|
|
|
|
|
|
|
} |
410
|
|
|
|
|
|
|
} elsif(defined($cm_set = $Acceptable_children{$stack[-2]})) { |
411
|
0
|
0
|
|
|
|
|
die "Can't have a \"$gi\" in a \"$stack[-2]\", in $content (stack @stack)" |
412
|
|
|
|
|
|
|
unless $cm_set->{$gi}; |
413
|
|
|
|
|
|
|
} else { |
414
|
0
|
|
|
|
|
|
die "Unknown element \"$gi\""; |
415
|
|
|
|
|
|
|
} |
416
|
|
|
|
|
|
|
# TODO: attribute validation! |
417
|
|
|
|
|
|
|
} |
418
|
|
|
|
|
|
|
|
419
|
0
|
0
|
0
|
|
|
|
if($gi =~ m/^[bicxfs]$/s) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
420
|
0
|
|
|
|
|
|
$paragraph_stack[-1] .= "\U$gi<"; |
421
|
|
|
|
|
|
|
} elsif($gi eq 'p' or $gi eq 'pre') { |
422
|
0
|
|
|
|
|
|
push @paragraph_stack, ''; |
423
|
|
|
|
|
|
|
} elsif($gi eq 'for') { |
424
|
0
|
|
0
|
|
|
|
$text = $attr{'target'} || '????'; |
425
|
0
|
|
|
|
|
|
push @for_stack, $text; |
426
|
0
|
|
|
|
|
|
push @paragraph_stack, ''; |
427
|
|
|
|
|
|
|
} elsif($gi eq 'list') { |
428
|
0
|
|
|
|
|
|
$text = $attr{'indent'}; |
429
|
0
|
0
|
0
|
|
|
|
$out .= (defined($text) && length($text)) |
430
|
|
|
|
|
|
|
? "=over $text\n\n" : "=over\n\n"; |
431
|
|
|
|
|
|
|
} elsif($gi eq 'item') { |
432
|
0
|
|
|
|
|
|
$out .= '=item '; |
433
|
0
|
|
|
|
|
|
push @paragraph_stack, ''; |
434
|
|
|
|
|
|
|
} elsif($gi =~ m/^head[1234]$/s) { |
435
|
0
|
|
|
|
|
|
push @paragraph_stack, '=' . $gi . ' '; |
436
|
|
|
|
|
|
|
} elsif($gi eq 'link') { # a hack |
437
|
0
|
|
|
|
|
|
push @link_stack, [$attr{'page'}, $attr{'section'}]; |
438
|
0
|
|
|
|
|
|
push @paragraph_stack, ''; |
439
|
|
|
|
|
|
|
} elsif($gi eq 'pod') { |
440
|
0
|
|
0
|
|
|
|
my $text = $attr{'xmlns'} || $XMLNS; |
441
|
0
|
0
|
|
|
|
|
die "pod has a foreign namespace: \"$text\" instead of \"$XMLNS\"" |
442
|
|
|
|
|
|
|
unless $text eq $XMLNS; |
443
|
|
|
|
|
|
|
} else { |
444
|
0
|
|
|
|
|
|
DEBUG and print "Opening unknown element \"$gi\"\n"; |
445
|
|
|
|
|
|
|
} |
446
|
0
|
|
|
|
|
|
return; |
447
|
|
|
|
|
|
|
}, |
448
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
## |
450
|
|
|
|
|
|
|
## |
451
|
|
|
|
|
|
|
## And on the way out... |
452
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
'End' => sub { |
454
|
0
|
|
|
0
|
|
|
$gi = $_[1]; |
455
|
0
|
|
|
|
|
|
DEBUG > 1 and print ' ', join('.', @stack), "-\n"; |
456
|
0
|
0
|
|
|
|
|
die "INSANE! Stack mismatch! $text ne $gi" |
457
|
|
|
|
|
|
|
unless $gi eq ($text = pop @stack); |
458
|
|
|
|
|
|
|
|
459
|
0
|
0
|
|
|
|
|
if($gi =~ m/^[bicxfs]$/s) { |
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
460
|
0
|
|
|
|
|
|
$paragraph_stack[-1] .= ">"; |
461
|
|
|
|
|
|
|
} elsif($gi eq 'p') { |
462
|
|
|
|
|
|
|
# A paragraph must start with non-WS, non-=, and must contain |
463
|
|
|
|
|
|
|
# no \n\n's until its very end. |
464
|
|
|
|
|
|
|
|
465
|
0
|
|
|
|
|
|
$text = pop @paragraph_stack; |
466
|
0
|
|
|
|
|
|
$text =~ s/^(\s)/Z<>$1/s; # make sure we're NOT indented |
467
|
0
|
|
|
|
|
|
$text =~ s/^=/Z<>=/s; # make sure we're NOT =-initial |
468
|
0
|
|
|
|
|
|
$text =~ s/\n+$//s; # nix terminal newlines! |
469
|
0
|
|
|
|
|
|
$text =~ s/\n(?=\n)/\n /g; # separate double-newlines |
470
|
0
|
0
|
|
|
|
|
unless(length $text) { |
471
|
0
|
|
|
|
|
|
DEBUG and print "Odd, null p-paragraph\n"; |
472
|
0
|
|
|
|
|
|
return; |
473
|
|
|
|
|
|
|
} |
474
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
# These don't beautify /everything/ beautifiable, but they try. |
476
|
0
|
|
|
|
|
|
while($text =~ s/([^a-zA-Z<])E/$1
|
|
0
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
# Turn E's that obviously don't need escaping, back into <'s |
478
|
0
|
|
|
|
|
|
while($text =~ s/^([^<]*)E/$1>/) {1} |
|
0
|
|
|
|
|
|
|
479
|
|
|
|
|
|
|
# Turn obviously harmless E's back into ">"'s. |
480
|
|
|
|
|
|
|
|
481
|
0
|
|
|
|
|
|
$text .= "\n\n"; |
482
|
0
|
|
|
|
|
|
$out .= $text; |
483
|
|
|
|
|
|
|
} elsif($gi eq 'pre') { |
484
|
|
|
|
|
|
|
# A verbatim paragraph must start with WS, and must contain |
485
|
|
|
|
|
|
|
# no \n\n's until its very end. |
486
|
|
|
|
|
|
|
|
487
|
0
|
|
|
|
|
|
$text = pop @paragraph_stack; |
488
|
0
|
|
|
|
|
|
$text =~ s/^\n+//s; # nix leading strictly-blank lines |
489
|
0
|
|
|
|
|
|
$text =~ s/^(\S)/ \n$1/s; # make sure we ARE indented |
490
|
|
|
|
|
|
|
# that means we don't have to make sure we don't start with a '=' |
491
|
0
|
|
|
|
|
|
$text =~ s/\n+$//s; # nix terminal newlines! |
492
|
0
|
|
|
|
|
|
$text =~ s/\n(?=\n)/\n /g; # separate double-newlines |
493
|
|
|
|
|
|
|
#$text =~ tr/\0-\xFF//CU if $LATIN_1; # since we can't E<..> things |
494
|
0
|
0
|
|
|
|
|
unless(length $text) { |
495
|
0
|
|
|
|
|
|
DEBUG and print "Odd, null pre-paragraph\n"; |
496
|
0
|
|
|
|
|
|
return; |
497
|
|
|
|
|
|
|
} |
498
|
0
|
|
|
|
|
|
$text .= "\n\n"; |
499
|
0
|
|
|
|
|
|
$out .= $text; |
500
|
|
|
|
|
|
|
|
501
|
|
|
|
|
|
|
} elsif($gi eq 'for') { |
502
|
0
|
|
|
|
|
|
my $kind = pop @for_stack; |
503
|
0
|
|
|
|
|
|
$text = "\n\n=begin $kind\n\n" . pop @paragraph_stack; |
504
|
0
|
|
|
|
|
|
$text =~ s/\n+$//s; # nix terminal newlines! |
505
|
0
|
|
|
|
|
|
$text =~ s/\n(?=\n)/\n /g; # separate double-newlines |
506
|
0
|
|
|
|
|
|
$text .= "\n\n=end $kind\n\n"; |
507
|
0
|
|
|
|
|
|
$out .= $text; |
508
|
|
|
|
|
|
|
|
509
|
|
|
|
|
|
|
} elsif($gi eq 'list') { |
510
|
0
|
|
|
|
|
|
$out .= "=back\n\n"; |
511
|
|
|
|
|
|
|
|
512
|
|
|
|
|
|
|
} elsif($gi eq 'item') { |
513
|
0
|
|
|
|
|
|
$text = pop @paragraph_stack; |
514
|
0
|
|
|
|
|
|
$text =~ s/^\s*//s; # kill leading space |
515
|
0
|
|
|
|
|
|
$text =~ s/\n+$//s; # nix terminal newlines! |
516
|
0
|
|
|
|
|
|
$text =~ s/\n(?=\n)/\n /g; # separate double-newlines |
517
|
0
|
|
|
|
|
|
$text .= "\n\n"; |
518
|
|
|
|
|
|
|
|
519
|
|
|
|
|
|
|
# These don't beautify /everything/ beautifiable, but they try. |
520
|
0
|
|
|
|
|
|
while($text =~ s/([^a-zA-Z<])E/$1
|
|
0
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
# Turn E's that obviously don't need escaping, back into <'s |
522
|
0
|
|
|
|
|
|
while($text =~ s/^([^<]*)E/$1>/) {1} |
|
0
|
|
|
|
|
|
|
523
|
|
|
|
|
|
|
# Turn obviously harmless E's back into ">"'s. |
524
|
|
|
|
|
|
|
|
525
|
0
|
|
|
|
|
|
$out .= $text; |
526
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
} elsif($gi =~ m/^head[1234]$/s) { |
528
|
0
|
|
|
|
|
|
$text = pop @paragraph_stack; |
529
|
0
|
|
|
|
|
|
$text =~ s/^(\s)/Z<>$1/s; # make sure we're NOT (visibly) indented |
530
|
0
|
|
|
|
|
|
$text =~ s/\n+$//s; # nix terminal newlines! |
531
|
0
|
|
|
|
|
|
$text =~ s/\n(?=\n)/\n /g; # nix any double-newlines |
532
|
0
|
|
|
|
|
|
$text .= "\n\n"; |
533
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
# These don't beautify /everything/ beautifiable, but they try. |
535
|
0
|
|
|
|
|
|
while($text =~ s/([^a-zA-Z<])E/$1
|
|
0
|
|
|
|
|
|
|
536
|
|
|
|
|
|
|
# Turn E's that obviously don't need escaping, back into <'s |
537
|
0
|
|
|
|
|
|
while($text =~ s/^([^<]*)E/$1>/) {1} |
|
0
|
|
|
|
|
|
|
538
|
|
|
|
|
|
|
# Turn obviously harmless E's back into ">"'s. |
539
|
|
|
|
|
|
|
|
540
|
0
|
|
|
|
|
|
$out .= $text; |
541
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
} elsif($gi eq 'link') { # a hack |
543
|
0
|
|
|
|
|
|
$text = pop @paragraph_stack; |
544
|
|
|
|
|
|
|
# "Text cannot contain the characters '/' and '|'" |
545
|
0
|
|
|
|
|
|
$text =~ s/\|/E<124>/g; # AKA verbar |
546
|
0
|
|
|
|
|
|
$text =~ s{/}{E<47>}g; # AKA sol |
547
|
0
|
|
|
|
|
|
$text =~ s/\n(?=\n)/\n /g; |
548
|
|
|
|
|
|
|
# nix any double-newlines, just for good measure |
549
|
0
|
0
|
|
|
|
|
$text .= '|' if length $text; |
550
|
|
|
|
|
|
|
|
551
|
0
|
|
|
|
|
|
my($xref, $section) = @{pop @link_stack}; |
|
0
|
|
|
|
|
|
|
552
|
0
|
0
|
|
|
|
|
$xref = '' unless defined $xref; # "" means 'in this document' |
553
|
0
|
0
|
|
|
|
|
$section = '' unless defined $section; |
554
|
|
|
|
|
|
|
|
555
|
0
|
|
|
|
|
|
$xref = pod_escape($xref); |
556
|
0
|
|
|
|
|
|
$xref =~ s{/}{E<47>}g; |
557
|
0
|
0
|
|
|
|
|
$section = pod_escape("/\"$section\"") if length $section; |
558
|
|
|
|
|
|
|
|
559
|
0
|
0
|
0
|
|
|
|
$section = '/"???"' unless length $xref or length $section; |
560
|
|
|
|
|
|
|
# signals aberrant input! |
561
|
|
|
|
|
|
|
|
562
|
0
|
|
|
|
|
|
$paragraph_stack[-1] .= "L<$text$xref$section>"; |
563
|
|
|
|
|
|
|
|
564
|
|
|
|
|
|
|
} elsif($gi eq 'pod') { |
565
|
|
|
|
|
|
|
# no-op |
566
|
|
|
|
|
|
|
} else { |
567
|
0
|
|
|
|
|
|
DEBUG and print "Closing unknown element \"$gi\"\n"; |
568
|
|
|
|
|
|
|
} |
569
|
0
|
|
|
|
|
|
return; |
570
|
|
|
|
|
|
|
}, |
571
|
|
|
|
|
|
|
|
572
|
|
|
|
|
|
|
## |
573
|
|
|
|
|
|
|
## |
574
|
|
|
|
|
|
|
## Character data! MATANGA!!! |
575
|
|
|
|
|
|
|
'Char' => sub { |
576
|
0
|
|
|
0
|
|
|
shift; |
577
|
0
|
0
|
0
|
|
|
|
return unless defined $_[0] and length $_[0]; # sanity |
578
|
|
|
|
|
|
|
|
579
|
0
|
0
|
|
|
|
|
if(!@stack) { |
580
|
0
|
0
|
|
|
|
|
die "Non-WS text on empty stack: \"$_[0]\"" |
581
|
|
|
|
|
|
|
unless $_[0] =~ m/^\s+$/s; |
582
|
|
|
|
|
|
|
} else { |
583
|
0
|
0
|
0
|
|
|
|
if(($Acceptable_children{$stack[-1]} |
584
|
|
|
|
|
|
|
|| die "Putting text under unknown element \"$stack[-1]\"" |
585
|
|
|
|
|
|
|
)->{'#PCDATA'}) { |
586
|
|
|
|
|
|
|
# This is the only case where we can add: |
587
|
0
|
0
|
|
|
|
|
die "\@paragraph_stack is empty? (stack: @stack)" |
588
|
|
|
|
|
|
|
unless @paragraph_stack; |
589
|
0
|
0
|
|
|
|
|
if($stack[-1] eq 'pre') { |
590
|
0
|
|
|
|
|
|
$paragraph_stack[-1] .= $_[0]; |
591
|
|
|
|
|
|
|
} else { |
592
|
0
|
|
|
|
|
|
$paragraph_stack[-1] .= pod_escape($_[0]); |
593
|
|
|
|
|
|
|
} |
594
|
|
|
|
|
|
|
} else { |
595
|
|
|
|
|
|
|
# doesn't allow PCDATA |
596
|
0
|
0
|
|
|
|
|
die "Can't have non-WS text in a \"$stack[-1]\"" |
597
|
|
|
|
|
|
|
unless $_[0] =~ m/^\s+$/s; |
598
|
|
|
|
|
|
|
# Else it's just ignorable whitespace. |
599
|
|
|
|
|
|
|
} |
600
|
|
|
|
|
|
|
} |
601
|
|
|
|
|
|
|
|
602
|
0
|
|
|
|
|
|
return; |
603
|
|
|
|
|
|
|
}, |
604
|
|
|
|
|
|
|
|
605
|
|
|
|
|
|
|
# 'Comment' => sub { }, |
606
|
|
|
|
|
|
|
# 'Proc' => sub { }, |
607
|
|
|
|
|
|
|
# 'Attlist' => sub { }, |
608
|
|
|
|
|
|
|
# 'Element' => sub { }, |
609
|
|
|
|
|
|
|
# 'Doctype' => sub { }, |
610
|
0
|
|
|
|
|
|
}); |
611
|
|
|
|
|
|
|
|
612
|
|
|
|
|
|
|
# Now actually process... |
613
|
0
|
|
|
|
|
|
$out = ""; |
614
|
0
|
0
|
|
|
|
|
if(ref($content) eq 'SCALAR') { |
615
|
0
|
|
|
|
|
|
$xml->parse($$content); |
616
|
|
|
|
|
|
|
} else { |
617
|
0
|
|
|
|
|
|
$xml->parsefile($content); |
618
|
|
|
|
|
|
|
} |
619
|
|
|
|
|
|
|
|
620
|
0
|
|
|
|
|
|
$out =~ s/^([^=])/=pod\n\n$1/; |
621
|
|
|
|
|
|
|
# make sure that we start with a =-thingie, one way or another. |
622
|
|
|
|
|
|
|
|
623
|
0
|
|
|
|
|
|
$out .= "=cut\n\n"; |
624
|
|
|
|
|
|
|
|
625
|
0
|
|
|
|
|
|
sanitize_newlines($out); |
626
|
0
|
|
|
|
|
|
return $out; |
627
|
|
|
|
|
|
|
} |
628
|
|
|
|
|
|
|
|
629
|
|
|
|
|
|
|
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - |
630
|
|
|
|
|
|
|
|
631
|
|
|
|
|
|
|
{ |
632
|
|
|
|
|
|
|
my %e = ('<' => 'E', '>' => 'E' ); |
633
|
|
|
|
|
|
|
sub pod_escape { |
634
|
|
|
|
|
|
|
#print STDERR "IN: <$_[0]>\n"; |
635
|
0
|
|
|
0
|
0
|
|
my $it = $_[0]; |
636
|
0
|
0
|
|
|
|
|
$it =~ s/([^\cm\cj\f\t !-;=?-~])/$Char2podent{ord $1} or "E<".ord($1).">"/eg; |
|
0
|
|
|
|
|
|
|
637
|
|
|
|
|
|
|
# Encode control chars, high bit chars and '<' and '>' |
638
|
|
|
|
|
|
|
#print STDERR "OUT: <$_[0]>\n\n"; |
639
|
0
|
|
|
|
|
|
return $it; |
640
|
|
|
|
|
|
|
} |
641
|
|
|
|
|
|
|
} |
642
|
|
|
|
|
|
|
|
643
|
|
|
|
|
|
|
########################################################################### |
644
|
|
|
|
|
|
|
########################################################################### |
645
|
|
|
|
|
|
|
1; |
646
|
|
|
|
|
|
|
|