File Coverage

lib/Pod/XML.pm

Criterion	Covered	Total	%
statement	116	165	70.3
branch	49	86	56.9
condition	9	12	75.0
subroutine	13	14	92.8
pod	0	10	0.0
total	187	287	65.1

line	stmt	bran	cond	sub	pod	time	code
1							package Pod::XML;
2
3							# $Id: XML.pm 30 2007-02-03 16:50:07Z matt $
4
5	5			5		184742	use strict;
	5					13
	5					205
6	5			5		27	use warnings;
	5					11
	5					177
7	5			5		36	use vars qw(@ISA $VERSION);
	5					7
	5					329
8
9	5			5		27	use Pod::Parser;
	5					15
	5					15128
10
11							@ISA = ( 'Pod::Parser' );
12
13							$VERSION = '0.99';
14
15							# I'm not sure why Matt Sergeant did this in this way but I'll leave it for
16							# the time being
17							my %head2sect = (
18							1 => "sect1",
19							2 => "sect2",
20							3 => "sect3",
21							4 => "sect4",
22							);
23
24							# a hash array of HTML escape codes
25							my %HTML_Escapes = (
26							"apos" => "#x27", # apostrophe
27							"Aacute" => "#xC1", # capital A, acute accent
28							"aacute" => "#xE1", # small a, acute accent
29							"Acirc" => "#xC2", # capital A, circumflex accent
30							"acirc" => "#xE2", # small a, circumflex accent
31							"AElig" => "#xC6", # capital AE diphthong (ligature)
32							"aelig" => "#xE6", # small ae diphthong (ligature)
33							"Agrave" => "#xC0", # capital A, grave accent
34							"agrave" => "#xE0", # small a, grave accent
35							"Aring" => "#xC5", # capital A, ring
36							"aring" => "#xE5", # small a, ring
37							"Atilde" => "#xC3", # capital A, tilde
38							"atilde" => "#xE3", # small a, tilde
39							"Auml" => "#xC4", # capital A, dieresis or umlaut mark
40							"auml" => "#xE4", # small a, dieresis or umlaut mark
41							"Ccedil" => "#xC7", # capital C, cedilla
42							"ccedil" => "#xE7", # small c, cedilla
43							"Eacute" => "#xC9", # capital E, acute accent
44							"eacute" => "#xE9", # small e, acute accent
45							"Ecirc" => "#xCA", # capital E, circumflex accent
46							"ecirc" => "#xEA", # small e, circumflex accent
47							"Egrave" => "#xC8", # capital E, grave accent
48							"egrave" => "#xE8", # small e, grave accent
49							"ETH" => "#xD0", # capital Eth, Icelandic
50							"eth" => "#xF0", # small eth, Icelandic
51							"Euml" => "#xCB", # capital E, dieresis or umlaut mark
52							"euml" => "#xEB", # small e, dieresis or umlaut mark
53							"Iacute" => "#xCD", # capital I, acute accent
54							"iacute" => "#xED", # small i, acute accent
55							"Icirc" => "#xCE", # capital I, circumflex accent
56							"icirc" => "#xEE", # small i, circumflex accent
57							"Igrave" => "#xCD", # capital I, grave accent
58							"igrave" => "#xED", # small i, grave accent
59							"Iuml" => "#xCF", # capital I, dieresis or umlaut mark
60							"iuml" => "#xEF", # small i, dieresis or umlaut mark
61							"Ntilde" => "#xD1", # capital N, tilde
62							"ntilde" => "#xF1", # small n, tilde
63							"Oacute" => "#xD3", # capital O, acute accent
64							"oacute" => "#xF3", # small o, acute accent
65							"Ocirc" => "#xD4", # capital O, circumflex accent
66							"ocirc" => "#xF4", # small o, circumflex accent
67							"Ograve" => "#xD2", # capital O, grave accent
68							"ograve" => "#xF2", # small o, grave accent
69							"Oslash" => "#xD8", # capital O, slash
70							"oslash" => "#xF8", # small o, slash
71							"Otilde" => "#xD5", # capital O, tilde
72							"otilde" => "#xF5", # small o, tilde
73							"Ouml" => "#xD6", # capital O, dieresis or umlaut mark
74							"ouml" => "#xF6", # small o, dieresis or umlaut mark
75							"szlig" => "#xDF", # small sharp s, German (sz ligature)
76							"THORN" => "#xDE", # capital THORN, Icelandic
77							"thorn" => "#xFE", # small thorn, Icelandic
78							"Uacute" => "#xDA", # capital U, acute accent
79							"uacute" => "#xFA", # small u, acute accent
80							"Ucirc" => "#xDB", # capital U, circumflex accent
81							"ucirc" => "#xFB", # small u, circumflex accent
82							"Ugrave" => "#xD9", # capital U, grave accent
83							"ugrave" => "#xF9", # small u, grave accent
84							"Uuml" => "#xDC", # capital U, dieresis or umlaut mark
85							"uuml" => "#xFC", # small u, dieresis or umlaut mark
86							"Yacute" => "#xDD", # capital Y, acute accent
87							"yacute" => "#xFD", # small y, acute accent
88							"yuml" => "#xFF", # small y, dieresis or umlaut mark
89							"lchevron" => "#xAB", # left chevron (double less than)
90							"rchevron" => "#xBB", # right chevron (double greater than)
91							);
92
93							sub html_escape
94							{
95	21		100	21	0	65	my $text = shift \|\| '';
96
97							# ampersand MUST be done first!
98	21					41	$text =~ s/&/\&/g;
99
100							# handle < and > too
101	21					37	$text =~ s/
102	21					33	$text =~ s/>/\>/g;
103
104							# convert other {tag:...} markers
105	21					86	$text =~ s/{tag:escape ref='([^']*)'}/\&$1;/g;
106
107	21					60	return $text;
108							}
109
110							sub finalise_output
111							{
112	4			4	0	10	my $parser = shift;
113
114							# put something pretty together
115	4		50			58	$parser->{xml_string} =
116							"
117							( $parser->{Encoding} \|\| "iso-8859-1" ) . "'?>\n" .
118							"\n" .
119							"\n" .
120							"" . html_escape ( $parser->{title} ) . "\n" .
121							"\n" .
122							$parser->{xml_string} .
123							"\n";
124
125	4	50				21	if ( $parser->{Encoding} )
126							{
127	0					0	my $tmp = Encode::encode ( $parser->{Encoding}, $parser->{xml_string} );
128	0					0	$parser->{xml_string} = $tmp;
129							}
130
131	4	50				23	if ( ! $parser->{send_to_string} )
132							{
133	4					30	my $fh = $parser->output_handle ();
134
135	4					31	print $fh $parser->{xml_string};
136							}
137							}
138
139							sub xml_output
140							{
141	21			21	0	55	my ( $parser, @strings ) = @_;
142
143	21					1022	$parser->{xml_string} .= join ( '', @strings );
144							}
145
146							sub begin_pod
147							{
148	4			4	0	1859	my ( $parser ) = @_;
149
150	4	50				43	if ( $parser->{Encoding} )
151							{
152							# can we use the Encode module?
153							eval
154	0					0	{
155	0					0	require Encode;
156							};
157
158	0	0				0	die ( "Need Encode module to specify specific output encoding - " . $@ )
159							if ( $@ );
160
161							# make sure we can encode to the specific encoding
162							eval
163	0					0	{
164	0					0	Encode::encode ( $parser->{Encoding}, "" );
165							};
166
167	0	0				0	die ( "Encoding issue - " . $@ ) if ( $@ );
168							}
169
170	4					13	$parser->{headlevel} = 0;
171	4					10	$parser->{seentitle} = 0;
172	4					12	$parser->{closeitem} = 0;
173	4					10	$parser->{in_begin_block} = 0;
174	4					10	$parser->{this_is_name} = 0;
175	4					11	$parser->{title} = '';
176	4					569	$parser->{xml_string} = '';
177							}
178
179							sub end_pod
180							{
181	4			4	0	10	my ( $parser ) = @_;
182
183	4					18	while ( $parser->{headlevel} )
184							{
185	4					35	$parser->xml_output ( "{headlevel}-- } .
186							">\n" );
187							}
188
189	4					18	$parser->finalise_output;
190							}
191
192							sub command
193							{
194	8			8	0	29	my ( $parser, $command, $paragraph ) = @_;
195
196	8					47	$paragraph =~ s/\s*$//;
197	8					27	$paragraph =~ s/^\s*//;
198
199	8					641	$paragraph = $parser->interpolate ( $paragraph );
200	8					28	$paragraph = uri_find ( $paragraph );
201	8					27	$paragraph = html_escape ( $paragraph );
202	8					16	$paragraph =~ s/\{(\/?)tag:(.*?)\}/<$1$2>/g;
203	8					16	$paragraph =~ s/\{code:(\d+)\}/&#$1/g;
204
205	8	100				33	if ( $parser->{in_begin_block} == 0 )
		50
206							{
207	7	100				45	if ( $command =~ /^head(\d+)/ )
		50
		50
		50
		50
208							{
209	6					17	my $headlevel = $1;
210
211							# we should use "NAME" as the title
212	6	100	100			76	$parser->{this_is_name}++
213							if ( $paragraph =~ m/^name$/i && $parser->{this_is_name} == 0 );
214
215	6	100				28	if ( $headlevel <= $parser->{headlevel} )
216							{
217	2					9	while ( $headlevel <= $parser->{headlevel} )
218							{
219	2					9	$parser->xml_output ( "{headlevel}-- },
220							">\n" );
221							}
222							}
223
224	6					30	while ( $headlevel > ( $parser->{headlevel} + 1 ) )
225							{
226	0					0	$parser->xml_output ( "<", $head2sect { ++$parser->{headlevel} },
227							">\n" );
228							}
229
230	6					14	$parser->{headlevel} = $headlevel;
231	6					109	$parser->xml_output ( "<", $head2sect { $headlevel }, ">\n",
232							"", $paragraph, "\n" );
233							}
234							elsif ( $command eq "over" )
235							{
236	0	0				0	if ( $parser->{closeitem} )
237							{
238	0					0	$parser->xml_output ( "\n" );
239	0					0	$parser->{closeitem} = 0;
240							}
241
242	0					0	$parser->xml_output ( "\n" );
243							}
244							elsif ( $command eq "back" )
245							{
246	0	0				0	if ( $parser->{closeitem} )
247							{
248	0					0	$parser->xml_output ( "\n" );
249	0					0	$parser->{closeitem} = 0;
250							}
251
252	0					0	$parser->xml_output ( "\n" );
253							}
254							elsif ( $command eq "item" )
255							{
256	0	0				0	if ( $parser->{closeitem} )
257							{
258	0					0	$parser->xml_output ( "\n" );
259	0					0	$parser->{closeitem} = 0;
260							}
261
262	0					0	$parser->xml_output ( "" );
263
264	0	0				0	if ( $paragraph ne '*' )
265							{
266	0					0	$paragraph =~ s/^\*\s+//;
267	0					0	$parser->xml_output ( "", $paragraph, "\n" );
268							}
269
270	0					0	$parser->{closeitem}++;
271							}
272							elsif ( $command eq 'begin' )
273							{
274							# this is to strip out =begin ... =end blocks, which aren't generally POD
275	1					65	$parser->{in_begin_block} = 1;
276							}
277							}
278							elsif ( $command eq 'end' )
279							{
280	1					53	$parser->{in_begin_block} = 0;
281							}
282							}
283
284							sub verbatim
285							{
286	0			0	0	0	my ( $parser, $paragraph ) = @_;
287
288	0	0				0	return if $parser->{in_begin_block};
289
290	0	0				0	if ( $paragraph =~ s/^(\s*)// )
291							{
292	0					0	my $indent = $1;
293
294	0					0	$paragraph =~ s/\s*$//;
295
296	0	0				0	return unless length $paragraph;
297
298	0					0	$paragraph =~ s/^$indent//mg; # un-indent
299	0					0	$paragraph =~ s/\]\]>/\]\]>\]\]>
300
301							# is this the title block?
302	0	0				0	if ( $parser->{this_is_name} == 1 )
303							{
304							# increment, rather than setting back to zero; this way we can ensure
305							# the first NAME is used, but not proceeding ones
306	0					0	$parser->{this_is_name}++;
307	0					0	$parser->{title} = "";
308							}
309
310	0					0	$parser->xml_output ( "
311							"\n]]>\n" );
312							}
313							}
314
315							sub textblock
316							{
317	10			10	0	19	my ( $parser, $paragraph, $line_num ) = @_;
318
319	10	100				80	return if $parser->{in_begin_block};
320
321	9					35	$paragraph =~ s/^\s*//;
322	9					161	$paragraph =~ s/\s*$//;
323
324	9					837	my $text = $parser->interpolate ( $paragraph );
325
326	9					26	$text = uri_find ( $text );
327	9					22	$text = html_escape ( $text );
328	9					114	$text =~ s/\{(\/?)tag:(.*?)\}/<$1$2>/g;
329	9					19	$text =~ s/\{code:(\d+)\}/&#$1/g;
330
331	9	100				60	if ( $parser->{this_is_name} == 1 )
332							{
333							# increment, rather than setting back to zero; this way we can ensure the
334							# first NAME is used, but not proceeding ones
335	2					4	$parser->{this_is_name}++;
336	2					4	$parser->{title} = $paragraph;
337							}
338
339	9	50				30	if ( $parser->{headlevel} == 0 )
340							{
341	0					0	$parser->xml_output ( "\n", $parser->{title}, </td> </tr> <tr> <td class="h" > <a name="342">342</a> </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td > </td> <td class="s"> "\n" );
343	0					0	$parser->{headlevel}++;
344							}
345
346	9					29	$parser->xml_output ( "\n", $text, "\n\n" );
347							}
348
349							sub uri_find
350							{
351	17		100	17	0	53	my $text = shift \|\| '';
352
353							# Code from the Perl Cookbook
354	17					22	my $urls = '(https\|http\|telnet\|gopher\|file\|wais\|ftp\|mailto)';
355	17					25	my $ltrs = '\w';
356	17					20	my $gunk = '/#~:.?+=&%@!\-';
357	17					21	my $punc = '.:?\-!,';
358	17					34	my $any = $ltrs . $gunk . $punc;
359
360	17					17	my $new;
361
362	17					1331	while (
363							$text =~ m{
364							\G # anchor to last match place
365							(.*?) # catch stuff before match in $1
366							\b # start at word boundary
367							( # BEGIN $2
368							$urls : # http:
369							(?![:/]) # negative lookahead for : or /
370							[$any]+? # followed by 1 or more allowed charact
371							) # END $2
372							(?= # look ahead after $2
373							[$punc]* # for 0 or more punctuation characters
374							(
375							[^$any] # followed by a non-URL character
376							\| \Z # or alternatively the end of the html
377							)
378							) # end of look ahead
379							}igcsox )
380							{
381	0					0	my ( $pre, $url ) = ( $1, $2 );
382	0					0	$new .= $pre;
383	0					0	$new .= "\{tag:xlink uri='" . $url . "'\}" . $url . "\{/tag:xlink\}";
384							}
385
386	17					68	$text =~ /\G(.*)/gcs;
387	17	50				71	$new .= $1 if defined $1;
388
389	17					53	return $new;
390							}
391
392							sub interior_sequence
393							{
394	31			31	0	77	my ( $parser, $seq_command, $seq_argument ) = @_;
395	31					147	my $fh = $parser->output_handle ();
396
397	31	50				239	if ( $seq_command eq 'C' )
		50
		50
		50
		50
		50
		100
		50
398							{
399	0					0	return "\{tag:code\}" . $seq_argument . "\{\/tag:code\}";
400							}
401							elsif ( $seq_command eq 'I' )
402							{
403	0					0	return "\{tag:emphasis\}" . $seq_argument . "\{\/tag:emphasis\}";
404							}
405							elsif ( $seq_command eq 'B' )
406							{
407	0					0	return "\{tag:strong\}" . $seq_argument . "\{\/tag:strong\}";
408							}
409							elsif ( $seq_command eq 'S' )
410							{
411	0					0	$seq_argument =~ s/ /\{char:160\}/g;
412
413	0					0	return $seq_argument;
414							}
415							elsif ( $seq_command eq 'F' )
416							{
417	0					0	return "\{tag:filename\}" . $seq_argument . "\{\/tag:filename\}";
418							}
419							elsif ( $seq_command eq 'X' )
420							{
421	0					0	return "\{tag:index\}" . $seq_argument . "\{\/tag:index\}";
422							}
423							elsif ( $seq_command eq 'L' )
424							{
425							# parse L<>, can be any of:
426							# L or L (other page or section in this page)
427							# L (item in a other page)
428							# L (section in other page)
429							# L<"sect"> (same as L)
430							# L (same as L)
431							# L (same as L)
432							# plus any of the above can be prefixed with text\| to use
433							# that text as the link text.
434
435							# Additionally, there can also be;
436							# L
437							# which SHOULD NOT be prepended label\|
438	17					31	$seq_argument =~ s/[\r\n]/ /g;
439	17					24	my $text = $seq_argument;
440
441	17	100				54	if ( $seq_argument =~ /^([^\|]+)\\|(.*)$/ )
442							{
443	7					15	$text = $1;
444	7					15	$seq_argument = $2;
445							}
446
447	17	100				80	if ( $seq_argument =~ /^[a-z]+:\//i )
		100
		100
448							{
449	4		33			12	$text \|\|= $seq_argument;
450							}
451							elsif ( $seq_argument =~ /^(.?)\/(.)$/ )
452							{
453							# name/ident or name/"sect"
454	8					18	my $ident_or_sect = $2;
455	8					14	$seq_argument = $1;
456
457	8	100				24	if ( $ident_or_sect =~ /^\"(.*)\"$/ )
458							{
459	4					9	my $sect = $1;
460	4					7	$sect = substr ( $sect, 0, 30 );
461	4					7	$sect =~ s/\s/_/g;
462	4					8	$seq_argument .= '#' . $sect;
463							}
464							else
465							{
466	4					9	$seq_argument .= '#' . $ident_or_sect;
467							}
468							}
469							elsif ( $seq_argument =~ /^\\?\"(.*)\"$/ )
470							{
471	2					4	my $sect = $1;
472	2					6	$sect = substr ( $sect, 0, 30 );
473	2					3	$sect =~ s/\s/_/g;
474	2					15	$seq_argument = '#' . $sect;
475							}
476
477	17					1370	return "\{tag:link xref='" . $seq_argument . "'\}" . $text .
478							"\{\/tag:link\}";
479							}
480							elsif ( $seq_command eq 'E' )
481							{
482							# E<> codes can be numerical!
483	14	100				63	if ( $seq_argument =~ m/^(0[0-7]+\|[0-9]+)$/ )
484							{
485							# it's octal, convert to decimal!
486	2	100				9	$seq_argument = oct ( $seq_argument ) if $seq_argument =~ m/^0/;
487
488							# convert to hex
489	2					13	$seq_argument = sprintf ( '0x%x', $seq_argument );
490							}
491
492	14	100				34	if ( $seq_argument =~ m/^0x([0-9A-Fa-f]{2,4})$/ )
493							{
494							# E<> is hex!
495	3					15	$seq_argument = "#x" . $1;
496							}
497							else
498							{
499							# if we know about this code then translate it into hex
500	11	100				33	if ( exists $HTML_Escapes { $seq_argument } )
501							{
502	1					3	$seq_argument = $HTML_Escapes { $seq_argument };
503							}
504							}
505
506							# probably a HTML escape code
507	14					29	$seq_argument = "{tag:escape ref='" . $seq_argument . "'}";
508
509	14					1030	return $seq_argument;
510							}
511							}
512
513							1;
514
515							__END__