File Coverage

blib/lib/PPI/Token/BOM.pm

Criterion	Covered	Total	%
statement	15	16	93.7
branch	4	6	66.6
condition			n/a
subroutine	3	3	100.0
pod			n/a
total	22	25	88.0

line	stmt	bran	sub	time	code
1					package PPI::Token::BOM;
2
3					=pod
4
5					=head1 NAME
6
7					PPI::Token::BOM - Tokens representing Unicode byte order marks
8
9					=head1 INHERITANCE
10
11					PPI::Token::BOM
12					isa PPI::Token
13					isa PPI::Element
14
15					=head1 DESCRIPTION
16
17					This is a special token in that it can only occur at the beginning of
18					documents. If a BOM byte mark occurs elsewhere in a file, it should
19					be treated as L<PPI::Token::Whitespace>. We recognize the byte order
20					marks identified at this URL:
21					L<http://www.unicode.org/faq/utf_bom.html#BOM>
22
23					    UTF-32, big-endian     00 00 FE FF
24					    UTF-32, little-endian  FF FE 00 00
25					    UTF-16, big-endian     FE FF
26					    UTF-16, little-endian  FF FE
27					    UTF-8                  EF BB BF
28
29					Note that as of this writing, PPI only has support for UTF-8
30					(namely, in POD and strings) and no support for UTF-16 or UTF-32. We
31					support the BOMs of the latter two for completeness only.
32
33					The BOM is considered non-significant, like white space.
34
35					=head1 METHODS
36
37					There are no additional methods beyond those provided by the parent
38					L<PPI::Token> and L<PPI::Element> classes.
39
40					=cut
41
42	65		65	363	use strict;
	65			123
	65			1451
43	65		65	282	use PPI::Token ();
	65			107
	65			15732
44
45					our $VERSION = '1.276'; # module version string
46
47					our @ISA = "PPI::Token"; # single parent class: method lookup falls through to PPI::Token
48
49					sub significant() { '' } # always returns the empty (false) string: a BOM is never significant
50
51
52
53
54
55					#####################################################################
56					# Parsing Methods
57
58					my %bom_types = (
					# Maps each recognized byte order mark (as raw bytes) to the name of
					# its encoding family; the big- and little-endian forms share a name.
					# __TOKENIZER__on_line_start reports an error for every entry whose
					# value is not 'UTF-8'.
59					"\x00\x00\xfe\xff" => 'UTF-32',
60					"\xff\xfe\x00\x00" => 'UTF-32',
61					"\xfe\xff" => 'UTF-16',
62					"\xff\xfe" => 'UTF-16',
63					"\xef\xbb\xbf" => 'UTF-8',
64					);
65
66					sub __TOKENIZER__on_line_start {
					    # Tokenizer hook for the start of a line: if the line begins with a
					    # byte order mark, emit it as a 'BOM' token and step the cursor past
					    # it, then hand the line over to PPI::Token::Whitespace.
					    #
					    # Arguments:
					    #   $_[1] - the tokenizer state ($t). This sub reads $t->{line},
					    #           advances $t->{line_cursor} and sets $t->{class}.
					    # Returns:
					    #   - the result of $t->_error(...) when the mark is UTF-16 or UTF-32;
					    #   - undef when $t->_new_token returns false;
					    #   - otherwise whatever
					    #     PPI::Token::Whitespace->__TOKENIZER__on_line_start($t) returns.
67	16794		16794	21514	    my $t = $_[1];
					    # NOTE(review): this assigns the global $_ without localizing it.
68	16794			22349	    $_ = $t->{line};
69
					    # The alternation bars must be unescaped: under /x an escaped bar
					    # matches a literal '|' character, which would fuse the five marks
					    # into one sequence that no real BOM matches. The four-byte UTF-32
					    # little-endian mark is listed before the two-byte UTF-16 one
					    # because it starts with the same two bytes.
70	16794	100		43092	    if (m/^(\x00\x00\xfe\xff | # UTF-32, big-endian
71					        \xff\xfe\x00\x00 | # UTF-32, little-endian
72					        \xfe\xff | # UTF-16, big-endian
73					        \xff\xfe | # UTF-16, little-endian
74					        \xef\xbb\xbf) # UTF-8
75					        /xs) {
76	2			22	        my $bom = $1;
77
					        # Only the UTF-8 mark is accepted; any other recognized mark
					        # is reported through the tokenizer's error handler.
78	2	50		10	        if ($bom_types{$bom} ne 'UTF-8') {
79	0			0	            return $t->_error("$bom_types{$bom} is not supported");
80					        }
81
					        # Emit the mark as its own token and skip over its bytes.
82	2	50		8	        $t->_new_token('BOM', $bom) or return undef;
83	2			5	        $t->{line_cursor} += length $bom;
84					    }
85
86					    # Continue just as if there was no BOM
87	16794			22945	    $t->{class} = 'PPI::Token::Whitespace';
88	16794			42785	    return $t->{class}->__TOKENIZER__on_line_start($t);
89					}
90
91					1;
92
93					=pod
94
95					=head1 SUPPORT
96
97					See the L<support section|PPI/SUPPORT> in the main module
98
99					=head1 AUTHOR
100
101					Chris Dolan E<lt>cdolan@cpan.orgE<gt>
102
103					=head1 COPYRIGHT
104
105					Copyright 2001 - 2011 Adam Kennedy.
106
107					This program is free software; you can redistribute
108					it and/or modify it under the same terms as Perl itself.
109
110					The full text of the license can be found in the
111					LICENSE file included with this module.
112
113					=cut