File Coverage

blib/lib/PPI/Token/BOM.pm
Criterion Covered Total %
statement 15 16 93.7
branch 4 6 66.6
condition n/a
subroutine 3 3 100.0
pod n/a
total 22 25 88.0


line stmt bran cond sub pod time code
1             package PPI::Token::BOM;
2              
3             =pod
4              
5             =head1 NAME
6              
7             PPI::Token::BOM - Tokens representing Unicode byte order marks
8              
9             =head1 INHERITANCE
10              
11             PPI::Token::BOM
12             isa PPI::Token
13             isa PPI::Element
14              
15             =head1 DESCRIPTION
16              
17             This is a special token in that it can only occur at the beginning of
18             documents. If a BOM byte mark occurs elsewhere in a file, it should
19             be treated as L<PPI::Token::Whitespace>. We recognize the byte order
20             marks identified at this URL:
21             L<http://www.unicode.org/faq/utf_bom.html#BOM>
22              
23             UTF-32, big-endian 00 00 FE FF
24             UTF-32, little-endian FF FE 00 00
25             UTF-16, big-endian FE FF
26             UTF-16, little-endian FF FE
27             UTF-8 EF BB BF
28              
29             Note that as of this writing, PPI only has support for UTF-8
30             (namely, in POD and strings) and no support for UTF-16 or UTF-32. We
31             support the BOMs of the latter two for completeness only.
32              
33             The BOM is considered non-significant, like white space.
34              
35             =head1 METHODS
36              
37             There are no additional methods beyond those provided by the parent
38             L<PPI::Token> and L<PPI::Element> classes.
39              
40             =cut
41              
42 65     65   468 use strict;
  65         120  
  65         1834  
43 65     65   323 use PPI::Token ();
  65         156  
  65         19919  
44              
45             our $VERSION = '1.277';
46              
47             our @ISA = "PPI::Token";
48              
49             sub significant() { '' }
50              
51              
52              
53              
54              
55             #####################################################################
56             # Parsing Methods
57              
58             my %bom_types = (
59             "\x00\x00\xfe\xff" => 'UTF-32',
60             "\xff\xfe\x00\x00" => 'UTF-32',
61             "\xfe\xff" => 'UTF-16',
62             "\xff\xfe" => 'UTF-16',
63             "\xef\xbb\xbf" => 'UTF-8',
64             );
65              
66             sub __TOKENIZER__on_line_start {
67 16851     16851   24954 my $t = $_[1];
68 16851         27819 $_ = $t->{line};
69              
70 16851 100       52239 if (m/^(\x00\x00\xfe\xff | # UTF-32, big-endian
71             \xff\xfe\x00\x00 | # UTF-32, little-endian
72             \xfe\xff | # UTF-16, big-endian
73             \xff\xfe | # UTF-16, little-endian
74             \xef\xbb\xbf) # UTF-8
75             /xs) {
76 2         7 my $bom = $1;
77              
78 2 50       12 if ($bom_types{$bom} ne 'UTF-8') {
79 0         0 return $t->_error("$bom_types{$bom} is not supported");
80             }
81              
82 2 50       11 $t->_new_token('BOM', $bom) or return undef;
83 2         6 $t->{line_cursor} += length $bom;
84             }
85              
86             # Continue just as if there was no BOM
87 16851         28581 $t->{class} = 'PPI::Token::Whitespace';
88 16851         55183 return $t->{class}->__TOKENIZER__on_line_start($t);
89             }
90              
91             1;
92              
93             =pod
94              
95             =head1 SUPPORT
96              
97             See the L<support section|PPI/SUPPORT> in the main module
98              
99             =head1 AUTHOR
100              
101             Chris Dolan E<lt>cdolan@cpan.orgE<gt>
102              
103             =head1 COPYRIGHT
104              
105             Copyright 2001 - 2011 Adam Kennedy.
106              
107             This program is free software; you can redistribute
108             it and/or modify it under the same terms as Perl itself.
109              
110             The full text of the license can be found in the
111             LICENSE file included with this module.
112              
113             =cut