File Coverage

blib/lib/PPI/Token/BOM.pm
Criterion Covered Total %
statement 15 16 93.7
branch 4 6 66.6
condition n/a
subroutine 3 3 100.0
pod n/a
total 22 25 88.0


line stmt bran cond sub pod time code
1             package PPI::Token::BOM;
2              
3             =pod
4              
5             =head1 NAME
6              
7             PPI::Token::BOM - Tokens representing Unicode byte order marks
8              
9             =head1 INHERITANCE
10              
11             PPI::Token::BOM
12             isa PPI::Token
13             isa PPI::Element
14              
15             =head1 DESCRIPTION
16              
17             This is a special token in that it can only occur at the beginning of
18             documents. If a BOM byte mark occurs elsewhere in a file, it should
19             be treated as L<PPI::Token::Whitespace>. We recognize the byte order
20             marks identified at this URL:
21             L<http://www.unicode.org/faq/utf_bom.html#BOM>
22              
23                UTF-32, big-endian     00 00 FE FF
24                UTF-32, little-endian  FF FE 00 00
25                UTF-16, big-endian     FE FF
26                UTF-16, little-endian  FF FE
27                UTF-8                  EF BB BF
28              
29             Note that as of this writing, PPI only has support for UTF-8
30             (namely, in POD and strings) and no support for UTF-16 or UTF-32. We
31             support the BOMs of the latter two for completeness only.
32              
33             The BOM is considered non-significant, like white space.
34              
35             =head1 METHODS
36              
37             There are no additional methods beyond those provided by the parent
38             L<PPI::Token> and L<PPI::Element> classes.
39              
40             =cut
41              
42 65     65   363 use strict;
  65         123  
  65         1451  
43 65     65   282 use PPI::Token ();
  65         107  
  65         15732  
44              
45             our $VERSION = '1.276';
46              
47             our @ISA = "PPI::Token";
48              
49             sub significant() { '' }
50              
51              
52              
53              
54              
55             #####################################################################
56             # Parsing Methods
57              
58             my %bom_types = (
59             "\x00\x00\xfe\xff" => 'UTF-32',
60             "\xff\xfe\x00\x00" => 'UTF-32',
61             "\xfe\xff" => 'UTF-16',
62             "\xff\xfe" => 'UTF-16',
63             "\xef\xbb\xbf" => 'UTF-8',
64             );
65              
66             sub __TOKENIZER__on_line_start {
67 16794     16794   21514 my $t = $_[1];
68 16794         22349 $_ = $t->{line};
69              
70 16794 100       43092 if (m/^(\x00\x00\xfe\xff | # UTF-32, big-endian
71             \xff\xfe\x00\x00 | # UTF-32, little-endian
72             \xfe\xff | # UTF-16, big-endian
73             \xff\xfe | # UTF-16, little-endian
74             \xef\xbb\xbf) # UTF-8
75             /xs) {
76 2         22 my $bom = $1;
77              
78 2 50       10 if ($bom_types{$bom} ne 'UTF-8') {
79 0         0 return $t->_error("$bom_types{$bom} is not supported");
80             }
81              
82 2 50       8 $t->_new_token('BOM', $bom) or return undef;
83 2         5 $t->{line_cursor} += length $bom;
84             }
85              
86             # Continue just as if there was no BOM
87 16794         22945 $t->{class} = 'PPI::Token::Whitespace';
88 16794         42785 return $t->{class}->__TOKENIZER__on_line_start($t);
89             }
90              
91             1;
92              
93             =pod
94              
95             =head1 SUPPORT
96              
97             See the L<support section|PPI/SUPPORT> in the main module
98              
99             =head1 AUTHOR
100              
101             Chris Dolan E<lt>cdolan@cpan.orgE<gt>
102              
103             =head1 COPYRIGHT
104              
105             Copyright 2001 - 2011 Adam Kennedy.
106              
107             This program is free software; you can redistribute
108             it and/or modify it under the same terms as Perl itself.
109              
110             The full text of the license can be found in the
111             LICENSE file included with this module.
112              
113             =cut