File Coverage

blib/lib/PPI/Token/BOM.pm
Criterion Covered Total %
statement 15 16 93.7
branch 4 6 66.6
condition n/a
subroutine 3 3 100.0
pod n/a
total 22 25 88.0


line stmt bran cond sub pod time code
1             package PPI::Token::BOM;
2              
3             =pod
4              
5             =head1 NAME
6              
7             PPI::Token::BOM - Tokens representing Unicode byte order marks
8              
9             =head1 INHERITANCE
10              
11             PPI::Token::BOM
12             isa PPI::Token
13             isa PPI::Element
14              
15             =head1 DESCRIPTION
16              
17             This is a special token in that it can only occur at the beginning of
18             documents. If a BOM byte mark occurs elsewhere in a file, it should
19             be treated as L<PPI::Token::Whitespace>. We recognize the byte order
20             marks identified at this URL:
21             L<http://www.unicode.org/faq/utf_bom.html#BOM>
22              
23             UTF-32, big-endian 00 00 FE FF
24             UTF-32, little-endian FF FE 00 00
25             UTF-16, big-endian FE FF
26             UTF-16, little-endian FF FE
27             UTF-8 EF BB BF
28              
29             Note that as of this writing, PPI only has support for UTF-8
30             (namely, in POD and strings) and no support for UTF-16 or UTF-32. We
31             support the BOMs of the latter two for completeness only.
32              
33             The BOM is considered non-significant, like white space.
34              
35             =head1 METHODS
36              
37             There are no additional methods beyond those provided by the parent
38             L<PPI::Token> and L<PPI::Element> classes.
39              
40             =cut
41              
42 64     64   356 use strict;
  64         100  
  64         1412  
43 64     64   263 use PPI::Token ();
  64         91  
  64         15549  
44              
45             our $VERSION = '1.275';
46              
               # Inherit from PPI::Token (loaded above with an empty import list).
47             our @ISA = "PPI::Token";
48              
               # Constant sub: always returns the empty string (false), i.e. a BOM
               # token is treated as non-significant, like white space.
49             sub significant() { '' }
50              
51              
52              
53              
54              
55             #####################################################################
56             # Parsing Methods
57              
58             my %bom_types = (
               # Maps each recognized byte order mark (as a raw byte string) to
               # the name of the encoding it signals. Both byte orders of a
               # UTF-32 or UTF-16 mark map to the same encoding name.
59             "\x00\x00\xfe\xff" => 'UTF-32',
60             "\xff\xfe\x00\x00" => 'UTF-32',
61             "\xfe\xff" => 'UTF-16',
62             "\xff\xfe" => 'UTF-16',
63             "\xef\xbb\xbf" => 'UTF-8',
64             );
65              
66             sub __TOKENIZER__on_line_start {
               # Line-start hook. $_[1] is the tokenizer state ($t); $_[0] (the
               # invocant) is not used.
               #
               # If the current line begins with a byte order mark, a UTF-8 mark
               # is emitted as a 'BOM' token and the line cursor is advanced past
               # it; any other recognized mark is reported through $t->_error.
               # In every non-error case the tokenizer class is then switched to
               # PPI::Token::Whitespace and that class's line-start hook is run.
               #
               # Returns: the result of $t->_error for a non-UTF-8 mark, undef if
               # $t->_new_token returns false, otherwise whatever the Whitespace
               # hook returns.
67 16788     16788   21633 my $t = $_[1];
               # NOTE(review): this assigns to the global $_ without localizing
               # it, so the caller's $_ is overwritten by the current line.
68 16788         22124 $_ = $t->{line};
69              
               # Alternation order matters: FF FE 00 00 (UTF-32 LE) is listed
               # before its two-byte prefix FF FE (UTF-16 LE), so the longer
               # mark is tried first.
70 16788 100       43498 if (m/^(\x00\x00\xfe\xff | # UTF-32, big-endian
71             \xff\xfe\x00\x00 | # UTF-32, little-endian
72             \xfe\xff | # UTF-16, big-endian
73             \xff\xfe | # UTF-16, little-endian
74             \xef\xbb\xbf) # UTF-8
75             /xs) {
76 2         6 my $bom = $1;
77              
               # Only the UTF-8 mark is accepted; the UTF-16/UTF-32 marks are
               # recognized solely so they can be rejected with a clear message.
               # NOTE(review): this report shows 0 hits for the error return
               # below, so the rejection path is not exercised by the test run.
78 2 50       9 if ($bom_types{$bom} ne 'UTF-8') {
79 0         0 return $t->_error("$bom_types{$bom} is not supported");
80             }
81              
               # Emit the mark as its own token, then skip the cursor over it.
82 2 50       7 $t->_new_token('BOM', $bom) or return undef;
83 2         7 $t->{line_cursor} += length $bom;
84             }
85              
86             # Continue just as if there was no BOM
87 16788         22270 $t->{class} = 'PPI::Token::Whitespace';
88 16788         43549 return $t->{class}->__TOKENIZER__on_line_start($t);
89             }
90              
91             1;
92              
93             =pod
94              
95             =head1 SUPPORT
96              
97             See the L<support section|PPI/SUPPORT> in the main module.
98              
99             =head1 AUTHOR
100              
101             Chris Dolan E<lt>cdolan@cpan.orgE<gt>
102              
103             =head1 COPYRIGHT
104              
105             Copyright 2001 - 2011 Adam Kennedy.
106              
107             This program is free software; you can redistribute
108             it and/or modify it under the same terms as Perl itself.
109              
110             The full text of the license can be found in the
111             LICENSE file included with this module.
112              
113             =cut