File Coverage

blib/lib/HTML/HTML5/Parser/Charset/DecodeHandle.pm

Criterion	Covered	Total	%
statement	96	578	16.6
branch	36	344	10.4
condition	1	169	0.5
subroutine	11	34	32.3
pod	0	3	0.0
total	144	1128	12.7

line	stmt	bran	cond	sub	pod	time	code
1							package HTML::HTML5::Parser::Charset::DecodeHandle;
2							## skip Test::Tabs
3	9			9		76	use strict;
	9					20
	9					294
4	9			9		53	use warnings;
	9					71
	9					407
5
6							our $VERSION = '0.991';
7
8							## NOTE: \|Message::Charset::Info\| uses this module without calling
9							## the constructor.
10	9			9		59	use HTML::HTML5::Parser::Charset::Info;
	9					19
	9					1562
11
12							my $XML_AUTO_CHARSET = q;
13							my $IANA_CHARSET = q;
14							my $PERL_CHARSET = q;
15							my $XML_CHARSET = q;
16
17							## ->create_decode_handle ($charset_uri, $byte_stream, $onerror)
18							sub create_decode_handle ($$$;$) {
19	0			0	0	0	my $csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$_[1]};
20							my $obj = {
21							category => 0,
22							char_buffer => \(my $s = ''),
23							char_buffer_pos => 0,
24							character_queue => [],
25							filehandle => $_[2],
26							charset => $_[1],
27							byte_buffer => '',
28				0			onerror => $_[3] \|\| sub {},
29							#onerror_set
30	0		0			0	};
31	0	0	0			0	if ($csdef->{uri}->{$XML_AUTO_CHARSET} or
		0
		0
32							$obj->{charset} eq $XML_AUTO_CHARSET) {
33	0					0	my $b = ''; # UTF-8 w/o BOM
34	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
35	0					0	$obj->{input_encoding} = 'UTF-8';
36	0	0				0	if (read $obj->{filehandle}, $b, 256) {
37	9			9		68	no warnings "substr";
	9					18
	9					498
38	9			9		53	no warnings "uninitialized";
	9					24
	9					62547
39	0	0				0	if (substr ($b, 0, 1) eq "<") {
		0
		0
		0
		0
		0
40	0	0				0	if (substr ($b, 1, 1) eq "?") { # ASCII8
		0
41	0	0				0	if ($b =~ /^<\?xml\s+(?:version\s=\s["'][^"']["']\s)?
42							encoding\s=\s["']([^"']*)/x) {
43	0					0	$obj->{input_encoding} = $1;
44	0					0	my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
45	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
46	0	0	0			0	if (not $csdef->{ascii8} or $csdef->{bom_required}) {
47							$obj->{onerror}->(undef, 'charset-name-mismatch-error',
48							charset_uri => $uri,
49	0					0	charset_name => $obj->{input_encoding});
50							}
51							} else {
52	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
53	0					0	$obj->{input_encoding} = 'UTF-8';
54							}
55	0	0				0	if (defined $csdef->{no_bom_variant}) {
56	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant}};
57							}
58							} elsif (substr ($b, 1, 1) eq "\x00") {
59	0	0				0	if (substr ($b, 2, 2) eq "?\x00") { # ASCII16LE
		0
60	0					0	my $c = $b; $c =~ tr/\x00//d;
	0					0
61	0	0				0	if ($c =~ /^<\?xml\s+(?:version\s=\s["'][^"']["']\s)?
62							encoding\s=\s["']([^"']*)/x) {
63	0					0	$obj->{input_encoding} = $1;
64	0					0	my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
65	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
66	0	0	0			0	if (not $csdef->{ascii16} or $csdef->{ascii16be} or
			0
67							$csdef->{bom_required}) {
68							$obj->{onerror}->(undef, 'charset-name-mismatch-error',
69							charset_uri => $uri,
70	0					0	charset_name => $obj->{input_encoding});
71							}
72							} else {
73	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
74	0					0	$obj->{input_encoding} = 'UTF-8';
75							}
76	0	0				0	if (defined $csdef->{no_bom_variant16le}) {
77	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant16le}};
78							}
79							} elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian4321
80	0					0	my $c = $b; $c =~ tr/\x00//d;
	0					0
81	0	0				0	if ($c =~ /^<\?xml\s+(?:version\s=\s["'][^"']["']\s)?
82							encoding\s=\s["']([^"']*)/x) {
83	0					0	$obj->{input_encoding} = $1;
84	0					0	my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
85	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
86	0	0	0			0	if (not $csdef->{ascii32} or
			0
			0
			0
87							$csdef->{ascii32endian1234} or
88							$csdef->{ascii32endian2143} or
89							$csdef->{ascii32endian3412} or
90							$csdef->{bom_required}) {
91							$obj->{onerror}->(undef, 'charset-name-mismatch-error',
92							charset_uri => $uri,
93	0					0	charset_name => $obj->{input_encoding});
94							}
95							} else {
96	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
97	0					0	$obj->{input_encoding} = 'UTF-8';
98							}
99	0	0				0	if (defined $csdef->{no_bom_variant32endian4321}) {
100	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian4321}};
101							}
102							}
103							}
104							} elsif (substr ($b, 0, 3) eq "\xEF\xBB\xBF") { # UTF8
105	0					0	$obj->{has_bom} = 1;
106	0					0	substr ($b, 0, 3) = '';
107	0					0	my $c = $b;
108	0	0				0	if ($c =~ /^<\?xml\s+(?:version\s=\s["'][^"']["']\s)?
109							encoding\s=\s["']([^"']*)/x) {
110	0					0	$obj->{input_encoding} = $1;
111	0					0	my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
112	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
113	0	0	0			0	if (not $csdef->{utf8_encoding_scheme} or
114							not $csdef->{bom_allowed}) {
115							$obj->{onerror}->(undef, 'charset-name-mismatch-error',
116							charset_uri => $uri,
117	0					0	charset_name => $obj->{input_encoding});
118							}
119							} else {
120	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
121	0					0	$obj->{input_encoding} = 'UTF-8';
122							}
123	0	0				0	if (defined $csdef->{no_bom_variant}) {
124	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant}};
125							}
126							} elsif (substr ($b, 0, 2) eq "\x00<") {
127	0	0				0	if (substr ($b, 2, 2) eq "\x00?") { # ASCII16BE
		0
128	0					0	my $c = $b; $c =~ tr/\x00//d;
	0					0
129	0	0				0	if ($c =~ /^<\?xml\s+(?:version\s=\s["'][^"']["']\s)?
130							encoding\s=\s["']([^"']*)/x) {
131	0					0	$obj->{input_encoding} = $1;
132	0					0	my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
133	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
134	0	0	0			0	if (not $csdef->{ascii16} or $csdef->{ascii16le} or
			0
135							$csdef->{bom_required}) {
136							$obj->{onerror}->(undef, 'charset-name-mismatch-error',
137							charset_uri => $uri,
138	0					0	charset_name => $obj->{input_encoding});
139							}
140							} else {
141	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
142	0					0	$obj->{input_encoding} = 'UTF-8';
143							}
144	0	0				0	if (defined $csdef->{no_bom_variant16be}) {
145	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant16be}};
146							}
147							} elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian3412
148	0					0	my $c = $b; $c =~ tr/\x00//d;
	0					0
149	0	0				0	if ($c =~ /^<\?xml\s+(?:version\s=\s["'][^"']["']\s)?
150							encoding\s=\s["']([^"']*)/x) {
151	0					0	$obj->{input_encoding} = $1;
152	0					0	my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
153	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
154	0	0	0			0	if (not $csdef->{ascii32} or
			0
			0
			0
155							$csdef->{ascii32endian1234} or
156							$csdef->{ascii32endian2143} or
157							$csdef->{ascii32endian4321} or
158							$csdef->{bom_required}) {
159							$obj->{onerror}->(undef, 'charset-name-mismatch-error',
160							charset_uri => $uri,
161	0					0	charset_name => $obj->{input_encoding});
162							}
163							} else {
164	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
165	0					0	$obj->{input_encoding} = 'UTF-8';
166							}
167	0	0				0	if (defined $csdef->{no_bom_variant32endian3412}) {
168	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian3412}};
169							}
170							}
171							} elsif (substr ($b, 0, 2) eq "\xFE\xFF") {
172	0	0				0	if (substr ($b, 2, 2) eq "\x00<") { # ASCII16BE
		0
173	0					0	$obj->{has_bom} = 1;
174	0					0	substr ($b, 0, 2) = '';
175	0					0	my $c = $b; $c =~ tr/\x00//d;
	0					0
176	0	0				0	if ($c =~ /^<\?xml\s+(?:version\s=\s["'][^"']["']\s)?
177							encoding\s=\s["']([^"']*)/x) {
178	0					0	$obj->{input_encoding} = $1;
179	0					0	my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
180	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
181	0	0	0			0	if (not $csdef->{ascii16} or
			0
182							$csdef->{ascii16le} or
183							not $csdef->{bom_allowed}) {
184							$obj->{onerror}->(undef, 'charset-name-mismatch-error',
185							charset_uri => $uri,
186	0					0	charset_name => $obj->{input_encoding});
187							}
188							} else {
189	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
190	0					0	$obj->{input_encoding} = 'UTF-16';
191							}
192	0	0				0	if (defined $csdef->{no_bom_variant16be}) {
193	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant16be}};
194							}
195							} elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian3412
196	0					0	$obj->{has_bom} = 1;
197	0					0	substr ($b, 0, 4) = '';
198	0					0	my $c = $b; $c =~ tr/\x00//d;
	0					0
199	0	0				0	if ($c =~ /^<\?xml\s+(?:version\s=\s["'][^"']["']\s)?
200							encoding\s=\s["']([^"']*)/x) {
201	0					0	$obj->{input_encoding} = $1;
202	0					0	my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
203	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
204	0	0	0			0	if (not $csdef->{ascii32} or
			0
			0
			0
205							$csdef->{ascii32endian1234} or
206							$csdef->{ascii32endian2143} or
207							$csdef->{ascii32endian4321} or
208							not $csdef->{bom_allowed}) {
209							$obj->{onerror}->(undef, 'charset-name-mismatch-error',
210							charset_uri => $uri,
211	0					0	charset_name => $obj->{input_encoding});
212							}
213							} else {
214	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
215	0					0	$obj->{input_encoding} = 'UTF-16';
216	0					0	$obj->{byte_buffer} .= "\x00\x00";
217							}
218	0	0				0	if (defined $csdef->{no_bom_variant32endian3412}) {
219	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian3412}};
220							}
221							} else {
222	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
223	0					0	$obj->{input_encoding} = 'UTF-16';
224	0					0	substr ($b, 0, 2) = '';
225	0					0	$obj->{has_bom} = 1;
226							}
227							} elsif (substr ($b, 0, 2) eq "\xFF\xFE") {
228	0	0				0	if (substr ($b, 2, 2) eq "<\x00") { # ASCII16LE
		0
229	0					0	$obj->{has_bom} = 1;
230	0					0	substr ($b, 0, 2) = '';
231	0					0	my $c = $b; $c =~ tr/\x00//d;
	0					0
232	0	0				0	if ($c =~ /^<\?xml\s+(?:version\s=\s["'][^"']["']\s)?
233							encoding\s=\s["']([^"']*)/x) {
234	0					0	$obj->{input_encoding} = $1;
235	0					0	my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
236	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
237	0	0	0			0	if (not $csdef->{ascii16} or
			0
238							$csdef->{ascii16be} or
239							not $csdef->{bom_allowed}) {
240							$obj->{onerror}->(undef, 'charset-name-mismatch-error',
241							charset_uri => $uri,
242	0					0	charset_name => $obj->{input_encoding});
243							}
244							} else {
245	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
246	0					0	$obj->{input_encoding} = 'UTF-16';
247							}
248	0	0				0	if (defined $csdef->{no_bom_variant16le}) {
249	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant16le}};
250							}
251							} elsif (substr ($b, 2, 2) eq "\x00\x00") { # ASCII32Endian4321
252	0					0	$obj->{has_bom} = 1;
253	0					0	substr ($b, 0, 4) = '';
254	0					0	my $c = $b; $c =~ tr/\x00//d;
	0					0
255	0	0				0	if ($c =~ /^<\?xml\s+(?:version\s=\s["'][^"']["']\s)?
256							encoding\s=\s["']([^"']*)/x) {
257	0					0	$obj->{input_encoding} = $1;
258	0					0	my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
259	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
260	0	0	0			0	if (not $csdef->{ascii32} or
			0
			0
			0
261							$csdef->{ascii32endian1234} or
262							$csdef->{ascii32endian2143} or
263							$csdef->{ascii32endian3412} or
264							not $csdef->{bom_allowed}) {
265							$obj->{onerror}->(undef, 'charset-name-mismatch-error',
266							charset_uri => $uri,
267	0					0	charset_name => $obj->{input_encoding});
268							}
269							} else {
270	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
271	0					0	$obj->{input_encoding} = 'UTF-16';
272	0					0	$obj->{byte_buffer} .= "\x00\x00";
273							}
274	0	0				0	if (defined $csdef->{no_bom_variant32endian4321}) {
275	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian4321}};
276							}
277							} else {
278	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
279	0					0	$obj->{input_encoding} = 'UTF-16';
280	0					0	substr ($b, 0, 2) = '';
281	0					0	$obj->{has_bom} = 1;
282							}
283							} elsif (substr ($b, 0, 2) eq "\x00\x00") {
284	0	0				0	if (substr ($b, 2, 2) eq "\x00<") { # ASCII32Endian1234
		0
		0
		0
285	0					0	my $c = $b; $c =~ tr/\x00//d;
	0					0
286	0	0				0	if ($c =~ /^<\?xml\s+(?:version\s=\s["'][^"']["']\s)?
287							encoding\s=\s["']([^"']*)/x) {
288	0					0	$obj->{input_encoding} = $1;
289	0					0	my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
290	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
291	0	0	0			0	if (not $csdef->{ascii32} or
			0
			0
			0
292							$csdef->{ascii32endian2143} or
293							$csdef->{ascii32endian3412} or
294							$csdef->{ascii32endian4321} or
295							$csdef->{bom_required}) {
296							$obj->{onerror}->(undef, 'charset-name-mismatch-error',
297							charset_uri => $uri,
298	0					0	charset_name => $obj->{input_encoding});
299							}
300							} else {
301	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
302	0					0	$obj->{input_encoding} = 'UTF-8';
303							}
304	0	0				0	if (defined $csdef->{no_bom_variant32endian1234}) {
305	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian1234}};
306							}
307							} elsif (substr ($b, 2, 2) eq "<\x00") { # ASCII32Endian2143
308	0					0	my $c = $b; $c =~ tr/\x00//d;
	0					0
309	0	0				0	if ($c =~ /^<\?xml\s+(?:version\s=\s["'][^"']["']\s)?
310							encoding\s=\s["']([^"']*)/x) {
311	0					0	$obj->{input_encoding} = $1;
312	0					0	my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
313	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
314	0	0	0			0	if (not $csdef->{ascii32} or
			0
			0
			0
315							$csdef->{ascii32endian1234} or
316							$csdef->{ascii32endian3412} or
317							$csdef->{ascii32endian4321} or
318							$csdef->{bom_required}) {
319							$obj->{onerror}->(undef, 'charset-name-mismatch-error',
320							charset_uri => $uri,
321	0					0	charset_name => $obj->{input_encoding});
322							}
323							} else {
324	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
325	0					0	$obj->{input_encoding} = 'UTF-8';
326							}
327	0	0				0	if (defined $csdef->{no_bom_variant32endian2143}) {
328	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian2143}};
329							}
330							} elsif (substr ($b, 2, 2) eq "\xFE\xFF") { # ASCII32Endian1234
331	0					0	$obj->{has_bom} = 1;
332	0					0	substr ($b, 0, 4) = '';
333	0					0	my $c = $b; $c =~ tr/\x00//d;
	0					0
334	0	0				0	if ($c =~ /^<\?xml\s+(?:version\s=\s["'][^"']["']\s)?
335							encoding\s=\s["']([^"']*)/x) {
336	0					0	$obj->{input_encoding} = $1;
337	0					0	my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
338	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
339	0	0	0			0	if (not $csdef->{ascii32} or
			0
			0
			0
340							$csdef->{ascii32endian2143} or
341							$csdef->{ascii32endian3412} or
342							$csdef->{ascii32endian4321} or
343							$csdef->{bom_required}) {
344							$obj->{onerror}->(undef, 'charset-name-mismatch-error',
345							charset_uri => $uri,
346	0					0	charset_name => $obj->{input_encoding});
347							}
348							} else {
349	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
350	0					0	$obj->{input_encoding} = 'UTF-8';
351	0					0	$obj->{has_bom} = 0;
352	0					0	$obj->{byte_buffer} .= "\x00\x00\xFE\xFF";
353							}
354	0	0				0	if (defined $csdef->{no_bom_variant32endian1234}) {
355	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian1234}};
356							}
357							} elsif (substr ($b, 2, 2) eq "\xFF\xFE") { # ASCII32Endian2143
358	0					0	$obj->{has_bom} = 1;
359	0					0	substr ($b, 0, 4) = '';
360	0					0	my $c = $b; $c =~ tr/\x00//d;
	0					0
361	0	0				0	if ($c =~ /^<\?xml\s+(?:version\s=\s["'][^"']["']\s)?
362							encoding\s=\s["']([^"']*)/x) {
363	0					0	$obj->{input_encoding} = $1;
364	0					0	my $uri = name_to_uri (undef, 'xml', $obj->{input_encoding});
365	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
366	0	0	0			0	if (not $csdef->{ascii32} or
			0
			0
			0
367							$csdef->{ascii32endian1234} or
368							$csdef->{ascii32endian3412} or
369							$csdef->{ascii32endian4321} or
370							$csdef->{bom_required}) {
371							$obj->{onerror}->(undef, 'charset-name-mismatch-error',
372							charset_uri => $uri,
373	0					0	charset_name => $obj->{input_encoding});
374							}
375							} else {
376	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'};
377	0					0	$obj->{input_encoding} = 'UTF-8';
378	0					0	$obj->{has_bom} = 0;
379	0					0	$obj->{byte_buffer} .= "\x00\x00\xFF\xFE";
380							}
381	0	0				0	if (defined $csdef->{no_bom_variant32endian2143}) {
382	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$csdef->{no_bom_variant32endian2143}};
383							}
384							}
385							# \x4C\x6F\xA7\x94 EBCDIC
386							} # buffer
387	0					0	$obj->{byte_buffer} .= $b;
388							} # read
389							} elsif ($csdef->{uri}->{$XML_CHARSET.'utf-8'}) {
390							## BOM is optional.
391	0					0	my $b = '';
392	0	0				0	if (read $obj->{filehandle}, $b, 3) {
393	0	0				0	if ($b eq "\xEF\xBB\xBF") {
394	0					0	$obj->{has_bom} = 1;
395							} else {
396	0					0	$obj->{byte_buffer} .= $b;
397							}
398							}
399	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-8'}; # UTF-8 w/o BOM
400							} elsif ($csdef->{uri}->{$XML_CHARSET.'utf-16'}) {
401							## BOM is mandated.
402	0					0	my $b = '';
403	0	0				0	if (read $obj->{filehandle}, $b, 2) {
404	0	0				0	if ($b eq "\xFE\xFF") {
		0
405	0					0	$obj->{has_bom} = 1; # UTF-16BE w/o BOM
406	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
407							} elsif ($b eq "\xFF\xFE") {
408	0					0	$obj->{has_bom} = 1; # UTF-16LE w/o BOM
409	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16le'};
410							} else {
411	0					0	$obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset});
412	0					0	$obj->{has_bom} = 0;
413	0					0	$obj->{byte_buffer} .= $b; # UTF-16BE w/o BOM
414	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
415							}
416							} else {
417	0					0	$obj->{onerror}->(undef, 'no-bom-error', charset_uri => $obj->{charset});
418	0					0	$obj->{has_bom} = 0; # UTF-16BE w/o BOM
419	0					0	$csdef = $HTML::HTML5::Parser::Charset::CharsetDef->{$PERL_CHARSET.'utf-16be'};
420							}
421							}
422
423	0	0				0	if ($csdef->{uri}->{$XML_CHARSET.'iso-2022-jp'}) {
		0
		0
424	0					0	$obj->{state_2440} = 'gl-jis-1997-swapped';
425	0					0	$obj->{state_2442} = 'gl-jis-1997';
426	0					0	$obj->{state} = 'state_2842';
427	0					0	require Encode::GLJIS1997Swapped;
428	0					0	require Encode::GLJIS1997;
429	0	0	0			0	if (Encode::find_encoding ($obj->{state_2440}) and
430							Encode::find_encoding ($obj->{state_2442})) {
431	0					0	return bless $obj, 'HTML::HTML5::Parser::Charset::DecodeHandle::ISO2022JP';
432							}
433							} elsif ($csdef->{uri}->{$IANA_CHARSET.'iso-2022-jp'}) {
434	0					0	$obj->{state_2440} = 'gl-jis-1978';
435	0					0	$obj->{state_2442} = 'gl-jis-1983';
436	0					0	$obj->{state} = 'state_2842';
437	0					0	require Encode::GLJIS1978;
438	0					0	require Encode::GLJIS1983;
439	0	0	0			0	if (Encode::find_encoding ($obj->{state_2440}) and
440							Encode::find_encoding ($obj->{state_2442})) {
441	0					0	return bless $obj, 'HTML::HTML5::Parser::Charset::DecodeHandle::ISO2022JP';
442							}
443							} elsif (defined $csdef->{perl_name}->[0]) {
444	0	0	0			0	if ($csdef->{uri}->{$XML_CHARSET.'euc-jp'} or
		0	0
		0
445							$csdef->{uri}->{$IANA_CHARSET.'euc-jp'}) {
446	0					0	$obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
447	0					0	require Encode::EUCJP1997;
448	0	0				0	if (Encode::find_encoding ($obj->{perl_encoding_name})) {
449	0					0	$obj->{category} \|= HTML::HTML5::Parser::Charset::Info::CHARSET_CATEGORY_EUCJP;
450	0					0	return bless $obj, 'HTML::HTML5::Parser::Charset::DecodeHandle::Encode';
451							}
452							} elsif ($csdef->{uri}->{$XML_CHARSET.'shift_jis'} or
453							$csdef->{uri}->{$IANA_CHARSET.'shift_jis'}) {
454	0					0	$obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
455	0					0	require Encode::ShiftJIS1997;
456	0	0				0	if (Encode::find_encoding ($obj->{perl_encoding_name})) {
457	0					0	return bless $obj, 'HTML::HTML5::Parser::Charset::DecodeHandle::Encode';
458							}
459							} elsif ($csdef->{is_block_safe}) {
460	0					0	$obj->{perl_encoding_name} = $csdef->{perl_name}->[0];
461	0					0	require Encode;
462	0	0				0	if (Encode::find_encoding ($obj->{perl_encoding_name})) {
463	0					0	return bless $obj, 'HTML::HTML5::Parser::Charset::DecodeHandle::Encode';
464							}
465							}
466							}
467
468							$obj->{onerror}->(undef, 'charset-not-supported-error',
469	0					0	charset_uri => $obj->{charset});
470	0					0	return undef;
471							} # create_decode_handle
472
473							sub name_to_uri ($$$) {
474	0			0	0	0	my $domain = $_[1];
475	0					0	my $name = lc $_[2];
476
477	0	0				0	if ($domain eq 'ietf') {
		0
478	0					0	return $IANA_CHARSET . $name;
479							} elsif ($domain eq 'xml') {
480	0	0				0	if ({
481							'utf-8' => 1,
482							'utf-16' => 1,
483							'iso-10646-ucs-2' => 1,
484							'iso-10646-ucs-4' => 1,
485							'iso-8859-1' => 1,
486							'iso-8859-2' => 1,
487							'iso-8859-3' => 1,
488							'iso-8859-4' => 1,
489							'iso-8859-5' => 1,
490							'iso-8859-6' => 1,
491							'iso-8859-7' => 1,
492							'iso-8859-8' => 1,
493							'iso-8859-9' => 1,
494							'iso-8859-10' => 1,
495							'iso-8859-11' => 1,
496							'iso-8859-13' => 1,
497							'iso-8859-14' => 1,
498							'iso-8859-15' => 1,
499							'iso-8859-16' => 1,
500							'iso-2022-jp' => 1,
501							'shift_jis' => 1,
502							'euc-jp' => 1,
503							}->{$name}) {
504	0					0	return $XML_CHARSET . $name;
505							}
506
507	0					0	my $uri = $IANA_CHARSET . $name;
508	0	0				0	return $uri if $HTML::HTML5::Parser::Charset::CharsetDef->{$uri};
509
510	0					0	return $XML_CHARSET . $name;
511							} else {
512	0					0	return undef;
513							}
514							} # name_to_uri
515
516							sub uri_to_name ($$$) {
517	0			0	0	0	my (undef, $domain, $uri) = @_;
518
519	0	0				0	if ($domain eq 'xml') {
520	0					0	my $v = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri}->{xml_name};
521	0	0				0	return $v if defined $v;
522
523	0	0				0	if (substr ($uri, 0, length $XML_CHARSET) eq $XML_CHARSET) {
524	0					0	return substr ($uri, length $XML_CHARSET);
525							}
526
527	0					0	$domain = 'ietf'; ## TODO: XML encoding name has smaller range
528							}
529
530	0	0				0	if ($domain eq 'ietf') {
531	0					0	my $v = $HTML::HTML5::Parser::Charset::CharsetDef->{$uri}->{iana_name};
532	0	0				0	return $v->[0] if defined $v;
533
534	0	0				0	if (substr ($uri, 0, length $IANA_CHARSET) eq $IANA_CHARSET) {
535	0					0	return substr ($uri, length $IANA_CHARSET);
536							}
537							}
538
539	0					0	return undef;
540							} # uri_to_name
541
542							require IO::Handle;
543
544							package HTML::HTML5::Parser::Charset::DecodeHandle::ByteBuffer;
545
546							## NOTE: Provides a byte buffer wrapper object.
547
548							sub new ($$) {
549	708			708		2180	my $self = bless {
550							buffer => '',
551							}, shift;
552	708					1548	$self->{filehandle} = shift;
553	708					1532	return $self;
554							} # new
555
556							sub read {
557	2589			2589		4194	my $self = shift;
558	2589					4282	my $pos = length $self->{buffer};
559	2589					8367	my $r = $self->{filehandle}->read ($self->{buffer}, $_[1], $pos);
560	2589					22585	substr ($_[0], $_[2]) = substr ($self->{buffer}, $pos);
561							## NOTE: This would do different behavior from Perl's standard
562							## \|read\| when $pos points beyond the end of the string.
563	2589					6146	return $r;
564							} # read
565
566	0			0		0	sub close { $_[0]->{filehandle}->close }
567
568							package HTML::HTML5::Parser::Charset::DecodeHandle::CharString;
569
570							## NOTE: Same as Perl's standard \|open $handle, '<', \$char_string\|,
571							## but supports \|ungetc\| and other extensions.
572
573							sub new ($$) {
574	0			0		0	my $self = bless {pos => 0}, shift;
575	0					0	$self->{string} = shift; # must be a scalar ref
576	0					0	return $self;
577							} # new
578
579							sub getc ($) {
580	0			0		0	my $self = shift;
581	0	0				0	if ($self->{pos} < length ${$self->{string}}) {
	0					0
582	0					0	return substr ${$self->{string}}, $self->{pos}++, 1;
	0					0
583							} else {
584	0					0	return undef;
585							}
586							} # getc
587
588							sub read ($$$$) {
589							#my ($self, $scalar, $length, $offset) = @_;
590	0			0		0	my $self = $_[0];
591	0		0			0	my $length = $_[2] \|\| 0;
592	0		0			0	my $offset = $_[3] \|\| 0;
593							## NOTE: We don't support standard Perl semantics if $offset is
594							## greater than the length of $scalar.
595	0					0	substr ($_[1], $offset) = substr (${$self->{string}}, $self->{pos}, $length);
	0					0
596	0					0	my $count = (length $_[1]) - $offset;
597	0					0	$self->{pos} += $count;
598	0					0	return $count;
599							} # read
600
601							sub manakai_read_until ($$$;$) {
602							#my ($self, $scalar, $pattern, $offset) = @_;
603	0			0		0	my $self = $_[0];
604	0					0	pos (${$self->{string}}) = $self->{pos};
	0					0
605	0	0				0	if (${$self->{string}} =~ /\G(?>$_[2])+/) {
	0					0
606	0					0	substr ($_[1], $_[3]) = substr (${$self->{string}}, $-[0], $+[0] - $-[0]);
	0					0
607	0					0	$self->{pos} += $+[0] - $-[0];
608	0					0	return $+[0] - $-[0];
609							} else {
610	0					0	return 0;
611							}
612							} # manakai_read_until
613
614							sub ungetc ($$) {
615	0			0		0	my $self = shift;
616							## Ignore second parameter.
617	0	0				0	$self->{pos}-- if $self->{pos} > 0;
618							} # ungetc
619
620				0			sub close ($) { }
621
622				0			sub onerror ($;$) { }
623
624							package HTML::HTML5::Parser::Charset::DecodeHandle::Encode;
625
626							## NOTE: Provides a Perl \|Encode\| module wrapper object.
627
628	0			0		0	sub charset ($) { $_[0]->{charset} }
629
630	0			0		0	sub close ($) { $_[0]->{filehandle}->close }
631
632							sub getc ($) {
633	0			0		0	my $c = '';
634	0					0	my $l = $_[0]->read ($c, 1);
635	0	0				0	if ($l) {
636	0					0	return $c;
637							} else {
638	0					0	return undef;
639							}
640							} # getc
641
642							sub read ($$$;$) {
643	2740			2740		4286	my $self = $_[0];
644							#my $scalar = $_[1];
645	2740					4525	my $length = $_[2];
646	2740		50			8862	my $offset = $_[3] \|\| 0;
647	2740					4006	my $count = 0;
648	2740					4041	my $eof;
649							## NOTE: It is incompatible with the standard Perl semantics
650							## if $offset is greater than the length of $scalar.
651
652							A: {
653	2740	50				4437	return $count if $length < 1;
	5340					11386
654
655	5340	100				7971	if (my $l = (length ${$self->{char_buffer}}) - $self->{char_buffer_pos}) {
	5340					14483
656	1227	100				3185	if ($l >= $length) {
657							substr ($_[1], $offset)
658	144					457	= substr (${$self->{char_buffer}}, $self->{char_buffer_pos},
659	144					226	$length);
660	144					260	$count += $length;
661	144					238	$self->{char_buffer_pos} += $length;
662	144					225	$length = 0;
663	144					416	return $count;
664							} else {
665							substr ($_[1], $offset)
666	1083					1602	= substr (${$self->{char_buffer}}, $self->{char_buffer_pos});
	1083					3875
667	1083					2159	$count += $l;
668	1083					1567	$length -= $l;
669	1083					2053	${$self->{char_buffer}} = '';
	1083					2670
670	1083					1896	$self->{char_buffer_pos} = 0;
671							}
672	1083					1892	$offset = length $_[1];
673							}
674
675	5196	100				10577	if ($eof) {
676	2596					6361	return $count;
677							}
678
679	2600					4047	my $error;
680	2600	50				7937	if ($self->{continue}) {
		100
681	0	0				0	if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
682							length $self->{byte_buffer})) {
683							#
684							} else {
685	0					0	$error = 1;
686							}
687	0					0	$self->{continue} = 0;
688							} elsif (512 > length $self->{byte_buffer}) {
689	2598	50				7550	if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
690							length $self->{byte_buffer})) {
691							#
692							} else {
693	2598					4241	$eof = 1;
694							}
695							}
696
697	2600	50				5904	unless ($error) {
698	2600	100				6333	if (not $self->{bom_checked}) {
699	711	100				1566	if (defined $self->{bom_pattern}) {
700	709	50				4205	if ($self->{byte_buffer} =~ s/^$self->{bom_pattern}//) {
701	0					0	$self->{has_bom} = 1;
702							}
703							}
704	711					2026	$self->{bom_checked} = 1;
705							}
706	2600					4046	my $string = do {
707	2600	0		0		18211	local $SIG{__WARN__} = sub { warn $_[0] unless $_[0] =~ /^Code point/ };
	0					0
708							Encode::decode (
709							$self->{perl_encoding_name},
710							$self->{byte_buffer},
711	2600					16013	Encode::FB_QUIET(),
712							);
713							};
714	2600	100				74968	if (length $string) {
715	711					1822	$self->{char_buffer} = \$string;
716	711					1308	$self->{char_buffer_pos} = 0;
717	711	50				2003	if (length $self->{byte_buffer}) {
718	0					0	$self->{continue} = 1;
719							}
720							} else {
721	1889	50				4452	if (length $self->{byte_buffer}) {
722	0					0	$error = 1;
723							} else {
724							## NOTE: No further input.
725	1889					4313	redo A;
726							}
727							}
728							}
729
730	711	50				1425	if ($error) {
731	0					0	my $r = substr $self->{byte_buffer}, 0, 1, '';
732	0					0	my $fallback;
733	0					0	my $etype = 'illegal-octets-error';
734	0					0	my %earg;
735	0	0				0	if ($self->{category}
		0
736							& HTML::HTML5::Parser::Charset::Info::CHARSET_CATEGORY_SJIS) {
737	0	0				0	if ($r =~ /^[\x81-\x9F\xE0-\xFC]/) {
		0
738	0	0				0	if ($self->{byte_buffer} =~ s/(.)//s) {
739	0					0	$r .= $1; # not limited to \x40-\xFC - \x7F
740	0					0	$etype = 'unassigned-code-point-error';
741							}
742							## NOTE: Range [\xF0-\xFC] is unassigned and may be used as a
743							## single-byte character or as the first-byte of a double-byte
744							## character, according to JIS X 0208:1997 Appendix 1. However, the
745							## current practice is using the range as first-bytes of double-byte
746							## characters.
747							} elsif ($r =~ /^[\x80\xA0\xFD-\xFF]/) {
748	0					0	$etype = 'unassigned-code-point-error';
749							}
750							} elsif ($self->{category}
751							& HTML::HTML5::Parser::Charset::Info::CHARSET_CATEGORY_EUCJP) {
752	0	0	0			0	if ($r =~ /^[\xA1-\xFE]/) {
		0
		0
		0
753	0	0				0	if ($self->{byte_buffer} =~ s/^([\xA1-\xFE])//) {
754	0					0	$r .= $1;
755	0					0	$etype = 'unassigned-code-point-error';
756							}
757							} elsif ($r eq "\x8F") {
758	0	0				0	if ($self->{byte_buffer} =~ s/^([\xA1-\xFE][\xA1-\xFE]?)//) {
759	0					0	$r .= $1;
760	0	0				0	$etype = 'unassigned-code-point-error' if length $1 == 2;
761							}
762							} elsif ($r eq "\x8E") {
763	0	0				0	if ($self->{byte_buffer} =~ s/^([\xA1-\xFE])//) {
764	0					0	$r .= $1;
765	0					0	$etype = 'unassigned-code-point-error';
766							}
767							} elsif ($r eq "\xA0" or $r eq "\xFF") {
768	0					0	$etype = 'unassigned-code-point-error';
769							}
770							} else {
771	0					0	$fallback = $self->{fallback}->{$r};
772	0	0				0	if (defined $fallback) {
		0
773							## NOTE: This is an HTML5 parse error.
774	0					0	$etype = 'fallback-char-error';
775	0					0	$earg{char} = \$fallback;
776							} elsif (exists $self->{fallback}->{$r}) {
777							## NOTE: This is an HTML5 parse error. In addition, the octet
778							## is not assigned with a character.
779	0					0	$etype = 'fallback-unassigned-error';
780							}
781							}
782
783							## NOTE: Fixup line/column number by counting the number of
784							## lines/columns in the string that is to be retuend by this
785							## method call.
786	0					0	my $line_diff = 0;
787	0					0	my $col_diff = 0;
788	0					0	my $set_col;
789	0					0	for (my $i = 0; $i < $count; $i++) {
790	0					0	my $s = substr $_[1], $i - $count, 1;
791	0	0				0	if ($s eq "\x0D") {
		0
792	0					0	$line_diff++;
793	0					0	$col_diff = 0;
794	0					0	$set_col = 1;
795	0	0				0	$i++ if substr ($_[1], $i - $count + 1, 1) eq "\x0A";
796							} elsif ($s eq "\x0A") {
797	0					0	$line_diff++;
798	0					0	$col_diff = 0;
799	0					0	$set_col = 1;
800							} else {
801	0					0	$col_diff++;
802							}
803							}
804	0					0	my $i = $self->{char_buffer_pos};
805	0	0	0			0	if ($count and substr (${$self->{char_buffer}}, -1, 1) eq "\x0D") {
	0					0
806	0	0				0	if (substr (${$self->{char_buffer}}, $i, 1) eq "\x0A") {
	0					0
807	0					0	$i++;
808							}
809							}
810	0					0	my $cb_length = length ${$self->{char_buffer}};
	0					0
811	0					0	for (; $i < $cb_length; $i++) {
812	0					0	my $s = substr $_[1], $i, 1;
813	0	0				0	if ($s eq "\x0D") {
		0
814	0					0	$line_diff++;
815	0					0	$col_diff = 0;
816	0					0	$set_col = 1;
817	0	0				0	$i++ if substr ($_[1], $i + 1, 1) eq "\x0A";
818							} elsif ($s eq "\x0A") {
819	0					0	$line_diff++;
820	0					0	$col_diff = 0;
821	0					0	$set_col = 1;
822							} else {
823	0					0	$col_diff++;
824							}
825							}
826							$self->{onerror}->($self, $etype, octets => \$r, %earg,
827	0	0				0	level => $self->{level}->{$self->{error_level}->{$etype}},
828							line_diff => $line_diff,
829							($set_col ? (column => 1) : ()),
830							column_diff => $col_diff);
831							## NOTE: Error handler may modify \|octets\| parameter, which
832							## would be returned as part of the output. Note that what
833							## is returned would affect what \|manakai_read_until\| returns.
834	0	0				0	${$self->{char_buffer}} .= defined $fallback ? $fallback : $r;
	0					0
835							}
836
837	711					1364	redo A;
838							} # A
839							} # read
840
841							sub manakai_read_until ($$$;$) {
842							#my ($self, $scalar, $pattern, $offset) = @_;
843	1894			1894		3659	my $self = $_[0];
844	1894					3307	my $s = '';
845	1894					5710	$self->read ($s, 255);
846	1894	100				22340	if ($s =~ /^(?>$_[2])+/) {
		100
847	883					3587	my $rem_length = (length $s) - $+[0];
848	883	100				2341	if ($rem_length) {
849	174	100				522	if ($self->{char_buffer_pos} > $rem_length) {
850	3					9	$self->{char_buffer_pos} -= $rem_length;
851							} else {
852	171					520	substr (${$self->{char_buffer}}, 0, $self->{char_buffer_pos})
853	171					480	= substr ($s, $+[0]);
854	171					430	$self->{char_buffer_pos} = 0;
855							}
856							}
857	883					3924	substr ($_[1], $_[3]) = substr ($s, $-[0], $+[0] - $-[0]);
858	883					4380	return $+[0];
859							} elsif (length $s) {
860	209	100				647	if ($self->{char_buffer_pos} > length $s) {
861	2					3	$self->{char_buffer_pos} -= length $s;
862							} else {
863	207					331	substr (${$self->{char_buffer}}, 0, $self->{char_buffer_pos}) = $s;
	207					653
864	207					474	$self->{char_buffer_pos} = 0;
865							}
866							}
867	1011					3449	return 0;
868							} # manakai_read_until
869
870	0			0		0	sub has_bom ($) { $_[0]->{has_bom} }
871
872							sub input_encoding ($) {
873	0			0		0	my $v = $_[0]->{input_encoding};
874	0	0				0	return $v if defined $v;
875
876	0					0	my $uri = $_[0]->{charset};
877	0	0				0	if (defined $uri) {
878	0					0	return HTML::HTML5::Parser::Charset::DecodeHandle->uri_to_name (xml => $uri);
879							}
880
881	0					0	return undef;
882							} # input_encoding
883
884							sub onerror ($;$) {
885	1422	100		1422		3584	if (@_ > 1) {
886	711	50				1767	if ($_[1]) {
887	711					1925	$_[0]->{onerror} = $_[1];
888	711					1493	$_[0]->{onerror_set} = 1;
889							} else {
890	0			0		0	$_[0]->{onerror} = sub { };
891	0					0	delete $_[0]->{onerror_set};
892							}
893							}
894
895	1422	50				4385	return $_[0]->{onerror_set} ? $_[0]->{onerror} : undef;
896							} # onerror
897
898							sub ungetc ($$) {
899	0		0	0			unshift @{$_[0]->{character_queue}}, chr int ($_[1] or 0);
	0
900							} # ungetc
901
902							package HTML::HTML5::Parser::Charset::DecodeHandle::ISO2022JP;
903							push our @ISA, 'HTML::HTML5::Parser::Charset::DecodeHandle::Encode';
904
905							sub getc ($) {
906	0			0			my $self = $_[0];
907	0	0					return shift @{$self->{character_queue}} if @{$self->{character_queue}};
	0
	0
908
909	0						my $r;
910							A: {
911	0						my $error;
	0
912	0	0					if ($self->{continue}) {
		0
913	0	0					if ($self->{filehandle}->read ($self->{byte_buffer}, 256,
914							length $self->{byte_buffer})) {
915							#
916							} else {
917	0						$error = 1;
918							}
919	0						$self->{continue} = 0;
920							} elsif (512 > length $self->{byte_buffer}) {
921							$self->{filehandle}->read ($self->{byte_buffer}, 256,
922	0						length $self->{byte_buffer});
923							}
924
925	0	0					unless ($error) {
926	0	0					if ($self->{byte_buffer} =~ s/^\x1B(\x24[\x40\x42]\|\x28[\x42\x4A])//) {
		0
		0
		0
		0
927							$self->{state} = {
928							"\x24\x40" => 'state_2440',
929							"\x24\x42" => 'state_2442',
930							"\x28\x42" => 'state_2842',
931							"\x28\x4A" => 'state_284A',
932	0						}->{$1};
933	0						redo A;
934							} elsif ($self->{state} eq 'state_2842') { # IRV
935	0	0					if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) {
936	0						push @{$self->{character_queue}}, split //, $1;
	0
937	0						$r = shift @{$self->{character_queue}};
	0
938							} else {
939	0	0					if (length $self->{byte_buffer}) {
940	0						$error = 1;
941							} else {
942	0						$r = undef;
943							}
944							}
945							} elsif ($self->{state} eq 'state_284A') { # 0201
946	0	0					if ($self->{byte_buffer} =~ s/^([\x00-\x0D\x10-\x1A\x1C-\x7F]+)//) {
947	0						my $v = $1;
948	9			9		108	$v =~ tr/\x5C\x7E/\xA5\x{203E}/;
	9					24
	9					194
	0
949	0						push @{$self->{character_queue}}, split //, $v;
	0
950	0						$r = shift @{$self->{character_queue}};
	0
951							} else {
952	0	0					if (length $self->{byte_buffer}) {
953	0						$error = 1;
954							} else {
955	0						$r = undef;
956							$self->{onerror}->($self, 'invalid-state-error',
957							state => $self->{state},
958	0						level => $self->{level}->{$self->{error_level}->{'invalid-state-error'}});
959							}
960							}
961							} elsif ($self->{state} eq 'state_2442') { # 1983
962							my $v = Encode::decode ($self->{state_2442},
963							$self->{byte_buffer},
964	0						Encode::FB_QUIET ());
965	0	0					if (length $v) {
966	0						push @{$self->{character_queue}}, split //, $v;
	0
967	0						$r = shift @{$self->{character_queue}};
	0
968							} else {
969	0	0					if (length $self->{byte_buffer}) {
970	0						$error = 1;
971							} else {
972	0						$r = undef;
973							$self->{onerror}->($self, 'invalid-state-error',
974							state => $self->{state},
975	0						level => $self->{level}->{$self->{error_level}->{'invalid-state-error'}});
976							}
977							}
978							} elsif ($self->{state} eq 'state_2440') { # 1978
979							my $v = Encode::decode ($self->{state_2440},
980							$self->{byte_buffer},
981	0						Encode::FB_QUIET ());
982	0	0					if (length $v) {
983	0						push @{$self->{character_queue}}, split //, $v;
	0
984	0						$r = shift @{$self->{character_queue}};
	0
985							} else {
986	0	0					if (length $self->{byte_buffer}) {
987	0						$error = 1;
988							} else {
989	0						$r = undef;
990							$self->{onerror}->($self, 'invalid-state-error',
991							state => $self->{state},
992	0						level => $self->{level}->{$self->{error_level}->{'invalid-state-error'}});
993							}
994							}
995							} else {
996	0						$error = 1;
997							}
998							}
999
1000	0	0					if ($error) {
1001	0						$r = substr $self->{byte_buffer}, 0, 1, '';
1002	0						my $etype = 'illegal-octets-error';
1003	0	0	0				if (($self->{state} eq 'state_2442' or
		0	0
			0
			0
1004							$self->{state} eq 'state_2440') and
1005							$r =~ /^[\x21-\x7E]/ and
1006							$self->{byte_buffer} =~ s/^([\x21-\x7E])//) {
1007	0						$r .= $1;
1008	0						$etype = 'unassigned-code-point-error';
1009							} elsif ($r eq "\x1B" and
1010							$self->{byte_buffer} =~ s/^\(H//) { # Old 0201
1011	0						$r .= "(H";
1012	0						$self->{state} = 'state_284A';
1013							}
1014							$self->{onerror}->($self, $etype, octets => \$r,
1015	0						level => $self->{level}->{$self->{error_level}->{$etype}});
1016							}
1017							} # A
1018
1019	0						return $r;
1020							} # getc
1021
1022							## TODO: This is not good for performance. Should be replaced
1023							## by read-centric implementation.
1024							sub read ($$$;$) {
1025							#my ($self, $scalar, $length, $offset) = @_;
1026	0			0			my $length = $_[2];
1027	0						my $r = '';
1028	0						while ($length > 0) {
1029	0						my $c = $_[0]->getc;
1030	0	0					last unless defined $c;
1031	0						$r .= $c;
1032	0						$length--;
1033							}
1034	0						substr ($_[1], $_[3]) = $r;
1035							## NOTE: This would do different thing from what Perl's \|read\| do
1036							## if $offset points beyond the end of the $scalar.
1037	0						return length $r;
1038							} # read
1039
1040							sub manakai_read_until ($$$;$) {
1041							#my ($self, $scalar, $pattern, $offset) = @_;
1042	0			0			my $self = $_[0];
1043	0						my $c = $self->getc;
1044	0	0					if ($c =~ /^$_[2]/) {
		0
1045	0						substr ($_[1], $_[3]) = $c;
1046	0						return 1;
1047							} elsif (defined $c) {
1048	0						$self->ungetc (ord $c);
1049	0						return 0;
1050							} else {
1051	0						return 0;
1052							}
1053							} # manakai_read_until
1054
1055							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:us-ascii'} =
1056							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:us'} =
1057							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso646-us'} =
1058							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:cp367'} =
1059							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ibm367'} =
1060							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ansi_x3.4-1986'} =
1061							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ansi_x3.4-1968'} =
1062							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-ir-6'} =
1063							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:csascii'} =
1064							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso_646.irv:1991'} =
1065							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:ascii'} = {ascii8 =>
1066							'1',
1067							is_block_safe =>
1068							'1',
1069							ietf_name =>
1070							['ansi_x3.4-1968',
1071							'ansi_x3.4-1986',
1072							'ascii',
1073							'cp367',
1074							'csascii',
1075							'ibm367',
1076							'iso-ir-6',
1077							'iso646-us',
1078							'iso_646.irv:1991',
1079							'us',
1080							'us-ascii',
1081							'us-ascii'],
1082							mime_name =>
1083							'us-ascii',
1084							perl_name =>
1085							['ascii',
1086							'iso-646-us',
1087							'us-ascii'],
1088							utf8_encoding_scheme =>
1089							'1',
1090							'uri',
1091							{'urn:x-suika-fam-cx:charset:ansi_x3.4-1968',
1092							'1',
1093							'urn:x-suika-fam-cx:charset:ansi_x3.4-1986',
1094							'1',
1095							'urn:x-suika-fam-cx:charset:ascii',
1096							'1',
1097							'urn:x-suika-fam-cx:charset:cp367',
1098							'1',
1099							'urn:x-suika-fam-cx:charset:csascii',
1100							'1',
1101							'urn:x-suika-fam-cx:charset:ibm367',
1102							'1',
1103							'urn:x-suika-fam-cx:charset:iso-ir-6',
1104							'1',
1105							'urn:x-suika-fam-cx:charset:iso646-us',
1106							'1',
1107							'urn:x-suika-fam-cx:charset:iso_646.irv:1991',
1108							'1',
1109							'urn:x-suika-fam-cx:charset:us',
1110							'1',
1111							'urn:x-suika-fam-cx:charset:us-ascii',
1112							'1'},
1113							};
1114
1115							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ascii-ctrl'} = {perl_name =>
1116							['ascii-ctrl'],
1117							'uri',
1118							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ascii-ctrl',
1119							'1'}};
1120							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.null'} = {perl_name =>
1121							['null'],
1122							'uri',
1123							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.null',
1124							'1'}};
1125							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-8'} = {ascii8 =>
1126							'1',
1127							bom_allowed =>
1128							'1',
1129							no_bom_variant =>
1130							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
1131							utf8_encoding_scheme =>
1132							'1',
1133							'uri',
1134							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-8',
1135							'1'},
1136							xml_name => 'UTF-8',
1137							};
1138
1139							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/UTF-8.RFC2279'} = {ascii8 =>
1140							'1',
1141							bom_allowed =>
1142							'1',
1143							no_bom_variant =>
1144							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
1145							utf8_encoding_scheme =>
1146							'1',
1147							'uri',
1148							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/UTF-8.RFC2279',
1149							'1'}};
1150							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8'} = {
1151							ascii8 => 1,
1152							is_block_safe =>
1153							'1',
1154							perl_name =>
1155							['utf-8'],
1156							utf8_encoding_scheme =>
1157							'1',
1158							'uri',
1159							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8',
1160							'1'}};
1161							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-8'} = {
1162							ascii8 => 1,
1163							bom_allowed =>
1164							'1',
1165							no_bom_variant =>
1166							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-8',
1167							ietf_name =>
1168							['utf-8'],
1169							mime_name =>
1170							'utf-8',
1171							utf8_encoding_scheme =>
1172							'1',
1173							'uri',
1174							{'urn:x-suika-fam-cx:charset:utf-8',
1175							'1'},
1176							};
1177
1178							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8'} = {ascii8 =>
1179							'1',
1180							is_block_safe =>
1181							'1',
1182							perl_name =>
1183							['utf8'],
1184							utf8_encoding_scheme =>
1185							'1',
1186							'uri',
1187							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf8',
1188							'1'}};
1189							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-16'} = {
1190							ascii16 => 1,
1191							bom_allowed =>
1192							'1',
1193							bom_required =>
1194							'1',
1195							no_bom_variant =>
1196							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1197							no_bom_variant16be =>
1198							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1199							no_bom_variant16le =>
1200							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1201							perl_name =>
1202							['utf-16'],
1203							'uri',
1204							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.utf-16',
1205							'1'},
1206							xml_name => 'UTF-16',
1207							};
1208
1209							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16'} = {
1210							ascii16 => 1,
1211							bom_allowed =>
1212							'1',
1213							no_bom_variant =>
1214							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1215							no_bom_variant16be =>
1216							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1217							no_bom_variant16le =>
1218							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1219							ietf_name =>
1220							['utf-16'],
1221							mime_name =>
1222							'utf-16',
1223							'uri',
1224							{'urn:x-suika-fam-cx:charset:utf-16',
1225							'1'},
1226							};
1227
1228							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16be'} = {
1229							ascii16 => 1,
1230							ascii16be => 1,
1231							bom_allowed =>
1232							'1',
1233							no_bom_variant =>
1234							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1235							no_bom_variant16be =>
1236							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1237							ietf_name =>
1238							['utf-16be'],
1239							mime_name =>
1240							'utf-16be',
1241							'uri',
1242							{'urn:x-suika-fam-cx:charset:utf-16be',
1243							'1'},
1244							};
1245
1246							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:utf-16le'} = {
1247							ascii16 => 1,
1248							ascii16le => 1,
1249							bom_allowed =>
1250							'1',
1251							no_bom_variant =>
1252							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1253							no_bom_variant16le =>
1254							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1255							ietf_name =>
1256							['utf-16le'],
1257							mime_name =>
1258							'utf-16le',
1259							'uri',
1260							{'urn:x-suika-fam-cx:charset:utf-16le',
1261							'1'},
1262							};
1263
1264							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be'} = {
1265							ascii16 => 1,
1266							ascii16be => 1,
1267							is_block_safe =>
1268							'1',
1269							perl_name =>
1270							['utf-16be'],
1271							'uri',
1272							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16be',
1273							'1'}};
1274							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le'} = {
1275							ascii16 => 1,
1276							ascii16le => 1,
1277							is_block_safe =>
1278							'1',
1279							perl_name =>
1280							['utf-16le'],
1281							'uri',
1282							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-16le',
1283							'1'}};
1284							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-2'} = $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-10646-ucs-2'} = {
1285							ascii16 => 1,
1286							bom_allowed =>
1287							'1',
1288							no_bom_variant =>
1289							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
1290							no_bom_variant16be =>
1291							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be',
1292							no_bom_variant16le =>
1293							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
1294							ietf_name =>
1295							['csunicode',
1296							'iso-10646-ucs-2'],
1297							mime_name =>
1298							'iso-10646-ucs-2',
1299							'uri',
1300							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-2',
1301							'1',
1302							'urn:x-suika-fam-cx:charset:iso-10646-ucs-2',
1303							'1'},
1304							xml_name => 'ISO-10646-UCS-2',
1305							};
1306
1307							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be'} = {
1308							ascii16 => 1,
1309							ascii16be => 1,
1310							is_block_safe =>
1311							'1',
1312							perl_name =>
1313							['ucs-2be'],
1314							'uri',
1315							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2be',
1316							'1'}};
1317							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le'} = {
1318							ascii16 => 1,
1319							ascii16le => 1,
1320							is_block_safe =>
1321							'1',
1322							perl_name =>
1323							['ucs-2le'],
1324							'uri',
1325							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.ucs-2le',
1326							'1'}};
1327							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-4'} = $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-10646-ucs-4'} = {
1328							ascii32 => 1,
1329							bom_allowed =>
1330							'1',
1331							no_bom_variant =>
1332							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
1333							no_bom_variant32endian1234 =>
1334							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be',
1335							no_bom_variant32endian4321 =>
1336							'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
1337							ietf_name =>
1338							['csucs4',
1339							'iso-10646-ucs-4'],
1340							mime_name =>
1341							'iso-10646-ucs-4',
1342							'uri',
1343							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-10646-ucs-4',
1344							'1',
1345							'urn:x-suika-fam-cx:charset:iso-10646-ucs-4',
1346							'1'},
1347							xml_name => 'ISO-10646-UCS-4',
1348							};
1349
1350							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be'} = {
1351							ascii32 => 1,
1352							ascii32endian1234 => 1,
1353							is_block_safe =>
1354							'1',
1355							perl_name =>
1356							['ucs-4be',
1357							'utf-32be'],
1358							'uri',
1359							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32be',
1360							'1'}};
1361							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le'} = {
1362							ascii32 => 1,
1363							ascii32endian4321 => 1,
1364							is_block_safe =>
1365							'1',
1366							perl_name =>
1367							['ucs-4le',
1368							'utf-32le'],
1369							'uri',
1370							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.utf-32le',
1371							'1'}};
1372							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso_8859-1:1987'} = $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1'} = {ascii8 =>
1373							'1',
1374							is_block_safe =>
1375							'1',
1376							ietf_name =>
1377							['cp819',
1378							'csisolatin1',
1379							'ibm819',
1380							'iso-8859-1',
1381							'iso-8859-1',
1382							'iso-ir-100',
1383							'iso_8859-1',
1384							'iso_8859-1:1987',
1385							'l1',
1386							'latin1'],
1387							mime_name =>
1388							'iso-8859-1',
1389							perl_name =>
1390							['iso-8859-1',
1391							'latin1'],
1392							'uri',
1393							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-1',
1394							'1',
1395							'urn:x-suika-fam-cx:charset:iso_8859-1:1987',
1396							'1'},
1397							xml_name => 'ISO-8859-1',
1398							};
1399
1400							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-2'} = {ascii8 =>
1401							'1',
1402							is_block_safe =>
1403							'1',
1404							'uri',
1405							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-2',
1406							'1'},
1407							xml_name => 'ISO-8859-2',
1408							};
1409
1410							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-3'} = {ascii8 =>
1411							'1',
1412							is_block_safe =>
1413							'1',
1414							'uri',
1415							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-3',
1416							'1'},
1417							xml_name => 'ISO-8859-3',
1418							};
1419
1420							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-4'} = {ascii8 =>
1421							'1',
1422							is_block_safe =>
1423							'1',
1424							'uri',
1425							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-4',
1426							'1'},
1427							xml_name => 'ISO-8859-4',
1428							};
1429
1430							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-5'} = {ascii8 =>
1431							'1',
1432							is_block_safe =>
1433							'1',
1434							'uri',
1435							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-5',
1436							'1'},
1437							xml_name => 'ISO-8859-5',
1438							};
1439
1440							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-6'} = {ascii8 =>
1441							'1',
1442							is_block_safe =>
1443							'1',
1444							'uri',
1445							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-6',
1446							'1'},
1447							xml_name => 'ISO-8859-6',
1448							};
1449
1450							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-7'} = {ascii8 =>
1451							'1',
1452							is_block_safe =>
1453							'1',
1454							'uri',
1455							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-7',
1456							'1'},
1457							xml_name => 'ISO-8859-7',
1458							};
1459
1460							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-8'} = {ascii8 =>
1461							'1',
1462							is_block_safe =>
1463							'1',
1464							'uri',
1465							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-8',
1466							'1'},
1467							xml_name => 'ISO-8859-8',
1468							};
1469
1470							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-9'} = {ascii8 =>
1471							'1',
1472							is_block_safe =>
1473							'1',
1474							'uri',
1475							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-9',
1476							'1'},
1477							xml_name => 'ISO-8859-9',
1478							};
1479
1480							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-10'} = {ascii8 =>
1481							'1',
1482							is_block_safe =>
1483							'1',
1484							'uri',
1485							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-10',
1486							'1'},
1487							xml_name => 'ISO-8859-10',
1488							};
1489
1490							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-11'} = {ascii8 =>
1491							'1',
1492							is_block_safe =>
1493							'1',
1494							'uri',
1495							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-11',
1496							'1'},
1497							xml_name => 'ISO-8859-11',
1498							};
1499
1500							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-13'} = {ascii8 =>
1501							'1',
1502							is_block_safe =>
1503							'1',
1504							'uri',
1505							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-13',
1506							'1'},
1507							xml_name => 'ISO-8859-13',
1508							};
1509
1510							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-14'} = {ascii8 =>
1511							'1',
1512							is_block_safe =>
1513							'1',
1514							'uri',
1515							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-14',
1516							'1'},
1517							xml_name => 'ISO-8859-14',
1518							};
1519
1520							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-15'} = {ascii8 =>
1521							'1',
1522							is_block_safe =>
1523							'1',
1524							'uri',
1525							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-15',
1526							'1'},
1527							xml_name => 'ISO-8859-15',
1528							};
1529
1530							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-16'} = {ascii8 =>
1531							'1',
1532							is_block_safe =>
1533							'1',
1534							'uri',
1535							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-8859-16',
1536							'1'},
1537							xml_name => 'ISO-8859-16',
1538							};
1539
1540							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-2022-jp'} = {ascii8 =>
1541							'1',
1542							'uri',
1543							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.iso-2022-jp',
1544							'1'},
1545							xml_name => 'ISO-2022-JP',
1546							};
1547
1548							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:iso-2022-jp'} = {ascii8 =>
1549							'1',
1550							ietf_name =>
1551							['csiso2022jp',
1552							'iso-2022-jp',
1553							'iso-2022-jp'],
1554							mime_name =>
1555							'iso-2022-jp',
1556							'uri',
1557							{'urn:x-suika-fam-cx:charset:iso-2022-jp',
1558							'1'},
1559							};
1560
1561							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.iso-2022-jp'} = {ascii8 =>
1562							'1',
1563							perl_name =>
1564							['iso-2022-jp'],
1565							'uri',
1566							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.iso-2022-jp',
1567							'1'}};
1568							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:shift_jis'} = $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.shift_jis'} = {ascii8 =>
1569							'1',
1570							is_block_safe =>
1571							'1',
1572							ietf_name =>
1573							['csshiftjis',
1574							'ms_kanji',
1575							'shift_jis',
1576							'shift_jis'],
1577							mime_name =>
1578							'shift_jis',
1579							perl_name =>
1580							['shift-jis-1997'],
1581							'uri',
1582							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.shift_jis',
1583							'1',
1584							'urn:x-suika-fam-cx:charset:shift_jis',
1585							'1'},
1586							xml_name => 'Shift_JIS',
1587							};
1588
1589							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.shiftjis'} = {ascii8 =>
1590							'1',
1591							is_block_safe =>
1592							'1',
1593							perl_name =>
1594							['shiftjis',
1595							'sjis'],
1596							'uri',
1597							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.shiftjis',
1598							'1'}};
1599							$HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:euc-jp'} = $HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.euc-jp'} = $HTML::HTML5::Parser::Charset::CharsetDef->{'urn:x-suika-fam-cx:charset:extended_unix_code_packed_format_for_japanese'} = {ascii8 =>
1600							'1',
1601							is_block_safe =>
1602							'1',
1603							ietf_name =>
1604							['cseucpkdfmtjapanese',
1605							'euc-jp',
1606							'euc-jp',
1607							'extended_unix_code_packed_format_for_japanese'],
1608							mime_name =>
1609							'euc-jp',
1610							perl_name =>
1611							['euc-jp-1997'],
1612							'uri',
1613							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/XML.euc-jp',
1614							'1',
1615							'urn:x-suika-fam-cx:charset:euc-jp',
1616							'1',
1617							'urn:x-suika-fam-cx:charset:extended_unix_code_packed_format_for_japanese',
1618							'1'},
1619							xml_name => 'EUC-JP',
1620							};
1621
1622							$HTML::HTML5::Parser::Charset::CharsetDef->{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.euc-jp'} = {ascii8 =>
1623							'1',
1624							is_block_safe =>
1625							'1',
1626							perl_name =>
1627							['euc-jp',
1628							'ujis'],
1629							'uri',
1630							{'http://suika.fam.cx/~wakaba/archive/2004/dis/Charset/Perl.euc-jp',
1631							'1'}};
1632
1633							1;
1634							## $Date: 2008/09/15 07:19:03 $